"""Test for arXiv year extraction bug fix.""" import shutil from pathlib import Path import pytest from paperlib.config import LibraryPaths from paperlib.models import SourceType from paperlib.storage import PaperStorageManager class TestArxivYearFix: """Test the arXiv year extraction fix.""" @pytest.fixture def temp_library(self): """Create a temporary library for testing.""" temp_dir = Path("./.tmp") / f"test_arxiv_year_{hash(self)}" temp_dir.mkdir(parents=True, exist_ok=True) library_paths = LibraryPaths.from_root(temp_dir) library_paths.create_directories() yield library_paths # Cleanup if temp_dir.exists(): shutil.rmtree(temp_dir) @pytest.fixture def storage_manager(self, temp_library): """Create a storage manager for testing.""" return PaperStorageManager(temp_library) def test_arxiv_year_extraction_2022(self, storage_manager): """Test year extraction for 2022 paper (2212.06340).""" paper_dir = storage_manager.get_paper_directory( "arxiv-2212_06340", SourceType.ARXIV ) # Should extract year 2022 from 2212.06340 expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2022" / "arxiv-2212_06340" ) assert paper_dir == expected def test_arxiv_year_extraction_2023(self, storage_manager): """Test year extraction for 2023 paper (2301.12345).""" paper_dir = storage_manager.get_paper_directory( "arxiv-2301_12345", SourceType.ARXIV ) # Should extract year 2023 from 2301.12345 expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2023" / "arxiv-2301_12345" ) assert paper_dir == expected def test_arxiv_year_extraction_2020(self, storage_manager): """Test year extraction for 2020 paper (2005.67890).""" paper_dir = storage_manager.get_paper_directory( "arxiv-2005_67890", SourceType.ARXIV ) # Should extract year 2020 from 2005.67890 expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2020" / "arxiv-2005_67890" ) assert paper_dir == expected def test_arxiv_year_extraction_1999(self, storage_manager): """Test year extraction for 1999 paper (9912.12345).""" paper_dir = storage_manager.get_paper_directory( "arxiv-9912_12345", SourceType.ARXIV ) # Should extract year 1999 from 9912.12345 (99 -> 1999) expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "1999" / "arxiv-9912_12345" ) assert paper_dir == expected def test_arxiv_year_extraction_2000(self, storage_manager): """Test year extraction for 2000 paper (0001.12345).""" paper_dir = storage_manager.get_paper_directory( "arxiv-0001_12345", SourceType.ARXIV ) # Should extract year 2000 from 0001.12345 (00 -> 2000) expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2000" / "arxiv-0001_12345" ) assert paper_dir == expected def test_arxiv_id_with_version(self, storage_manager): """Test year extraction with version number.""" paper_dir = storage_manager.get_paper_directory( "arxiv-2212_06340v1", SourceType.ARXIV ) # Should extract year 2022 from 2212.06340v1 expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2022" / "arxiv-2212_06340v1" ) assert paper_dir == expected def test_existing_storage_test_still_passes(self, storage_manager): """Ensure we didn't break the existing test case.""" # This matches the test case in test_storage.py paper_dir = storage_manager.get_paper_directory( "arxiv-2212_06340", SourceType.ARXIV ) # The old test expected papers/arxiv/2212/ but should now be papers/arxiv/2022/ expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2022" / "arxiv-2212_06340" ) assert paper_dir == expected