fix: arxiv year

This commit is contained in:
2026-04-17 17:03:59 -04:00
parent 432010f431
commit 174801242d
4 changed files with 158 additions and 8 deletions
+139
View File
@@ -0,0 +1,139 @@
"""Test for arXiv year extraction bug fix."""
import shutil
from pathlib import Path
import pytest
from paperlib.config import LibraryPaths
from paperlib.models import SourceType
from paperlib.storage import PaperStorageManager
class TestArxivYearFix:
    """Regression tests for the year bucket of arXiv paper directories.

    Every test asks ``PaperStorageManager.get_paper_directory`` for the
    directory of an arXiv paper and checks that the answer is
    ``<papers_dir>/arxiv/<four-digit year>/<paper id>``. The year must be
    derived from the first two digits of the identifier (``2212`` -> ``2022``)
    instead of reusing the raw ``YYMM`` prefix as the directory name.
    """

    @pytest.fixture
    def temp_library(self, tmp_path: Path):
        """Yield ``LibraryPaths`` rooted in a fresh, test-private directory.

        The root is created under pytest's ``tmp_path`` so that each test
        invocation gets a unique, empty location. A directory derived from
        ``hash(self)`` inside the current working directory is neither unique
        across processes nor guaranteed to be empty, and it litters the
        checkout when a run is interrupted.
        """
        library_root = tmp_path / "library"
        library_root.mkdir(parents=True, exist_ok=True)
        try:
            library_paths = LibraryPaths.from_root(library_root)
            library_paths.create_directories()
            yield library_paths
        finally:
            # Runs even when setup above fails, so nothing is left behind.
            if library_root.exists():
                shutil.rmtree(library_root)

    @pytest.fixture
    def storage_manager(self, temp_library):
        """Return a ``PaperStorageManager`` bound to the temporary library."""
        return PaperStorageManager(temp_library)

    @staticmethod
    def _assert_year_directory(storage_manager, paper_id: str, year: str) -> None:
        """Assert that *paper_id* is stored under ``arxiv/<year>/<paper_id>``.

        Args:
            storage_manager: Manager under test; its
                ``library_paths.papers_dir`` is the base of the expected path.
            paper_id: Paper identifier passed to ``get_paper_directory``,
                e.g. ``"arxiv-2212_06340"``.
            year: Expected four-digit year directory name, e.g. ``"2022"``.
        """
        paper_dir = storage_manager.get_paper_directory(paper_id, SourceType.ARXIV)
        expected = (
            storage_manager.library_paths.papers_dir / "arxiv" / year / paper_id
        )
        assert paper_dir == expected

    def test_arxiv_year_extraction_2022(self, storage_manager) -> None:
        """Test year extraction for 2022 paper (2212.06340)."""
        self._assert_year_directory(storage_manager, "arxiv-2212_06340", "2022")

    def test_arxiv_year_extraction_2023(self, storage_manager) -> None:
        """Test year extraction for 2023 paper (2301.12345)."""
        self._assert_year_directory(storage_manager, "arxiv-2301_12345", "2023")

    def test_arxiv_year_extraction_2020(self, storage_manager) -> None:
        """Test year extraction for 2020 paper (2005.67890)."""
        self._assert_year_directory(storage_manager, "arxiv-2005_67890", "2020")

    def test_arxiv_year_extraction_1999(self, storage_manager) -> None:
        """Test year extraction for 1999 paper (9912.12345)."""
        # Two-digit year 99 must map to the previous century: 99 -> 1999.
        self._assert_year_directory(storage_manager, "arxiv-9912_12345", "1999")

    def test_arxiv_year_extraction_2000(self, storage_manager) -> None:
        """Test year extraction for 2000 paper (0001.12345)."""
        # Two-digit year 00 must map to 2000, not 1900.
        self._assert_year_directory(storage_manager, "arxiv-0001_12345", "2000")

    def test_arxiv_id_with_version(self, storage_manager) -> None:
        """Test year extraction with version number."""
        # The trailing "v1" stays part of the directory name; only the year
        # bucket is derived from the identifier.
        self._assert_year_directory(storage_manager, "arxiv-2212_06340v1", "2022")

    def test_existing_storage_test_still_passes(self, storage_manager) -> None:
        """Ensure we didn't break the existing test case."""
        # Mirrors the case in test_storage.py: the old expectation was
        # papers/arxiv/2212/ but it should now be papers/arxiv/2022/.
        self._assert_year_directory(storage_manager, "arxiv-2212_06340", "2022")