Files
paperlib/tests/test_arxiv_year_fix.py
T
2026-04-17 17:03:59 -04:00

140 lines
4.4 KiB
Python

"""Test for arXiv year extraction bug fix."""
import shutil
from pathlib import Path
import pytest
from paperlib.config import LibraryPaths
from paperlib.models import SourceType
from paperlib.storage import PaperStorageManager
class TestArxivYearFix:
    """Regression tests for the arXiv year used in paper storage directories.

    Each test asks ``PaperStorageManager.get_paper_directory`` for the
    directory of an arXiv paper id and checks that the year path component
    is the full four-digit year (e.g. ``2022``) rather than the raw ``YYMM``
    prefix of the identifier (e.g. ``2212``).
    """

    @pytest.fixture
    def temp_library(self, tmp_path):
        """Provide ``LibraryPaths`` rooted in a per-test temporary directory.

        The root comes from pytest's built-in ``tmp_path`` fixture, so every
        test gets its own directory and pytest manages its lifetime. A
        hand-built name based on ``hash(self)`` is identity-derived and not
        guaranteed unique, and manual cleanup after ``yield`` is skipped when
        setup fails part-way through.
        """
        library_paths = LibraryPaths.from_root(tmp_path)
        library_paths.create_directories()
        return library_paths

    @pytest.fixture
    def storage_manager(self, temp_library):
        """Provide a ``PaperStorageManager`` bound to the temporary library."""
        return PaperStorageManager(temp_library)

    @staticmethod
    def _assert_stored_under_year(storage_manager, paper_id, year):
        """Assert that *paper_id* resolves to ``<papers_dir>/arxiv/<year>/<paper_id>``.

        Args:
            storage_manager: The manager under test.
            paper_id: Paper identifier passed to ``get_paper_directory``.
            year: Expected four-digit year directory name, as a string.
        """
        papers_dir = storage_manager.library_paths.papers_dir
        expected = papers_dir / "arxiv" / year / paper_id
        actual = storage_manager.get_paper_directory(paper_id, SourceType.ARXIV)
        assert actual == expected

    def test_arxiv_year_extraction_2022(self, storage_manager):
        """Test year extraction for 2022 paper (2212.06340)."""
        self._assert_stored_under_year(storage_manager, "arxiv-2212_06340", "2022")

    def test_arxiv_year_extraction_2023(self, storage_manager):
        """Test year extraction for 2023 paper (2301.12345)."""
        self._assert_stored_under_year(storage_manager, "arxiv-2301_12345", "2023")

    def test_arxiv_year_extraction_2020(self, storage_manager):
        """Test year extraction for 2020 paper (2005.67890)."""
        self._assert_stored_under_year(storage_manager, "arxiv-2005_67890", "2020")

    def test_arxiv_year_extraction_1999(self, storage_manager):
        """Test year extraction for 1999 paper (9912.12345)."""
        # Two-digit year 99 must map to 1999, not 2099.
        self._assert_stored_under_year(storage_manager, "arxiv-9912_12345", "1999")

    def test_arxiv_year_extraction_2000(self, storage_manager):
        """Test year extraction for 2000 paper (0001.12345)."""
        # Two-digit year 00 must map to 2000.
        self._assert_stored_under_year(storage_manager, "arxiv-0001_12345", "2000")

    def test_arxiv_id_with_version(self, storage_manager):
        """Test year extraction when the id carries a version suffix (v1)."""
        # The version suffix stays part of the directory name; only the
        # year component is derived from the identifier.
        self._assert_stored_under_year(storage_manager, "arxiv-2212_06340v1", "2022")

    def test_existing_storage_test_still_passes(self, storage_manager):
        """Ensure we didn't break the existing test case."""
        # Mirrors the case in test_storage.py: the old expectation was
        # papers/arxiv/2212/, which should now be papers/arxiv/2022/.
        self._assert_stored_under_year(storage_manager, "arxiv-2212_06340", "2022")