"""Tests for paperlib storage manager.""" import shutil from pathlib import Path import pytest from paperlib.config import LibraryPaths from paperlib.models import ConversionStatus, SourceType from paperlib.storage import PaperStorageManager class TestPaperStorageManager: """Test PaperStorageManager functionality.""" @pytest.fixture def temp_library(self): """Create a temporary library for testing.""" temp_dir = Path("./.tmp") / f"test_library_{hash(self)}" temp_dir.mkdir(parents=True, exist_ok=True) library_paths = LibraryPaths.from_root(temp_dir) library_paths.create_directories() yield library_paths # Cleanup if temp_dir.exists(): shutil.rmtree(temp_dir) @pytest.fixture def storage_manager(self, temp_library): """Create a storage manager for testing.""" return PaperStorageManager(temp_library) @pytest.fixture def sample_pdf(self): """Create a sample PDF file for testing.""" # Create a minimal PDF-like file temp_file = Path("./.tmp") / f"test_paper_{hash(self)}.pdf" with temp_file.open("wb") as f: # Minimal PDF header f.write(b"%PDF-1.4\n") f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") f.write(b"%%EOF\n") yield temp_file # Cleanup if temp_file.exists(): temp_file.unlink() def test_generate_paper_id_local(self, storage_manager, sample_pdf): """Test generating paper ID for local files.""" paper_id = storage_manager.generate_paper_id( SourceType.LOCAL, pdf_path=sample_pdf ) assert paper_id.startswith("local-") assert len(paper_id) == 22 # "local-" + 16 chars hash def test_generate_paper_id_arxiv(self, storage_manager): """Test generating paper ID for arXiv papers.""" paper_id = storage_manager.generate_paper_id( SourceType.ARXIV, source_id="2212.06340" ) assert paper_id == "arxiv-2212_06340" def test_get_paper_directory_arxiv(self, storage_manager): """Test getting paper directory for arXiv papers.""" paper_dir = storage_manager.get_paper_directory( "arxiv-2212_06340", SourceType.ARXIV ) # Should extract year 2022 from 2212.06340 (22 -> 2022) expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2022" / "arxiv-2212_06340" ) assert paper_dir == expected def test_get_paper_directory_local(self, storage_manager): """Test getting paper directory for local papers.""" paper_dir = storage_manager.get_paper_directory( "local-abcd1234efgh5678", SourceType.LOCAL ) expected = ( storage_manager.library_paths.papers_dir / "local" / "abcd1234efgh5678" ) assert paper_dir == expected def test_get_paper_paths(self, storage_manager): """Test getting all paper paths.""" paths = storage_manager.get_paper_paths("arxiv-2212_06340", SourceType.ARXIV) assert "directory" in paths assert "meta" in paths assert "pdf" in paths assert "markdown" in paths assert "summary_json" in paths assert "summary_md" in paths assert "assets" in paths assert "logs" in paths # Check that paths are Path objects assert isinstance(paths["meta"], Path) assert paths["meta"].name == "meta.json" assert paths["pdf"].name == "source.pdf" def test_store_paper_local(self, storage_manager, sample_pdf): """Test storing a local PDF paper.""" metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper", authors=["Test Author"], tags=["test"], ) # Check metadata assert metadata.source_type == SourceType.LOCAL assert metadata.title == "Test Paper" assert metadata.authors == ["Test Author"] assert metadata.tags == ["test"] assert metadata.conversion_status == ConversionStatus.PENDING # Check file structure was created paths = storage_manager.get_paper_paths(metadata.paper_id, metadata.source_type) assert paths["directory"].exists() assert paths["meta"].exists() assert paths["pdf"].exists() assert paths["assets"].exists() assert paths["logs"].exists() def test_store_paper_arxiv(self, storage_manager, sample_pdf): """Test storing an arXiv paper.""" metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.ARXIV, source_id="2212.06340", title="Test arXiv Paper", authors=["Alice Smith", "Bob Jones"], categories=["cs.AI"], ) # Check metadata assert metadata.source_type == SourceType.ARXIV assert metadata.source_id == "2212.06340" assert metadata.title == "Test arXiv Paper" assert metadata.authors == ["Alice Smith", "Bob Jones"] assert metadata.categories == ["cs.AI"] # Check file paths are set correctly assert metadata.pdf_path assert metadata.paper_md_path assert metadata.summary_json_path assert metadata.summary_md_path def test_load_paper_metadata(self, storage_manager, sample_pdf): """Test loading paper metadata.""" # First store a paper original_metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" ) # Load it back loaded_metadata = storage_manager.load_paper_metadata( original_metadata.paper_id, original_metadata.source_type ) assert loaded_metadata is not None assert loaded_metadata.paper_id == original_metadata.paper_id assert loaded_metadata.title == "Test Paper" assert loaded_metadata.source_type == SourceType.LOCAL def test_load_nonexistent_paper(self, storage_manager): """Test loading metadata for nonexistent paper.""" metadata = storage_manager.load_paper_metadata("nonexistent", SourceType.LOCAL) assert metadata is None def test_update_paper_metadata(self, storage_manager, sample_pdf): """Test updating paper metadata.""" # Store initial paper metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Original Title" ) # Update metadata metadata.title = "Updated Title" metadata.conversion_status = ConversionStatus.SUCCESS storage_manager.update_paper_metadata(metadata) # Load and verify update loaded_metadata = storage_manager.load_paper_metadata( metadata.paper_id, metadata.source_type ) assert loaded_metadata.title == "Updated Title" assert loaded_metadata.conversion_status == ConversionStatus.SUCCESS def test_list_all_papers(self, storage_manager, sample_pdf): """Test listing all papers in library.""" # Initially empty papers = list(storage_manager.list_all_papers()) assert len(papers) == 0 # Add some papers metadata1 = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Paper 1" ) metadata2 = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.ARXIV, source_id="2212.06340", title="Paper 2", ) # List papers papers = list(storage_manager.list_all_papers()) assert len(papers) == 2 paper_ids = {p.paper_id for p in papers} assert metadata1.paper_id in paper_ids assert metadata2.paper_id in paper_ids def test_paper_exists(self, storage_manager, sample_pdf): """Test checking if paper exists.""" # Initially doesn't exist assert not storage_manager.paper_exists("nonexistent", SourceType.LOCAL) # Store a paper metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" ) # Now it exists assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type) def test_delete_paper(self, storage_manager, sample_pdf): """Test deleting a paper.""" # Store a paper metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" ) # Verify it exists assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type) # Delete it result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type) assert result is True # Verify it's gone assert not storage_manager.paper_exists(metadata.paper_id, metadata.source_type) # Deleting again should return False result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type) assert result is False