"""Tests for paperlib storage manager.""" import shutil import tempfile from pathlib import Path import pytest from paperlib.config import LibraryPaths from paperlib.models import ConversionStatus, PaperMetadata, SourceType from paperlib.storage import PaperStorageManager class TestPaperStorageManager: """Test PaperStorageManager functionality.""" @pytest.fixture def temp_library(self): """Create a temporary library for testing.""" temp_dir = Path("./.tmp") / f"test_library_{hash(self)}" temp_dir.mkdir(parents=True, exist_ok=True) library_paths = LibraryPaths.from_root(temp_dir) library_paths.create_directories() yield library_paths # Cleanup if temp_dir.exists(): shutil.rmtree(temp_dir) @pytest.fixture def storage_manager(self, temp_library): """Create a storage manager for testing.""" return PaperStorageManager(temp_library) @pytest.fixture def sample_pdf(self): """Create a sample PDF file for testing.""" # Create a minimal PDF-like file temp_file = Path("./.tmp") / f"test_paper_{hash(self)}.pdf" with temp_file.open("wb") as f: # Minimal PDF header f.write(b"%PDF-1.4\n") f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") f.write(b"%%EOF\n") yield temp_file # Cleanup if temp_file.exists(): temp_file.unlink() def test_generate_paper_id_local(self, storage_manager, sample_pdf): """Test generating paper ID for local files.""" paper_id = storage_manager.generate_paper_id( SourceType.LOCAL, pdf_path=sample_pdf ) assert paper_id.startswith("local-") assert len(paper_id) == 22 # "local-" + 16 chars hash def test_generate_paper_id_arxiv(self, storage_manager): """Test generating paper ID for arXiv papers.""" paper_id = storage_manager.generate_paper_id( SourceType.ARXIV, source_id="2212.06340" ) assert paper_id == "arxiv-2212_06340" def test_get_paper_directory_arxiv(self, storage_manager): """Test getting paper directory for arXiv papers.""" paper_dir = storage_manager.get_paper_directory( "arxiv-2212_06340", SourceType.ARXIV ) expected = ( storage_manager.library_paths.papers_dir / "arxiv" / "2212" / "arxiv-2212_06340" ) assert paper_dir == expected def test_get_paper_directory_local(self, storage_manager): """Test getting paper directory for local papers.""" paper_dir = storage_manager.get_paper_directory( "local-abcd1234efgh5678", SourceType.LOCAL ) expected = ( storage_manager.library_paths.papers_dir / "local" / "abcd1234efgh5678" ) assert paper_dir == expected def test_get_paper_paths(self, storage_manager): """Test getting all paper paths.""" paths = storage_manager.get_paper_paths("arxiv-2212_06340", SourceType.ARXIV) assert "directory" in paths assert "meta" in paths assert "pdf" in paths assert "markdown" in paths assert "summary_json" in paths assert "summary_md" in paths assert "assets" in paths assert "logs" in paths # Check that paths are Path objects assert isinstance(paths["meta"], Path) assert paths["meta"].name == "meta.json" assert paths["pdf"].name == "source.pdf" def test_store_paper_local(self, storage_manager, sample_pdf): """Test storing a local PDF paper.""" metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper", authors=["Test Author"], tags=["test"], ) # Check metadata assert metadata.source_type == SourceType.LOCAL assert metadata.title == "Test Paper" assert metadata.authors == ["Test Author"] assert metadata.tags == ["test"] assert metadata.conversion_status == ConversionStatus.PENDING # Check file structure was created paths = storage_manager.get_paper_paths(metadata.paper_id, metadata.source_type) assert paths["directory"].exists() assert paths["meta"].exists() assert paths["pdf"].exists() assert paths["assets"].exists() assert paths["logs"].exists() def test_store_paper_arxiv(self, storage_manager, sample_pdf): """Test storing an arXiv paper.""" metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.ARXIV, source_id="2212.06340", title="Test arXiv Paper", authors=["Alice Smith", "Bob Jones"], categories=["cs.AI"], ) # Check metadata assert metadata.source_type == SourceType.ARXIV assert metadata.source_id == "2212.06340" assert metadata.title == "Test arXiv Paper" assert metadata.authors == ["Alice Smith", "Bob Jones"] assert metadata.categories == ["cs.AI"] # Check file paths are set correctly assert metadata.pdf_path assert metadata.paper_md_path assert metadata.summary_json_path assert metadata.summary_md_path def test_load_paper_metadata(self, storage_manager, sample_pdf): """Test loading paper metadata.""" # First store a paper original_metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" ) # Load it back loaded_metadata = storage_manager.load_paper_metadata( original_metadata.paper_id, original_metadata.source_type ) assert loaded_metadata is not None assert loaded_metadata.paper_id == original_metadata.paper_id assert loaded_metadata.title == "Test Paper" assert loaded_metadata.source_type == SourceType.LOCAL def test_load_nonexistent_paper(self, storage_manager): """Test loading metadata for nonexistent paper.""" metadata = storage_manager.load_paper_metadata("nonexistent", SourceType.LOCAL) assert metadata is None def test_update_paper_metadata(self, storage_manager, sample_pdf): """Test updating paper metadata.""" # Store initial paper metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Original Title" ) # Update metadata metadata.title = "Updated Title" metadata.conversion_status = ConversionStatus.SUCCESS storage_manager.update_paper_metadata(metadata) # Load and verify update loaded_metadata = storage_manager.load_paper_metadata( metadata.paper_id, metadata.source_type ) assert loaded_metadata.title == "Updated Title" assert loaded_metadata.conversion_status == ConversionStatus.SUCCESS def test_list_all_papers(self, storage_manager, sample_pdf): """Test listing all papers in library.""" # Initially empty papers = list(storage_manager.list_all_papers()) assert len(papers) == 0 # Add some papers metadata1 = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Paper 1" ) metadata2 = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.ARXIV, source_id="2212.06340", title="Paper 2", ) # List papers papers = list(storage_manager.list_all_papers()) assert len(papers) == 2 paper_ids = {p.paper_id for p in papers} assert metadata1.paper_id in paper_ids assert metadata2.paper_id in paper_ids def test_paper_exists(self, storage_manager, sample_pdf): """Test checking if paper exists.""" # Initially doesn't exist assert not storage_manager.paper_exists("nonexistent", SourceType.LOCAL) # Store a paper metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" ) # Now it exists assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type) def test_delete_paper(self, storage_manager, sample_pdf): """Test deleting a paper.""" # Store a paper metadata = storage_manager.store_paper( pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" ) # Verify it exists assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type) # Delete it result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type) assert result is True # Verify it's gone assert not storage_manager.paper_exists(metadata.paper_id, metadata.source_type) # Deleting again should return False result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type) assert result is False