From 74d140e5f8f1679935da44b44b969bbed4f93b76 Mon Sep 17 00:00:00 2001 From: Yingjie Wang Date: Fri, 17 Apr 2026 15:56:04 -0400 Subject: [PATCH] test: add tests --- pyproject.toml | 5 + tests/__init__.py | 1 + tests/test_cli.py | 242 +++++++++++++++++++++++++++++ tests/test_config.py | 73 +++++++++ tests/test_database.py | 312 ++++++++++++++++++++++++++++++++++++++ tests/test_importer.py | 273 +++++++++++++++++++++++++++++++++ tests/test_integration.py | 220 +++++++++++++++++++++++++++ tests/test_models.py | 230 ++++++++++++++++++++++++++++ tests/test_storage.py | 261 +++++++++++++++++++++++++++++++ uv.lock | 42 +++++ 10 files changed, 1659 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_cli.py create mode 100644 tests/test_config.py create mode 100644 tests/test_database.py create mode 100644 tests/test_importer.py create mode 100644 tests/test_integration.py create mode 100644 tests/test_models.py create mode 100644 tests/test_storage.py diff --git a/pyproject.toml b/pyproject.toml index 796047f..d7313b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,3 +31,8 @@ select = ["E", "F", "I", "B", "UP"] [tool.pytest.ini_options] testpaths = ["tests"] + +[dependency-groups] +dev = [ + "pytest>=9.0.3", +] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..2b2507e --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test package for paperlib.""" diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..d2641b3 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,242 @@ +"""Tests for paperlib CLI functionality.""" + +import shutil +import subprocess +from pathlib import Path +from unittest.mock import patch + +import pytest + + +class TestCLI: + """Test CLI functionality.""" + + @pytest.fixture + def temp_library(self): + """Create a temporary library for testing.""" + temp_dir = Path("./.tmp") / f"test_cli_{hash(self)}" + temp_dir.mkdir(parents=True, exist_ok=True) + + yield temp_dir + + # Cleanup + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + @pytest.fixture + def sample_pdf(self): + """Create a sample PDF file for testing.""" + pdf_file = Path("./.tmp") / f"cli_test_{hash(self)}.pdf" + with pdf_file.open("wb") as f: + # Minimal PDF content + f.write(b"%PDF-1.4\n") + f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") + f.write(b"%%EOF\n") + + yield pdf_file + + # Cleanup + if pdf_file.exists(): + pdf_file.unlink() + + def run_paperlib_cmd(self, *args): + """Helper to run paperlib commands.""" + cmd = ["uv", "run", "paperlib"] + list(args) + result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path.cwd()) + return result + + def test_cli_help(self): + """Test CLI help output.""" + result = self.run_paperlib_cmd("--help") + + assert result.returncode == 0 + assert "paperlib" in result.stdout + assert "Local-first paper library engine" in result.stdout + assert "init" in result.stdout + assert "import" in result.stdout + assert "convert" in result.stdout + + def test_cli_version(self): + """Test CLI version output.""" + result = self.run_paperlib_cmd("--version") + + assert result.returncode == 0 + assert "paperlib" in result.stdout + assert "0.1.0" in result.stdout + + def test_init_command(self, temp_library): + """Test library initialization command.""" + result = self.run_paperlib_cmd("init", str(temp_library)) + + assert result.returncode == 0 + assert "Initialized paper library" in result.stdout + + # Check directory structure was created + assert (temp_library / "config").exists() + assert (temp_library / "papers").exists() + assert (temp_library / "inbox").exists() + assert (temp_library / "db").exists() + assert (temp_library / "cache").exists() + + def test_status_command(self, temp_library): + """Test status command.""" + # Initialize library first + self.run_paperlib_cmd("init", str(temp_library)) + + result = self.run_paperlib_cmd("status", "--library", str(temp_library)) + + assert result.returncode == 0 + # Check for absolute path since that's what we get + assert str(temp_library.resolve()) in result.stdout + assert "config:" in result.stdout + assert "database:" in result.stdout + assert "papers:" in result.stdout + + def test_import_local_pdf_command(self, temp_library, sample_pdf): + """Test importing local PDF via CLI.""" + # Initialize library + self.run_paperlib_cmd("init", str(temp_library)) + + # Import PDF + result = self.run_paperlib_cmd( + "import", + "--pdf", + str(sample_pdf), + "--title", + "Test CLI Paper", + "--tags", + "test", + "cli", + "--library", + str(temp_library), + ) + + assert result.returncode == 0 + assert "Successfully imported local PDF" in result.stdout + assert "Test CLI Paper" in result.stdout + + def test_list_command_empty(self, temp_library): + """Test list command with empty library.""" + self.run_paperlib_cmd("init", str(temp_library)) + + result = self.run_paperlib_cmd("list", "--library", str(temp_library)) + + assert result.returncode == 0 + assert "No papers found" in result.stdout + + def test_list_command_with_papers(self, temp_library, sample_pdf): + """Test list command with papers.""" + # Initialize and import + self.run_paperlib_cmd("init", str(temp_library)) + self.run_paperlib_cmd( + "import", + "--pdf", + str(sample_pdf), + "--title", + "Test Paper for List", + "--library", + str(temp_library), + ) + + result = self.run_paperlib_cmd("list", "--library", str(temp_library)) + + assert result.returncode == 0 + assert "Found 1 papers" in result.stdout + assert "Test Paper for List" in result.stdout + + def test_show_command(self, temp_library, sample_pdf): + """Test show command.""" + # Initialize and import + self.run_paperlib_cmd("init", str(temp_library)) + import_result = self.run_paperlib_cmd( + "import", + "--pdf", + str(sample_pdf), + "--title", + "Test Paper for Show", + "--library", + str(temp_library), + ) + + # Extract paper ID from import output + paper_id = None + for line in import_result.stdout.split("\n"): + if "Successfully imported local PDF:" in line: + paper_id = line.split(":")[-1].strip() + break + + assert paper_id is not None + + # Show paper details + result = self.run_paperlib_cmd("show", paper_id, "--library", str(temp_library)) + + assert result.returncode == 0 + assert f"Paper ID: {paper_id}" in result.stdout + assert "Test Paper for Show" in result.stdout + assert "Source: local" in result.stdout + + def test_show_nonexistent_paper(self, temp_library): + """Test show command with nonexistent paper.""" + self.run_paperlib_cmd("init", str(temp_library)) + + result = self.run_paperlib_cmd( + "show", "nonexistent", "--library", str(temp_library) + ) + + assert result.returncode == 1 + assert "Paper not found" in result.stdout + + def test_reindex_command(self, temp_library, sample_pdf): + """Test reindex command.""" + # Initialize and import + self.run_paperlib_cmd("init", str(temp_library)) + self.run_paperlib_cmd( + "import", "--pdf", str(sample_pdf), "--library", str(temp_library) + ) + + # Reindex + result = self.run_paperlib_cmd("reindex", "--library", str(temp_library)) + + assert result.returncode == 0 + assert "Rebuilding search index" in result.stdout + assert "papers indexed" in result.stdout + assert "Total papers: 1" in result.stdout + + def test_convert_command_no_papers(self, temp_library): + """Test convert command with no papers.""" + self.run_paperlib_cmd("init", str(temp_library)) + + result = self.run_paperlib_cmd("convert", "--library", str(temp_library)) + + assert result.returncode == 0 + assert "Complete: 0 successful, 0 failed" in result.stdout + + def test_convert_command_with_papers_no_mineru(self, temp_library, sample_pdf): + """Test convert command with papers when MinerU is not available.""" + # Initialize and import + self.run_paperlib_cmd("init", str(temp_library)) + self.run_paperlib_cmd( + "import", "--pdf", str(sample_pdf), "--library", str(temp_library) + ) + + # Convert (will fail because MinerU command may not be properly set up) + result = self.run_paperlib_cmd("convert", "--library", str(temp_library)) + + # Should complete but may have failures due to MinerU setup + assert "Complete:" in result.stdout + + def test_invalid_command(self): + """Test invalid command.""" + result = self.run_paperlib_cmd("invalid-command") + + assert result.returncode != 0 + + def test_missing_required_arguments(self): + """Test commands with missing required arguments.""" + # Import without PDF or arXiv + result = self.run_paperlib_cmd("import") + assert result.returncode != 0 + + # Show without paper ID + result = self.run_paperlib_cmd("show") + assert result.returncode != 0 diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..dab5f4c --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,73 @@ +"""Tests for paperlib configuration.""" + +import shutil +from pathlib import Path + +from paperlib.config import LibraryPaths + + +class TestLibraryPaths: + """Test LibraryPaths configuration.""" + + def test_from_root(self): + """Test creating LibraryPaths from root directory.""" + root = Path("./.tmp/test_config") + paths = LibraryPaths.from_root(root) + + # Check root path + assert paths.root == root.resolve() + + # Check default subdirectories + assert paths.config_dir == root.resolve() / "config" + assert paths.papers_dir == root.resolve() / "papers" + assert paths.inbox_dir == root.resolve() / "inbox" + assert paths.db_dir == root.resolve() / "db" + assert paths.cache_dir == root.resolve() / "cache" + + # Check specific files + assert paths.db_path == root.resolve() / "db" / "paperlib.sqlite3" + assert paths.config_path == root.resolve() / "config" / "config.toml" + + def test_create_directories(self): + """Test creating library directory structure.""" + root = Path("./.tmp/test_create_dirs") + + try: + paths = LibraryPaths.from_root(root) + + # Directories shouldn't exist initially + assert not paths.root.exists() + + # Create directories + paths.create_directories() + + # All directories should now exist + assert paths.root.exists() + assert paths.config_dir.exists() + assert paths.papers_dir.exists() + assert paths.inbox_dir.exists() + assert paths.db_dir.exists() + assert paths.cache_dir.exists() + + finally: + # Cleanup + if root.exists(): + shutil.rmtree(root) + + def test_expanduser(self): + """Test that ~ is expanded in paths.""" + # Test with tilde path + paths = LibraryPaths.from_root(Path("~/.tmp/test_tilde")) + + # Root should be expanded + assert "~" not in str(paths.root) + assert paths.root.is_absolute() + + def test_resolve_relative_paths(self): + """Test that relative paths are resolved.""" + # Use relative path + paths = LibraryPaths.from_root(Path("./relative/path")) + + # Should be absolute + assert paths.root.is_absolute() + assert "relative/path" in str(paths.root) diff --git a/tests/test_database.py b/tests/test_database.py new file mode 100644 index 0000000..11cf84b --- /dev/null +++ b/tests/test_database.py @@ -0,0 +1,312 @@ +"""Tests for paperlib database manager.""" + +import shutil +from pathlib import Path + +import pytest + +from paperlib.config import LibraryPaths +from paperlib.index import DatabaseManager +from paperlib.models import ConversionStatus, PaperMetadata, SourceType, SummaryStatus + + +class TestDatabaseManager: + """Test DatabaseManager functionality.""" + + @pytest.fixture + def temp_library(self): + """Create a temporary library for testing.""" + temp_dir = Path("./.tmp") / f"test_db_{hash(self)}" + temp_dir.mkdir(parents=True, exist_ok=True) + library_paths = LibraryPaths.from_root(temp_dir) + library_paths.create_directories() + + yield library_paths + + # Cleanup + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + @pytest.fixture + def db_manager(self, temp_library): + """Create a database manager for testing.""" + manager = DatabaseManager(temp_library) + manager.initialize_database() + return manager + + @pytest.fixture + def sample_metadata(self): + """Create sample paper metadata for testing.""" + return PaperMetadata( + paper_id="test-paper-1", + source_type=SourceType.LOCAL, + source_id=None, + title="A Test Paper on Machine Learning", + authors=["Alice Smith", "Bob Jones", "Charlie Brown"], + categories=["cs.AI", "stat.ML"], + tags=["machine-learning", "neural-networks", "test"], + notes="This is a test paper for unit testing.", + pdf_path="papers/local/test-paper-1/source.pdf", + paper_md_path="papers/local/test-paper-1/paper.md", + summary_json_path="papers/local/test-paper-1/summary.json", + summary_md_path="papers/local/test-paper-1/summary.md", + ) + + def test_initialize_database(self, temp_library): + """Test database initialization.""" + db_manager = DatabaseManager(temp_library) + + # Database file shouldn't exist initially + assert not db_manager.db_path.exists() + + # Initialize database + db_manager.initialize_database() + + # Database file should now exist + assert db_manager.db_path.exists() + + # Should be able to connect and query + with db_manager._get_connection() as conn: + cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'") + tables = [row[0] for row in cursor.fetchall()] + assert "papers" in tables + assert "papers_fts" in tables + + def test_index_paper(self, db_manager, sample_metadata): + """Test indexing a paper.""" + # Index the paper + db_manager.index_paper(sample_metadata) + + # Verify it was indexed + paper = db_manager.get_paper(sample_metadata.paper_id) + assert paper is not None + assert paper["paper_id"] == "test-paper-1" + assert paper["title"] == "A Test Paper on Machine Learning" + assert paper["source_type"] == "local" + + def test_get_paper(self, db_manager, sample_metadata): + """Test getting a paper by ID.""" + # Initially not found + paper = db_manager.get_paper("nonexistent") + assert paper is None + + # Index a paper + db_manager.index_paper(sample_metadata) + + # Now it should be found + paper = db_manager.get_paper(sample_metadata.paper_id) + assert paper is not None + assert paper["paper_id"] == sample_metadata.paper_id + assert paper["title"] == sample_metadata.title + + def test_remove_paper(self, db_manager, sample_metadata): + """Test removing a paper from index.""" + # Index a paper + db_manager.index_paper(sample_metadata) + assert db_manager.get_paper(sample_metadata.paper_id) is not None + + # Remove it + result = db_manager.remove_paper(sample_metadata.paper_id) + assert result is True + + # Verify it's gone + assert db_manager.get_paper(sample_metadata.paper_id) is None + + # Removing again should return False + result = db_manager.remove_paper(sample_metadata.paper_id) + assert result is False + + def test_list_papers(self, db_manager): + """Test listing papers with filtering.""" + # Create multiple test papers + paper1 = PaperMetadata( + paper_id="paper-1", + source_type=SourceType.LOCAL, + title="Local Paper", + conversion_status=ConversionStatus.PENDING, + summary_status=SummaryStatus.NOT_REQUESTED, + ) + + paper2 = PaperMetadata( + paper_id="paper-2", + source_type=SourceType.ARXIV, + title="ArXiv Paper", + conversion_status=ConversionStatus.SUCCESS, + summary_status=SummaryStatus.PENDING, + ) + + # Index papers + db_manager.index_paper(paper1) + db_manager.index_paper(paper2) + + # List all papers + all_papers = list(db_manager.list_papers()) + assert len(all_papers) == 2 + + # Filter by source type + local_papers = list(db_manager.list_papers(source_type=SourceType.LOCAL)) + assert len(local_papers) == 1 + assert local_papers[0]["source_type"] == "local" + + arxiv_papers = list(db_manager.list_papers(source_type=SourceType.ARXIV)) + assert len(arxiv_papers) == 1 + assert arxiv_papers[0]["source_type"] == "arxiv" + + # Filter by conversion status + pending_papers = list( + db_manager.list_papers(conversion_status=ConversionStatus.PENDING) + ) + assert len(pending_papers) == 1 + assert pending_papers[0]["conversion_status"] == "pending" + + # Test limit and offset + limited_papers = list(db_manager.list_papers(limit=1)) + assert len(limited_papers) == 1 + + def test_search_papers_fts(self, db_manager, sample_metadata): + """Test full-text search.""" + # Index a paper + db_manager.index_paper(sample_metadata) + + # Search by title words + results = list(db_manager.search_papers("Machine Learning")) + assert len(results) == 1 + assert results[0]["paper_id"] == sample_metadata.paper_id + + # Search by author + results = list(db_manager.search_papers("Alice Smith")) + assert len(results) == 1 + + # Search by tag (quoted for FTS) + results = list(db_manager.search_papers('"neural-networks"')) + assert len(results) == 1 + + # Search for non-existent term + results = list(db_manager.search_papers("nonexistent")) + assert len(results) == 0 + + def test_search_by_field(self, db_manager, sample_metadata): + """Test searching by specific field.""" + # Index a paper + db_manager.index_paper(sample_metadata) + + # Search by title + results = list(db_manager.search_by_field("title", "Machine Learning")) + assert len(results) == 1 + + # Search by author list + results = list(db_manager.search_by_field("author_list", "Alice")) + assert len(results) == 1 + + # Exact match + results = list( + db_manager.search_by_field( + "title", "A Test Paper on Machine Learning", exact_match=True + ) + ) + assert len(results) == 1 + + results = list( + db_manager.search_by_field("title", "Partial Title", exact_match=True) + ) + assert len(results) == 0 + + # Invalid field should raise error + with pytest.raises(ValueError): + list(db_manager.search_by_field("invalid_field", "test")) + + def test_get_statistics(self, db_manager): + """Test getting library statistics.""" + # Initially empty + stats = db_manager.get_statistics() + assert stats["total_papers"] == 0 + assert stats["by_source_type"] == {} + + # Add some papers + paper1 = PaperMetadata( + paper_id="paper-1", + source_type=SourceType.LOCAL, + title="Local Paper", + conversion_status=ConversionStatus.PENDING, + ) + + paper2 = PaperMetadata( + paper_id="paper-2", + source_type=SourceType.ARXIV, + title="ArXiv Paper 1", + conversion_status=ConversionStatus.SUCCESS, + ) + + paper3 = PaperMetadata( + paper_id="paper-3", + source_type=SourceType.ARXIV, + title="ArXiv Paper 2", + conversion_status=ConversionStatus.FAILED, + ) + + db_manager.index_paper(paper1) + db_manager.index_paper(paper2) + db_manager.index_paper(paper3) + + # Check updated statistics + stats = db_manager.get_statistics() + assert stats["total_papers"] == 3 + assert stats["by_source_type"]["local"] == 1 + assert stats["by_source_type"]["arxiv"] == 2 + assert stats["by_conversion_status"]["pending"] == 1 + assert stats["by_conversion_status"]["success"] == 1 + assert stats["by_conversion_status"]["failed"] == 1 + + def test_reindex_from_storage(self, db_manager, temp_library): + """Test reindexing from storage files.""" + from paperlib.storage import PaperStorageManager + + # Create storage manager and add some papers + storage_manager = PaperStorageManager(temp_library) + + # Create a mock PDF file + pdf_file = Path("./.tmp") / "test.pdf" + with pdf_file.open("wb") as f: + f.write(b"%PDF-1.4\n%%EOF\n") + + try: + # Store papers in storage + metadata1 = storage_manager.store_paper( + pdf_path=pdf_file, source_type=SourceType.LOCAL, title="Paper 1" + ) + + metadata2 = storage_manager.store_paper( + pdf_path=pdf_file, + source_type=SourceType.ARXIV, + source_id="2212.06340", + title="Paper 2", + ) + + # Database should initially be empty + stats = db_manager.get_statistics() + assert stats["total_papers"] == 0 + + # Reindex from storage + success_count, error_count = db_manager.reindex_from_storage( + storage_manager + ) + + # Check results + assert success_count == 2 + assert error_count == 0 + + # Verify papers are now in database + stats = db_manager.get_statistics() + assert stats["total_papers"] == 2 + + paper1 = db_manager.get_paper(metadata1.paper_id) + assert paper1 is not None + assert paper1["title"] == "Paper 1" + + paper2 = db_manager.get_paper(metadata2.paper_id) + assert paper2 is not None + assert paper2["title"] == "Paper 2" + + finally: + if pdf_file.exists(): + pdf_file.unlink() diff --git a/tests/test_importer.py b/tests/test_importer.py new file mode 100644 index 0000000..66540df --- /dev/null +++ b/tests/test_importer.py @@ -0,0 +1,273 @@ +"""Tests for paperlib import functionality.""" + +import shutil +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from paperlib.config import LibraryPaths +from paperlib.importer import ArxivImporter, LocalImporter +from paperlib.models import SourceType +from paperlib.storage import PaperStorageManager + + +class TestLocalImporter: + """Test LocalImporter functionality.""" + + @pytest.fixture + def temp_library(self): + """Create a temporary library for testing.""" + temp_dir = Path("./.tmp") / f"test_import_{hash(self)}" + temp_dir.mkdir(parents=True, exist_ok=True) + library_paths = LibraryPaths.from_root(temp_dir) + library_paths.create_directories() + + yield library_paths + + # Cleanup + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + @pytest.fixture + def local_importer(self, temp_library): + """Create a LocalImporter for testing.""" + storage_manager = PaperStorageManager(temp_library) + return LocalImporter(storage_manager) + + @pytest.fixture + def sample_pdf(self): + """Create a sample PDF file for testing.""" + pdf_file = Path("./.tmp") / f"sample_{hash(self)}.pdf" + with pdf_file.open("wb") as f: + # Minimal PDF content + f.write(b"%PDF-1.4\n") + f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") + f.write(b"%%EOF\n") + + yield pdf_file + + # Cleanup + if pdf_file.exists(): + pdf_file.unlink() + + def test_import_pdf_success(self, local_importer, sample_pdf): + """Test successful PDF import.""" + metadata = local_importer.import_pdf( + pdf_path=sample_pdf, + title="Test Paper", + notes="Test notes", + tags=["test", "sample"], + ) + + # Check metadata + assert metadata.source_type == SourceType.LOCAL + assert metadata.title == "Test Paper" + assert metadata.notes == "Test notes" + assert metadata.tags == ["test", "sample"] + assert metadata.paper_id.startswith("local-") + + def test_import_pdf_auto_title(self, local_importer, sample_pdf): + """Test PDF import with auto-generated title.""" + # Rename PDF to have a meaningful name + meaningful_pdf = sample_pdf.parent / "Machine_Learning-Paper.pdf" + sample_pdf.rename(meaningful_pdf) + + try: + metadata = local_importer.import_pdf(pdf_path=meaningful_pdf) + + # Title should be auto-generated from filename + assert metadata.title == "Machine Learning Paper" + + finally: + if meaningful_pdf.exists(): + meaningful_pdf.unlink() + + def test_import_nonexistent_pdf(self, local_importer): + """Test importing non-existent PDF file.""" + nonexistent = Path("./.tmp/nonexistent.pdf") + + with pytest.raises(FileNotFoundError): + local_importer.import_pdf(pdf_path=nonexistent) + + def test_import_non_pdf_file(self, local_importer): + """Test importing non-PDF file.""" + text_file = Path("./.tmp") / "not_a_pdf.txt" + with text_file.open("w") as f: + f.write("This is not a PDF") + + try: + with pytest.raises(ValueError, match="File is not a PDF"): + local_importer.import_pdf(pdf_path=text_file) + finally: + if text_file.exists(): + text_file.unlink() + + def test_import_duplicate_pdf(self, local_importer, sample_pdf): + """Test importing the same PDF twice.""" + # Import once + metadata1 = local_importer.import_pdf(pdf_path=sample_pdf) + + # Try to import again + with pytest.raises(ValueError, match="Paper already imported"): + local_importer.import_pdf(pdf_path=sample_pdf) + + +class TestArxivImporter: + """Test ArxivImporter functionality.""" + + @pytest.fixture + def temp_library(self): + """Create a temporary library for testing.""" + temp_dir = Path("./.tmp") / f"test_arxiv_{hash(self)}" + temp_dir.mkdir(parents=True, exist_ok=True) + library_paths = LibraryPaths.from_root(temp_dir) + library_paths.create_directories() + + yield library_paths + + # Cleanup + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + @pytest.fixture + def arxiv_importer(self, temp_library): + """Create an ArxivImporter for testing.""" + storage_manager = PaperStorageManager(temp_library) + return ArxivImporter(storage_manager) + + def test_extract_arxiv_id_clean(self, arxiv_importer): + """Test extracting clean arXiv ID.""" + # Test various formats + assert arxiv_importer.extract_arxiv_id("2212.06340") == "2212.06340" + assert arxiv_importer.extract_arxiv_id("arxiv:2212.06340") == "2212.06340" + assert arxiv_importer.extract_arxiv_id("2212.06340v1") == "2212.06340v1" + assert arxiv_importer.extract_arxiv_id("math-ph/0701002") == "math-ph/0701002" + + def test_extract_arxiv_id_from_url(self, arxiv_importer): + """Test extracting arXiv ID from URLs.""" + url = "https://arxiv.org/abs/2212.06340" + extracted = arxiv_importer.extract_arxiv_id(url) + assert extracted == "2212.06340" + + def test_fetch_paper_metadata_success(self, arxiv_importer): + """Test successful metadata fetching from arXiv.""" + # Mock arXiv result + mock_result = Mock() + mock_result.title = "Test Paper" + mock_result.authors = [Mock(name="Alice Smith"), Mock(name="Bob Jones")] + mock_result.published = Mock() + mock_result.updated = Mock() + mock_result.categories = ["cs.AI", "stat.ML"] + + # Mock the client's results method directly + arxiv_importer.client.results = Mock(return_value=[mock_result]) + + # Test + result = arxiv_importer.fetch_paper_metadata("2212.06340") + assert result == mock_result + + def test_fetch_paper_metadata_not_found(self, arxiv_importer): + """Test fetching metadata for non-existent paper.""" + # Mock empty results + arxiv_importer.client.results = Mock(return_value=[]) + + with pytest.raises(ValueError, match="Paper not found on arXiv"): + arxiv_importer.fetch_paper_metadata("9999.99999") + + @patch("paperlib.importer.arxiv_importer.tempfile.NamedTemporaryFile") + def test_download_pdf(self, mock_tempfile, arxiv_importer): + """Test PDF downloading.""" + # Mock temporary file + mock_temp_path = Path("./.tmp/mock_temp.pdf") + mock_tempfile.return_value.__enter__.return_value.name = str(mock_temp_path) + + # Mock arXiv result + mock_result = Mock() + + # Create actual temp file for test + with mock_temp_path.open("wb") as f: + f.write(b"%PDF-1.4\n%%EOF\n") + + try: + pdf_path = arxiv_importer.download_pdf(mock_result) + assert pdf_path == mock_temp_path + mock_result.download_pdf.assert_called_once_with( + filename=str(mock_temp_path) + ) + finally: + if mock_temp_path.exists(): + mock_temp_path.unlink() + + @patch.object(ArxivImporter, "download_pdf") + @patch.object(ArxivImporter, "fetch_paper_metadata") + def test_import_arxiv_paper_success( + self, mock_fetch, mock_download, arxiv_importer + ): + """Test successful arXiv paper import.""" + # Mock PDF file + pdf_file = Path("./.tmp") / "test_arxiv.pdf" + with pdf_file.open("wb") as f: + f.write(b"%PDF-1.4\n%%EOF\n") + + try: + # Mock arXiv result with proper string values + mock_author = Mock() + mock_author.name = "Alice Smith" + + mock_result = Mock() + mock_result.title = "Test ArXiv Paper" + mock_result.authors = [mock_author] + mock_result.published = None + mock_result.updated = None + mock_result.categories = ["cs.AI"] + + mock_fetch.return_value = mock_result + mock_download.return_value = pdf_file + + # Test import + metadata = arxiv_importer.import_arxiv_paper( + arxiv_input="2212.06340", notes="Test notes", tags=["test"] + ) + + # Check results + assert metadata.source_type == SourceType.ARXIV + assert metadata.source_id == "2212.06340" + assert metadata.title == "Test ArXiv Paper" + assert metadata.authors == ["Alice Smith"] + assert metadata.categories == ["cs.AI"] + assert metadata.notes == "Test notes" + assert metadata.tags == ["test"] + + finally: + if pdf_file.exists(): + pdf_file.unlink() + + @patch.object(ArxivImporter, "fetch_paper_metadata") + def test_import_duplicate_arxiv_paper(self, mock_fetch, arxiv_importer): + """Test importing the same arXiv paper twice.""" + # Mock first import + pdf_file = Path("./.tmp") / "test_arxiv_dup.pdf" + with pdf_file.open("wb") as f: + f.write(b"%PDF-1.4\n%%EOF\n") + + try: + with patch.object(ArxivImporter, "download_pdf", return_value=pdf_file): + mock_result = Mock() + mock_result.title = "Test Paper" + mock_result.authors = [] + mock_result.published = None + mock_result.updated = None + mock_result.categories = [] + mock_fetch.return_value = mock_result + + # First import should succeed + arxiv_importer.import_arxiv_paper("2212.06340") + + # Second import should fail + with pytest.raises(ValueError, match="Paper already imported"): + arxiv_importer.import_arxiv_paper("2212.06340") + + finally: + if pdf_file.exists(): + pdf_file.unlink() diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..d12bec4 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,220 @@ +"""Integration tests for paperlib.""" + +import shutil +from pathlib import Path + +import pytest + +from paperlib.config import LibraryPaths +from paperlib.converter import MinerUConverter +from paperlib.importer import ArxivImporter, LocalImporter +from paperlib.index import DatabaseManager +from paperlib.models import ConversionStatus, SourceType +from paperlib.storage import PaperStorageManager + + +class TestIntegration: + """Test full integration workflows.""" + + @pytest.fixture + def temp_library(self): + """Create a temporary library for testing.""" + temp_dir = Path("./.tmp") / f"test_integration_{hash(self)}" + temp_dir.mkdir(parents=True, exist_ok=True) + library_paths = LibraryPaths.from_root(temp_dir) + library_paths.create_directories() + + yield library_paths + + # Cleanup + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + @pytest.fixture + def sample_pdf(self): + """Create a sample PDF file for testing.""" + pdf_file = Path("./.tmp") / f"integration_test_{hash(self)}.pdf" + with pdf_file.open("wb") as f: + # Minimal PDF content + f.write(b"%PDF-1.4\n") + f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") + f.write(b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n") + f.write(b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n") + f.write(b"%%EOF\n") + + yield pdf_file + + # Cleanup + if pdf_file.exists(): + pdf_file.unlink() + + def test_complete_local_import_workflow(self, temp_library, sample_pdf): + """Test complete workflow for importing and managing a local PDF.""" + # Set up components + storage_manager = PaperStorageManager(temp_library) + db_manager = DatabaseManager(temp_library) + local_importer = LocalImporter(storage_manager) + + # Initialize database + db_manager.initialize_database() + + # Import PDF + metadata = local_importer.import_pdf( + pdf_path=sample_pdf, + title="Integration Test Paper", + tags=["integration", "test"], + notes="This is an integration test paper", + ) + + # Update metadata with authors after import + metadata.authors = ["Test Author"] + storage_manager.update_paper_metadata(metadata) + + # Verify metadata + assert metadata.source_type == SourceType.LOCAL + assert metadata.title == "Integration Test Paper" + assert metadata.authors == ["Test Author"] + assert metadata.tags == ["integration", "test"] + + # Index in database + db_manager.index_paper(metadata) + + # Test retrieval from database + retrieved_paper = db_manager.get_paper(metadata.paper_id) + assert retrieved_paper is not None + assert retrieved_paper["title"] == "Integration Test Paper" + + # Test search functionality + search_results = list(db_manager.search_papers("Integration Test")) + assert len(search_results) == 1 + assert search_results[0]["paper_id"] == metadata.paper_id + + # Test field search + author_results = list(db_manager.search_by_field("author_list", "Test Author")) + assert len(author_results) == 1 + + # Test listing papers + all_papers = list(db_manager.list_papers()) + assert len(all_papers) == 1 + assert all_papers[0]["paper_id"] == metadata.paper_id + + # Test statistics + stats = db_manager.get_statistics() + assert stats["total_papers"] == 1 + assert stats["by_source_type"]["local"] == 1 + + # Test updating metadata + metadata.notes = "Updated notes" + storage_manager.update_paper_metadata(metadata) + + # Re-index and verify update + db_manager.index_paper(metadata) + updated_paper = db_manager.get_paper(metadata.paper_id) + assert "Updated notes" in updated_paper["search_text"] + + def test_multiple_papers_workflow(self, temp_library, sample_pdf): + """Test workflow with multiple papers.""" + # Set up components + storage_manager = PaperStorageManager(temp_library) + db_manager = DatabaseManager(temp_library) + local_importer = LocalImporter(storage_manager) + + # Initialize database + db_manager.initialize_database() + + # Import multiple papers (create unique PDFs) + papers = [] + for i in range(3): + # Create unique PDF for each import + unique_pdf = Path("./.tmp") / f"unique_paper_{i}_{hash(self)}.pdf" + with unique_pdf.open("wb") as f: + f.write(b"%PDF-1.4\n") + f.write(f"% Unique content {i}\n".encode()) + f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") + f.write(b"%%EOF\n") + + try: + metadata = local_importer.import_pdf( + pdf_path=unique_pdf, + title=f"Test Paper {i + 1}", + tags=[f"tag{i + 1}", "common"], + notes=f"Notes for paper {i + 1}", + ) + # Update metadata with authors after import + metadata.authors = [f"Author {i + 1}"] + storage_manager.update_paper_metadata(metadata) + + papers.append(metadata) + db_manager.index_paper(metadata) + + finally: + if unique_pdf.exists(): + unique_pdf.unlink() + + # Test listing all papers + all_papers = list(db_manager.list_papers()) + assert len(all_papers) == 3 + + # Test search across papers + common_tag_results = list(db_manager.search_papers("common")) + assert len(common_tag_results) == 3 + + # Test filtering + filtered_results = list(db_manager.list_papers(limit=2)) + assert len(filtered_results) == 2 + + # Test reindexing + success_count, error_count = db_manager.reindex_from_storage(storage_manager) + assert success_count == 3 + assert error_count == 0 + + # Verify papers still exist after reindex + stats = db_manager.get_statistics() + assert stats["total_papers"] == 3 + + def test_storage_and_database_consistency(self, temp_library, sample_pdf): + """Test consistency between storage and database.""" + # Set up components + storage_manager = PaperStorageManager(temp_library) + db_manager = DatabaseManager(temp_library) + local_importer = LocalImporter(storage_manager) + + # Initialize database + db_manager.initialize_database() + + # Import paper + metadata = local_importer.import_pdf( + pdf_path=sample_pdf, + title="Consistency Test Paper", + ) + + # Index in database + db_manager.index_paper(metadata) + + # Verify file exists in storage + assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type) + + # Verify paper exists in database + db_paper = db_manager.get_paper(metadata.paper_id) + assert db_paper is not None + + # Load from storage and compare + storage_metadata = storage_manager.load_paper_metadata( + metadata.paper_id, metadata.source_type + ) + assert storage_metadata.title == db_paper["title"] + assert storage_metadata.paper_id == db_paper["paper_id"] + + # Test reindexing maintains consistency + db_manager.remove_paper(metadata.paper_id) + assert db_manager.get_paper(metadata.paper_id) is None + + # Reindex from storage + success_count, error_count = db_manager.reindex_from_storage(storage_manager) + assert success_count == 1 + assert error_count == 0 + + # Verify paper is back in database + restored_paper = db_manager.get_paper(metadata.paper_id) + assert restored_paper is not None + assert restored_paper["title"] == "Consistency Test Paper" diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..18744e2 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,230 @@ +"""Tests for paperlib data models.""" + +import json +import tempfile +from datetime import datetime +from pathlib import Path + +import pytest + +from paperlib.models import ( + ConversionStatus, + PaperMetadata, + PaperSummary, + SourceType, + SummaryStatus, +) + + +class TestPaperMetadata: + """Test PaperMetadata data model.""" + + def test_create_metadata(self): + """Test creating a PaperMetadata instance.""" + metadata = PaperMetadata( + paper_id="test-paper-1", + source_type=SourceType.LOCAL, + title="Test Paper", + authors=["Alice Smith", "Bob Jones"], + categories=["cs.AI", "stat.ML"], + tags=["machine-learning", "ai"], + notes="Test notes", + ) + + assert metadata.paper_id == "test-paper-1" + assert metadata.source_type == SourceType.LOCAL + assert metadata.title == "Test Paper" + assert metadata.authors == ["Alice Smith", "Bob Jones"] + assert metadata.categories == ["cs.AI", "stat.ML"] + assert metadata.tags == ["machine-learning", "ai"] + assert metadata.notes == "Test notes" + assert metadata.conversion_status == ConversionStatus.PENDING + assert metadata.summary_status == SummaryStatus.NOT_REQUESTED + + def test_to_dict(self): + """Test converting metadata to dictionary.""" + metadata = PaperMetadata( + paper_id="test-paper-1", + source_type=SourceType.ARXIV, + source_id="2212.06340", + title="Test Paper", + published_date=datetime(2022, 12, 13, 2, 46, 55), + ) + + data = metadata.to_dict() + + assert data["paper_id"] == "test-paper-1" + assert data["source_type"] == "arxiv" + assert data["source_id"] == "2212.06340" + assert data["title"] == "Test Paper" + assert data["published_date"] == "2022-12-13T02:46:55" + + def test_from_dict(self): + """Test creating metadata from dictionary.""" + data = { + "paper_id": "test-paper-1", + "source_type": "local", + "title": "Test Paper", + "authors": ["Alice Smith"], + "published_date": "2022-12-13T02:46:55", + "categories": ["cs.AI"], + "pdf_path": "papers/test.pdf", + "imported_at": "2022-12-13T02:46:55", + "conversion_status": "success", + "summary_status": "pending", + "tags": ["test"], + "notes": "Test notes", + } + + metadata = PaperMetadata.from_dict(data) + + assert metadata.paper_id == "test-paper-1" + assert metadata.source_type == SourceType.LOCAL + assert metadata.title == "Test Paper" + assert metadata.authors == ["Alice Smith"] + assert metadata.published_date == datetime(2022, 12, 13, 2, 46, 55) + assert metadata.conversion_status == ConversionStatus.SUCCESS + assert metadata.summary_status == SummaryStatus.PENDING + + def test_save_and_load_file(self): + """Test saving and loading metadata from file.""" + metadata = PaperMetadata( + paper_id="test-paper-1", + source_type=SourceType.LOCAL, + title="Test Paper", + authors=["Alice Smith"], + ) + + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp: + tmp_path = Path(tmp.name) + + try: + # Save to file + metadata.save_to_file(tmp_path) + + # Verify file exists and contains JSON + assert tmp_path.exists() + with tmp_path.open() as f: + data = json.load(f) + assert data["paper_id"] == "test-paper-1" + + # Load from file + loaded_metadata = PaperMetadata.load_from_file(tmp_path) + assert loaded_metadata.paper_id == "test-paper-1" + assert loaded_metadata.title == "Test Paper" + assert loaded_metadata.source_type == SourceType.LOCAL + + finally: + if tmp_path.exists(): + tmp_path.unlink() + + +class TestPaperSummary: + """Test PaperSummary data model.""" + + def test_create_summary(self): + """Test creating a PaperSummary instance.""" + summary = PaperSummary( + one_sentence_summary="This paper introduces a new method.", + problem_statement="Current methods are inefficient.", + method_overview="We propose a novel approach.", + main_results="Our method achieves 95% accuracy.", + claimed_contributions=["Novel architecture", "Improved performance"], + problem_tags=["classification", "optimization"], + technique_tags=["neural-networks", "reinforcement-learning"], + ) + + assert summary.schema_version == "1.0" + assert summary.one_sentence_summary == "This paper introduces a new method." + assert summary.problem_statement == "Current methods are inefficient." + assert summary.claimed_contributions == [ + "Novel architecture", + "Improved performance", + ] + assert summary.problem_tags == ["classification", "optimization"] + + def test_to_dict(self): + """Test converting summary to dictionary.""" + summary = PaperSummary( + one_sentence_summary="Test summary", + relevance_to_user=0.85, + ) + + data = summary.to_dict() + + assert data["schema_version"] == "1.0" + assert data["one_sentence_summary"] == "Test summary" + assert data["relevance_to_user"] == 0.85 + + def test_from_dict(self): + """Test creating summary from dictionary.""" + data = { + "schema_version": "1.0", + "one_sentence_summary": "Test summary", + "problem_statement": "Test problem", + "claimed_contributions": ["Test contribution"], + "problem_tags": ["test"], + "technique_tags": ["neural-networks"], + "entities": ["Entity1", "Entity2"], + } + + summary = PaperSummary.from_dict(data) + + assert summary.schema_version == "1.0" + assert summary.one_sentence_summary == "Test summary" + assert summary.problem_statement == "Test problem" + assert summary.claimed_contributions == ["Test contribution"] + assert summary.entities == ["Entity1", "Entity2"] + + def test_save_and_load_file(self): + """Test saving and loading summary from file.""" + summary = PaperSummary( + one_sentence_summary="Test summary", + problem_tags=["tag1", "tag2"], + ) + + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp: + tmp_path = Path(tmp.name) + + try: + # Save to file + summary.save_to_file(tmp_path) + + # Verify file exists and contains JSON + assert tmp_path.exists() + with tmp_path.open() as f: + data = json.load(f) + assert data["one_sentence_summary"] == "Test summary" + + # Load from file + loaded_summary = PaperSummary.load_from_file(tmp_path) + assert loaded_summary.one_sentence_summary == "Test summary" + assert loaded_summary.problem_tags == ["tag1", "tag2"] + + finally: + if tmp_path.exists(): + tmp_path.unlink() + + +class TestEnums: + """Test enum types.""" + + def test_source_type_values(self): + """Test SourceType enum values.""" + assert SourceType.LOCAL == "local" + assert SourceType.ARXIV == "arxiv" + + def test_conversion_status_values(self): + """Test ConversionStatus enum values.""" + assert ConversionStatus.PENDING == "pending" + assert ConversionStatus.PROCESSING == "processing" + assert ConversionStatus.SUCCESS == "success" + assert ConversionStatus.FAILED == "failed" + + def test_summary_status_values(self): + """Test SummaryStatus enum values.""" + assert SummaryStatus.PENDING == "pending" + assert SummaryStatus.PROCESSING == "processing" + assert SummaryStatus.SUCCESS == "success" + assert SummaryStatus.FAILED == "failed" + assert SummaryStatus.NOT_REQUESTED == "not_requested" diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 0000000..2ca1664 --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,261 @@ +"""Tests for paperlib storage manager.""" + +import shutil +import tempfile +from pathlib import Path + +import pytest + +from paperlib.config import LibraryPaths +from paperlib.models import ConversionStatus, PaperMetadata, SourceType +from paperlib.storage import PaperStorageManager + + +class TestPaperStorageManager: + """Test PaperStorageManager functionality.""" + + @pytest.fixture + def temp_library(self): + """Create a temporary library for testing.""" + temp_dir = Path("./.tmp") / f"test_library_{hash(self)}" + temp_dir.mkdir(parents=True, exist_ok=True) + library_paths = LibraryPaths.from_root(temp_dir) + library_paths.create_directories() + + yield library_paths + + # Cleanup + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + @pytest.fixture + def storage_manager(self, temp_library): + """Create a storage manager for testing.""" + return PaperStorageManager(temp_library) + + @pytest.fixture + def sample_pdf(self): + """Create a sample PDF file for testing.""" + # Create a minimal PDF-like file + temp_file = Path("./.tmp") / f"test_paper_{hash(self)}.pdf" + with temp_file.open("wb") as f: + # Minimal PDF header + f.write(b"%PDF-1.4\n") + f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") + f.write(b"%%EOF\n") + + yield temp_file + + # Cleanup + if temp_file.exists(): + temp_file.unlink() + + def test_generate_paper_id_local(self, storage_manager, sample_pdf): + """Test generating paper ID for local files.""" + paper_id = storage_manager.generate_paper_id( + SourceType.LOCAL, pdf_path=sample_pdf + ) + + assert paper_id.startswith("local-") + assert len(paper_id) == 22 # "local-" + 16 chars hash + + def test_generate_paper_id_arxiv(self, storage_manager): + """Test generating paper ID for arXiv papers.""" + paper_id = storage_manager.generate_paper_id( + SourceType.ARXIV, source_id="2212.06340" + ) + + assert paper_id == "arxiv-2212_06340" + + def test_get_paper_directory_arxiv(self, storage_manager): + """Test getting paper directory for arXiv papers.""" + paper_dir = storage_manager.get_paper_directory( + "arxiv-2212_06340", SourceType.ARXIV + ) + + expected = ( + storage_manager.library_paths.papers_dir + / "arxiv" + / "2212" + / "arxiv-2212_06340" + ) + assert paper_dir == expected + + def test_get_paper_directory_local(self, storage_manager): + """Test getting paper directory for local papers.""" + paper_dir = storage_manager.get_paper_directory( + "local-abcd1234efgh5678", SourceType.LOCAL + ) + + expected = ( + storage_manager.library_paths.papers_dir / "local" / "abcd1234efgh5678" + ) + assert paper_dir == expected + + def test_get_paper_paths(self, storage_manager): + """Test getting all paper paths.""" + paths = storage_manager.get_paper_paths("arxiv-2212_06340", SourceType.ARXIV) + + assert "directory" in paths + assert "meta" in paths + assert "pdf" in paths + assert "markdown" in paths + assert "summary_json" in paths + assert "summary_md" in paths + assert "assets" in paths + assert "logs" in paths + + # Check that paths are Path objects + assert isinstance(paths["meta"], Path) + assert paths["meta"].name == "meta.json" + assert paths["pdf"].name == "source.pdf" + + def test_store_paper_local(self, storage_manager, sample_pdf): + """Test storing a local PDF paper.""" + metadata = storage_manager.store_paper( + pdf_path=sample_pdf, + source_type=SourceType.LOCAL, + title="Test Paper", + authors=["Test Author"], + tags=["test"], + ) + + # Check metadata + assert metadata.source_type == SourceType.LOCAL + assert metadata.title == "Test Paper" + assert metadata.authors == ["Test Author"] + assert metadata.tags == ["test"] + assert metadata.conversion_status == ConversionStatus.PENDING + + # Check file structure was created + paths = storage_manager.get_paper_paths(metadata.paper_id, metadata.source_type) + assert paths["directory"].exists() + assert paths["meta"].exists() + assert paths["pdf"].exists() + assert paths["assets"].exists() + assert paths["logs"].exists() + + def test_store_paper_arxiv(self, storage_manager, sample_pdf): + """Test storing an arXiv paper.""" + metadata = storage_manager.store_paper( + pdf_path=sample_pdf, + source_type=SourceType.ARXIV, + source_id="2212.06340", + title="Test arXiv Paper", + authors=["Alice Smith", "Bob Jones"], + categories=["cs.AI"], + ) + + # Check metadata + assert metadata.source_type == SourceType.ARXIV + assert metadata.source_id == "2212.06340" + assert metadata.title == "Test arXiv Paper" + assert metadata.authors == ["Alice Smith", "Bob Jones"] + assert metadata.categories == ["cs.AI"] + + # Check file paths are set correctly + assert metadata.pdf_path + assert metadata.paper_md_path + assert metadata.summary_json_path + assert metadata.summary_md_path + + def test_load_paper_metadata(self, storage_manager, sample_pdf): + """Test loading paper metadata.""" + # First store a paper + original_metadata = storage_manager.store_paper( + pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" + ) + + # Load it back + loaded_metadata = storage_manager.load_paper_metadata( + original_metadata.paper_id, original_metadata.source_type + ) + + assert loaded_metadata is not None + assert loaded_metadata.paper_id == original_metadata.paper_id + assert loaded_metadata.title == "Test Paper" + assert loaded_metadata.source_type == SourceType.LOCAL + + def test_load_nonexistent_paper(self, storage_manager): + """Test loading metadata for nonexistent paper.""" + metadata = storage_manager.load_paper_metadata("nonexistent", SourceType.LOCAL) + assert metadata is None + + def test_update_paper_metadata(self, storage_manager, sample_pdf): + """Test updating paper metadata.""" + # Store initial paper + metadata = storage_manager.store_paper( + pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Original Title" + ) + + # Update metadata + metadata.title = "Updated Title" + metadata.conversion_status = ConversionStatus.SUCCESS + storage_manager.update_paper_metadata(metadata) + + # Load and verify update + loaded_metadata = storage_manager.load_paper_metadata( + metadata.paper_id, metadata.source_type + ) + assert loaded_metadata.title == "Updated Title" + assert loaded_metadata.conversion_status == ConversionStatus.SUCCESS + + def test_list_all_papers(self, storage_manager, sample_pdf): + """Test listing all papers in library.""" + # Initially empty + papers = list(storage_manager.list_all_papers()) + assert len(papers) == 0 + + # Add some papers + metadata1 = storage_manager.store_paper( + pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Paper 1" + ) + + metadata2 = storage_manager.store_paper( + pdf_path=sample_pdf, + source_type=SourceType.ARXIV, + source_id="2212.06340", + title="Paper 2", + ) + + # List papers + papers = list(storage_manager.list_all_papers()) + assert len(papers) == 2 + + paper_ids = {p.paper_id for p in papers} + assert metadata1.paper_id in paper_ids + assert metadata2.paper_id in paper_ids + + def test_paper_exists(self, storage_manager, sample_pdf): + """Test checking if paper exists.""" + # Initially doesn't exist + assert not storage_manager.paper_exists("nonexistent", SourceType.LOCAL) + + # Store a paper + metadata = storage_manager.store_paper( + pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" + ) + + # Now it exists + assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type) + + def test_delete_paper(self, storage_manager, sample_pdf): + """Test deleting a paper.""" + # Store a paper + metadata = storage_manager.store_paper( + pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper" + ) + + # Verify it exists + assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type) + + # Delete it + result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type) + assert result is True + + # Verify it's gone + assert not storage_manager.paper_exists(metadata.paper_id, metadata.source_type) + + # Deleting again should return False + result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type) + assert result is False diff --git a/uv.lock b/uv.lock index b3f638c..f33e47f 100644 --- a/uv.lock +++ b/uv.lock @@ -764,6 +764,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646, upload-time = "2026-03-09T11:31:10.771Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1413,6 +1422,11 @@ dependencies = [ { name = "typer" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + [package.metadata] requires-dist = [ { name = "arxiv", specifier = ">=2.0.0" }, @@ -1421,6 +1435,9 @@ requires-dist = [ { name = "typer", specifier = ">=0.24.1" }, ] +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=9.0.3" }] + [[package]] name = "pdfminer-six" version = "20260107" @@ -1482,6 +1499,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" }, ] +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + [[package]] name = "protobuf" version = "7.34.1" @@ -1665,6 +1691,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/d8/940fcaa6a1f3763d72751b6bc8054f40beeacd6e9e5b19069c6c73dab5af/pypptx_with_oxml-1.0.3-py3-none-any.whl", hash = "sha256:4b3ccf51185e0f9e60ebf2884e74153d7fcb00e7e4f0461404e96e0260d7bba1", size = 493041, upload-time = "2026-01-30T08:51:25.797Z" }, ] +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0"