test: add tests
This commit is contained in:
@@ -0,0 +1 @@
|
||||
"""Test package for paperlib."""
|
||||
@@ -0,0 +1,242 @@
|
||||
"""Tests for paperlib CLI functionality."""
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestCLI:
    """End-to-end tests that run the ``paperlib`` CLI in a subprocess."""

    @pytest.fixture
    def temp_library(self):
        """Yield a scratch library directory under ``./.tmp``.

        The directory is removed again during fixture teardown.
        """
        temp_dir = Path("./.tmp") / f"test_cli_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)

        yield temp_dir

        # Teardown: drop everything the test created inside the library.
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def sample_pdf(self):
        """Yield the path of a minimal PDF-like file under ``./.tmp``.

        The file is deleted again during fixture teardown.
        """
        pdf_file = Path("./.tmp") / f"cli_test_{hash(self)}.pdf"
        # Do not rely on another fixture having created ./.tmp first.
        pdf_file.parent.mkdir(parents=True, exist_ok=True)
        pdf_file.write_bytes(
            b"%PDF-1.4\n"
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            b"%%EOF\n"
        )

        yield pdf_file

        # Teardown: the import commands copy/consume the file, so it may be gone.
        if pdf_file.exists():
            pdf_file.unlink()

    def run_paperlib_cmd(self, *args):
        """Run ``uv run paperlib <args>`` and return the completed process.

        Args:
            *args: Command-line arguments appended after ``paperlib``.

        Returns:
            The ``subprocess.CompletedProcess`` with captured text
            ``stdout``/``stderr``. A non-zero exit status does not raise;
            callers inspect ``returncode`` themselves.
        """
        cmd = ["uv", "run", "paperlib", *args]
        return subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=Path.cwd(),
            check=False,  # tests assert on returncode explicitly
        )

    def _init_library(self, library):
        """Run ``paperlib init`` on *library* and fail fast if it errors."""
        result = self.run_paperlib_cmd("init", str(library))
        assert result.returncode == 0, f"init failed: {result.stderr}"
        return result

    def test_cli_help(self):
        """``--help`` exits cleanly and mentions the main subcommands."""
        result = self.run_paperlib_cmd("--help")

        assert result.returncode == 0
        assert "paperlib" in result.stdout
        assert "Local-first paper library engine" in result.stdout
        assert "init" in result.stdout
        assert "import" in result.stdout
        assert "convert" in result.stdout

    def test_cli_version(self):
        """``--version`` prints the program name and version string."""
        result = self.run_paperlib_cmd("--version")

        assert result.returncode == 0
        assert "paperlib" in result.stdout
        assert "0.1.0" in result.stdout

    def test_init_command(self, temp_library):
        """``init`` reports success and creates the library sub-directories."""
        result = self.run_paperlib_cmd("init", str(temp_library))

        assert result.returncode == 0
        assert "Initialized paper library" in result.stdout

        # Check directory structure was created
        for subdir in ("config", "papers", "inbox", "db", "cache"):
            assert (temp_library / subdir).exists()

    def test_status_command(self, temp_library):
        """``status`` prints the resolved library path and section labels."""
        self._init_library(temp_library)

        result = self.run_paperlib_cmd("status", "--library", str(temp_library))

        assert result.returncode == 0
        # The CLI output contains the absolute path, so compare against that.
        assert str(temp_library.resolve()) in result.stdout
        assert "config:" in result.stdout
        assert "database:" in result.stdout
        assert "papers:" in result.stdout

    def test_import_local_pdf_command(self, temp_library, sample_pdf):
        """``import --pdf`` with title and tags reports a successful import."""
        self._init_library(temp_library)

        result = self.run_paperlib_cmd(
            "import",
            "--pdf",
            str(sample_pdf),
            "--title",
            "Test CLI Paper",
            "--tags",
            "test",
            "cli",
            "--library",
            str(temp_library),
        )

        assert result.returncode == 0
        assert "Successfully imported local PDF" in result.stdout
        assert "Test CLI Paper" in result.stdout

    def test_list_command_empty(self, temp_library):
        """``list`` on a freshly initialised library reports no papers."""
        self._init_library(temp_library)

        result = self.run_paperlib_cmd("list", "--library", str(temp_library))

        assert result.returncode == 0
        assert "No papers found" in result.stdout

    def test_list_command_with_papers(self, temp_library, sample_pdf):
        """``list`` shows a paper that was imported beforehand."""
        self._init_library(temp_library)
        imported = self.run_paperlib_cmd(
            "import",
            "--pdf",
            str(sample_pdf),
            "--title",
            "Test Paper for List",
            "--library",
            str(temp_library),
        )
        # A failed setup import would otherwise show up as a confusing
        # "Found 1 papers" mismatch below.
        assert imported.returncode == 0, f"import failed: {imported.stderr}"

        result = self.run_paperlib_cmd("list", "--library", str(temp_library))

        assert result.returncode == 0
        assert "Found 1 papers" in result.stdout
        assert "Test Paper for List" in result.stdout

    def test_show_command(self, temp_library, sample_pdf):
        """``show <id>`` prints details of a previously imported paper."""
        self._init_library(temp_library)
        import_result = self.run_paperlib_cmd(
            "import",
            "--pdf",
            str(sample_pdf),
            "--title",
            "Test Paper for Show",
            "--library",
            str(temp_library),
        )

        # The paper ID is taken from the text after the last ":" on the
        # first line that announces the import.
        marker = "Successfully imported local PDF:"
        paper_id = next(
            (
                line.split(":")[-1].strip()
                for line in import_result.stdout.split("\n")
                if marker in line
            ),
            None,
        )
        assert paper_id is not None

        result = self.run_paperlib_cmd("show", paper_id, "--library", str(temp_library))

        assert result.returncode == 0
        assert f"Paper ID: {paper_id}" in result.stdout
        assert "Test Paper for Show" in result.stdout
        assert "Source: local" in result.stdout

    def test_show_nonexistent_paper(self, temp_library):
        """``show`` with an unknown ID exits with status 1 and an error text."""
        self._init_library(temp_library)

        result = self.run_paperlib_cmd(
            "show", "nonexistent", "--library", str(temp_library)
        )

        assert result.returncode == 1
        assert "Paper not found" in result.stdout

    def test_reindex_command(self, temp_library, sample_pdf):
        """``reindex`` rebuilds the index and reports one paper."""
        self._init_library(temp_library)
        imported = self.run_paperlib_cmd(
            "import", "--pdf", str(sample_pdf), "--library", str(temp_library)
        )
        assert imported.returncode == 0, f"import failed: {imported.stderr}"

        result = self.run_paperlib_cmd("reindex", "--library", str(temp_library))

        assert result.returncode == 0
        assert "Rebuilding search index" in result.stdout
        assert "papers indexed" in result.stdout
        assert "Total papers: 1" in result.stdout

    def test_convert_command_no_papers(self, temp_library):
        """``convert`` on an empty library completes with zero conversions."""
        self._init_library(temp_library)

        result = self.run_paperlib_cmd("convert", "--library", str(temp_library))

        assert result.returncode == 0
        assert "Complete: 0 successful, 0 failed" in result.stdout

    def test_convert_command_with_papers_no_mineru(self, temp_library, sample_pdf):
        """``convert`` prints a completion summary even if MinerU is unavailable."""
        self._init_library(temp_library)
        imported = self.run_paperlib_cmd(
            "import", "--pdf", str(sample_pdf), "--library", str(temp_library)
        )
        assert imported.returncode == 0, f"import failed: {imported.stderr}"

        # The conversion itself may fail when the MinerU command is not set
        # up, so only the summary line is checked, not the exit status.
        result = self.run_paperlib_cmd("convert", "--library", str(temp_library))

        assert "Complete:" in result.stdout

    def test_invalid_command(self):
        """An unknown subcommand exits with a non-zero status."""
        result = self.run_paperlib_cmd("invalid-command")

        assert result.returncode != 0

    def test_missing_required_arguments(self):
        """Subcommands invoked without their required arguments fail."""
        # Import without PDF or arXiv
        result = self.run_paperlib_cmd("import")
        assert result.returncode != 0

        # Show without paper ID
        result = self.run_paperlib_cmd("show")
        assert result.returncode != 0
|
||||
@@ -0,0 +1,73 @@
|
||||
"""Tests for paperlib configuration."""
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
|
||||
|
||||
class TestLibraryPaths:
    """Tests for the path layout produced by ``LibraryPaths``."""

    def test_from_root(self):
        """``from_root`` resolves the root and derives the default sub-paths."""
        root = Path("./.tmp/test_config")
        paths = LibraryPaths.from_root(root)
        resolved = root.resolve()

        # Check root path
        assert paths.root == resolved

        # Check default subdirectories
        assert paths.config_dir == resolved / "config"
        assert paths.papers_dir == resolved / "papers"
        assert paths.inbox_dir == resolved / "inbox"
        assert paths.db_dir == resolved / "db"
        assert paths.cache_dir == resolved / "cache"

        # Check specific files
        assert paths.db_path == resolved / "db" / "paperlib.sqlite3"
        assert paths.config_path == resolved / "config" / "config.toml"

    def test_create_directories(self):
        """``create_directories`` creates the root and every sub-directory."""
        root = Path("./.tmp/test_create_dirs")
        # A previously aborted run may have left the tree behind; start clean
        # so the "does not exist yet" precondition below is meaningful.
        shutil.rmtree(root, ignore_errors=True)

        try:
            paths = LibraryPaths.from_root(root)

            # Directories shouldn't exist initially
            assert not paths.root.exists()

            paths.create_directories()

            # All directories should now exist
            assert paths.root.exists()
            assert paths.config_dir.exists()
            assert paths.papers_dir.exists()
            assert paths.inbox_dir.exists()
            assert paths.db_dir.exists()
            assert paths.cache_dir.exists()
        finally:
            # Cleanup
            if root.exists():
                shutil.rmtree(root)

    def test_expanduser(self):
        """A leading ``~`` in the root is expanded to an absolute path."""
        paths = LibraryPaths.from_root(Path("~/.tmp/test_tilde"))

        assert "~" not in str(paths.root)
        assert paths.root.is_absolute()

    def test_resolve_relative_paths(self):
        """A relative root is turned into an absolute path."""
        paths = LibraryPaths.from_root(Path("./relative/path"))

        assert paths.root.is_absolute()
        # Compare path components rather than a "/"-joined substring so the
        # check does not depend on the platform's path separator.
        assert paths.root.parts[-2:] == ("relative", "path")
|
||||
@@ -0,0 +1,312 @@
|
||||
"""Tests for paperlib database manager."""
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.index import DatabaseManager
|
||||
from paperlib.models import ConversionStatus, PaperMetadata, SourceType, SummaryStatus
|
||||
|
||||
|
||||
class TestDatabaseManager:
    """Tests for indexing, lookup, search and statistics in ``DatabaseManager``."""

    @pytest.fixture
    def temp_library(self):
        """Yield ``LibraryPaths`` for a scratch library; remove it on teardown."""
        library_root = Path("./.tmp") / f"test_db_{hash(self)}"
        library_root.mkdir(parents=True, exist_ok=True)
        paths = LibraryPaths.from_root(library_root)
        paths.create_directories()

        yield paths

        # Teardown: remove the whole scratch library.
        if library_root.exists():
            shutil.rmtree(library_root)

    @pytest.fixture
    def db_manager(self, temp_library):
        """Return a ``DatabaseManager`` whose database is already initialised."""
        database = DatabaseManager(temp_library)
        database.initialize_database()
        return database

    @pytest.fixture
    def sample_metadata(self):
        """Return a fully populated local ``PaperMetadata`` record."""
        return PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.LOCAL,
            source_id=None,
            title="A Test Paper on Machine Learning",
            authors=["Alice Smith", "Bob Jones", "Charlie Brown"],
            categories=["cs.AI", "stat.ML"],
            tags=["machine-learning", "neural-networks", "test"],
            notes="This is a test paper for unit testing.",
            pdf_path="papers/local/test-paper-1/source.pdf",
            paper_md_path="papers/local/test-paper-1/paper.md",
            summary_json_path="papers/local/test-paper-1/summary.json",
            summary_md_path="papers/local/test-paper-1/summary.md",
        )

    def test_initialize_database(self, temp_library):
        """Initialising creates the database file with the expected tables."""
        manager = DatabaseManager(temp_library)

        # Nothing on disk before initialisation...
        assert not manager.db_path.exists()

        manager.initialize_database()

        # ...and the file is present afterwards.
        assert manager.db_path.exists()

        # The schema must be queryable and contain both tables.
        with manager._get_connection() as conn:
            rows = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            ).fetchall()
            table_names = [row[0] for row in rows]
            assert "papers" in table_names
            assert "papers_fts" in table_names

    def test_index_paper(self, db_manager, sample_metadata):
        """An indexed paper can be read back with its key fields."""
        db_manager.index_paper(sample_metadata)

        stored = db_manager.get_paper(sample_metadata.paper_id)
        assert stored is not None
        assert stored["paper_id"] == "test-paper-1"
        assert stored["title"] == "A Test Paper on Machine Learning"
        assert stored["source_type"] == "local"

    def test_get_paper(self, db_manager, sample_metadata):
        """``get_paper`` returns ``None`` for unknown IDs and a row otherwise."""
        # Unknown ID before anything is indexed.
        assert db_manager.get_paper("nonexistent") is None

        db_manager.index_paper(sample_metadata)

        # The indexed paper is now retrievable.
        found = db_manager.get_paper(sample_metadata.paper_id)
        assert found is not None
        assert found["paper_id"] == sample_metadata.paper_id
        assert found["title"] == sample_metadata.title

    def test_remove_paper(self, db_manager, sample_metadata):
        """``remove_paper`` deletes once and reports ``False`` the second time."""
        paper_id = sample_metadata.paper_id

        db_manager.index_paper(sample_metadata)
        assert db_manager.get_paper(paper_id) is not None

        # First removal succeeds and the paper disappears.
        removed = db_manager.remove_paper(paper_id)
        assert removed is True
        assert db_manager.get_paper(paper_id) is None

        # Nothing left to remove on the second attempt.
        removed_again = db_manager.remove_paper(paper_id)
        assert removed_again is False

    def test_list_papers(self, db_manager):
        """``list_papers`` supports source/status filters and ``limit``."""
        local_paper = PaperMetadata(
            paper_id="paper-1",
            source_type=SourceType.LOCAL,
            title="Local Paper",
            conversion_status=ConversionStatus.PENDING,
            summary_status=SummaryStatus.NOT_REQUESTED,
        )
        arxiv_paper = PaperMetadata(
            paper_id="paper-2",
            source_type=SourceType.ARXIV,
            title="ArXiv Paper",
            conversion_status=ConversionStatus.SUCCESS,
            summary_status=SummaryStatus.PENDING,
        )

        for paper in (local_paper, arxiv_paper):
            db_manager.index_paper(paper)

        # Unfiltered listing returns both papers.
        everything = list(db_manager.list_papers())
        assert len(everything) == 2

        # Filter by source type.
        only_local = list(db_manager.list_papers(source_type=SourceType.LOCAL))
        assert len(only_local) == 1
        assert only_local[0]["source_type"] == "local"

        only_arxiv = list(db_manager.list_papers(source_type=SourceType.ARXIV))
        assert len(only_arxiv) == 1
        assert only_arxiv[0]["source_type"] == "arxiv"

        # Filter by conversion status.
        only_pending = list(
            db_manager.list_papers(conversion_status=ConversionStatus.PENDING)
        )
        assert len(only_pending) == 1
        assert only_pending[0]["conversion_status"] == "pending"

        # ``limit`` caps the number of returned rows.
        capped = list(db_manager.list_papers(limit=1))
        assert len(capped) == 1

    def test_search_papers_fts(self, db_manager, sample_metadata):
        """Full-text search matches on title, author and tag text."""
        db_manager.index_paper(sample_metadata)

        # Title words
        title_hits = list(db_manager.search_papers("Machine Learning"))
        assert len(title_hits) == 1
        assert title_hits[0]["paper_id"] == sample_metadata.paper_id

        # Author name
        author_hits = list(db_manager.search_papers("Alice Smith"))
        assert len(author_hits) == 1

        # Tag; the query is passed wrapped in double quotes
        tag_hits = list(db_manager.search_papers('"neural-networks"'))
        assert len(tag_hits) == 1

        # A term that appears nowhere yields nothing
        no_hits = list(db_manager.search_papers("nonexistent"))
        assert len(no_hits) == 0

    def test_search_by_field(self, db_manager, sample_metadata):
        """Field search supports partial and exact matching and rejects bad fields."""
        db_manager.index_paper(sample_metadata)

        # Partial match on title
        by_title = list(db_manager.search_by_field("title", "Machine Learning"))
        assert len(by_title) == 1

        # Partial match on the author list
        by_author = list(db_manager.search_by_field("author_list", "Alice"))
        assert len(by_author) == 1

        # Exact match needs the complete title...
        exact_hit = list(
            db_manager.search_by_field(
                "title", "A Test Paper on Machine Learning", exact_match=True
            )
        )
        assert len(exact_hit) == 1

        # ...so a different title finds nothing.
        exact_miss = list(
            db_manager.search_by_field("title", "Partial Title", exact_match=True)
        )
        assert len(exact_miss) == 0

        # Unknown field names are rejected.
        with pytest.raises(ValueError):
            list(db_manager.search_by_field("invalid_field", "test"))

    def test_get_statistics(self, db_manager):
        """Statistics count papers overall, per source and per conversion status."""
        # Empty database
        empty_stats = db_manager.get_statistics()
        assert empty_stats["total_papers"] == 0
        assert empty_stats["by_source_type"] == {}

        # One local and two arXiv papers, each with a different status.
        papers = [
            PaperMetadata(
                paper_id="paper-1",
                source_type=SourceType.LOCAL,
                title="Local Paper",
                conversion_status=ConversionStatus.PENDING,
            ),
            PaperMetadata(
                paper_id="paper-2",
                source_type=SourceType.ARXIV,
                title="ArXiv Paper 1",
                conversion_status=ConversionStatus.SUCCESS,
            ),
            PaperMetadata(
                paper_id="paper-3",
                source_type=SourceType.ARXIV,
                title="ArXiv Paper 2",
                conversion_status=ConversionStatus.FAILED,
            ),
        ]
        for paper in papers:
            db_manager.index_paper(paper)

        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 3

        by_source = stats["by_source_type"]
        assert by_source["local"] == 1
        assert by_source["arxiv"] == 2

        by_status = stats["by_conversion_status"]
        assert by_status["pending"] == 1
        assert by_status["success"] == 1
        assert by_status["failed"] == 1

    def test_reindex_from_storage(self, db_manager, temp_library):
        """Reindexing populates an empty database from stored papers."""
        from paperlib.storage import PaperStorageManager

        storage = PaperStorageManager(temp_library)

        # Stand-in PDF that both stored papers are created from.
        pdf_file = Path("./.tmp") / "test.pdf"
        pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

        try:
            local_meta = storage.store_paper(
                pdf_path=pdf_file, source_type=SourceType.LOCAL, title="Paper 1"
            )
            arxiv_meta = storage.store_paper(
                pdf_path=pdf_file,
                source_type=SourceType.ARXIV,
                source_id="2212.06340",
                title="Paper 2",
            )

            # Storing alone must not touch the index.
            assert db_manager.get_statistics()["total_papers"] == 0

            success_count, error_count = db_manager.reindex_from_storage(storage)

            assert success_count == 2
            assert error_count == 0

            # Both papers are now visible through the database.
            assert db_manager.get_statistics()["total_papers"] == 2

            first = db_manager.get_paper(local_meta.paper_id)
            assert first is not None
            assert first["title"] == "Paper 1"

            second = db_manager.get_paper(arxiv_meta.paper_id)
            assert second is not None
            assert second["title"] == "Paper 2"
        finally:
            if pdf_file.exists():
                pdf_file.unlink()
|
||||
@@ -0,0 +1,273 @@
|
||||
"""Tests for paperlib import functionality."""
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.importer import ArxivImporter, LocalImporter
|
||||
from paperlib.models import SourceType
|
||||
from paperlib.storage import PaperStorageManager
|
||||
|
||||
|
||||
class TestLocalImporter:
    """Tests for importing local PDF files through ``LocalImporter``."""

    @pytest.fixture
    def temp_library(self):
        """Yield ``LibraryPaths`` for a scratch library; remove it on teardown."""
        temp_dir = Path("./.tmp") / f"test_import_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()

        yield library_paths

        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def local_importer(self, temp_library):
        """Return a ``LocalImporter`` backed by storage in the scratch library."""
        storage_manager = PaperStorageManager(temp_library)
        return LocalImporter(storage_manager)

    @pytest.fixture
    def sample_pdf(self):
        """Yield the path of a minimal PDF-like file under ``./.tmp``.

        The file is deleted again during fixture teardown if it still exists
        (one test renames it).
        """
        pdf_file = Path("./.tmp") / f"sample_{hash(self)}.pdf"
        # Do not rely on another fixture having created ./.tmp first.
        pdf_file.parent.mkdir(parents=True, exist_ok=True)
        pdf_file.write_bytes(
            b"%PDF-1.4\n"
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            b"%%EOF\n"
        )

        yield pdf_file

        # Cleanup
        if pdf_file.exists():
            pdf_file.unlink()

    def test_import_pdf_success(self, local_importer, sample_pdf):
        """Importing with explicit title, notes and tags keeps those values."""
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Test Paper",
            notes="Test notes",
            tags=["test", "sample"],
        )

        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Test Paper"
        assert metadata.notes == "Test notes"
        assert metadata.tags == ["test", "sample"]
        assert metadata.paper_id.startswith("local-")

    def test_import_pdf_auto_title(self, local_importer, sample_pdf):
        """Without a title, one is derived from the PDF's file name."""
        # Give the PDF a meaningful file name to derive the title from.
        meaningful_pdf = sample_pdf.parent / "Machine_Learning-Paper.pdf"
        sample_pdf.rename(meaningful_pdf)

        try:
            metadata = local_importer.import_pdf(pdf_path=meaningful_pdf)

            assert metadata.title == "Machine Learning Paper"
        finally:
            # The fixture only cleans up the original name.
            if meaningful_pdf.exists():
                meaningful_pdf.unlink()

    def test_import_nonexistent_pdf(self, local_importer):
        """Importing a missing file raises ``FileNotFoundError``."""
        nonexistent = Path("./.tmp/nonexistent.pdf")

        with pytest.raises(FileNotFoundError):
            local_importer.import_pdf(pdf_path=nonexistent)

    def test_import_non_pdf_file(self, local_importer):
        """Importing a non-PDF file raises ``ValueError``."""
        text_file = Path("./.tmp") / "not_a_pdf.txt"
        with text_file.open("w") as f:
            f.write("This is not a PDF")

        try:
            with pytest.raises(ValueError, match="File is not a PDF"):
                local_importer.import_pdf(pdf_path=text_file)
        finally:
            if text_file.exists():
                text_file.unlink()

    def test_import_duplicate_pdf(self, local_importer, sample_pdf):
        """Importing the same PDF a second time raises ``ValueError``."""
        # First import succeeds; its return value is not needed here.
        local_importer.import_pdf(pdf_path=sample_pdf)

        with pytest.raises(ValueError, match="Paper already imported"):
            local_importer.import_pdf(pdf_path=sample_pdf)
|
||||
|
||||
|
||||
class TestArxivImporter:
    """Tests for ``ArxivImporter`` with the arXiv client and downloads mocked."""

    @pytest.fixture
    def temp_library(self):
        """Yield ``LibraryPaths`` for a scratch library; remove it on teardown."""
        temp_dir = Path("./.tmp") / f"test_arxiv_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()

        yield library_paths

        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def arxiv_importer(self, temp_library):
        """Return an ``ArxivImporter`` backed by storage in the scratch library."""
        storage_manager = PaperStorageManager(temp_library)
        return ArxivImporter(storage_manager)

    def test_extract_arxiv_id_clean(self, arxiv_importer):
        """Plain, prefixed, versioned and old-style IDs are extracted."""
        assert arxiv_importer.extract_arxiv_id("2212.06340") == "2212.06340"
        assert arxiv_importer.extract_arxiv_id("arxiv:2212.06340") == "2212.06340"
        assert arxiv_importer.extract_arxiv_id("2212.06340v1") == "2212.06340v1"
        assert arxiv_importer.extract_arxiv_id("math-ph/0701002") == "math-ph/0701002"

    def test_extract_arxiv_id_from_url(self, arxiv_importer):
        """The ID is extracted from an ``arxiv.org/abs`` URL."""
        url = "https://arxiv.org/abs/2212.06340"
        extracted = arxiv_importer.extract_arxiv_id(url)
        assert extracted == "2212.06340"

    def test_fetch_paper_metadata_success(self, arxiv_importer):
        """The first client result is returned as the paper's metadata."""
        # ``Mock(name=...)`` would only set the mock's repr name, not a
        # ``.name`` attribute, so assign the attribute after construction.
        mock_authors = []
        for author_name in ("Alice Smith", "Bob Jones"):
            mock_author = Mock()
            mock_author.name = author_name
            mock_authors.append(mock_author)

        mock_result = Mock()
        mock_result.title = "Test Paper"
        mock_result.authors = mock_authors
        mock_result.published = Mock()
        mock_result.updated = Mock()
        mock_result.categories = ["cs.AI", "stat.ML"]

        # Replace the client's results method so no network access happens.
        arxiv_importer.client.results = Mock(return_value=[mock_result])

        result = arxiv_importer.fetch_paper_metadata("2212.06340")
        assert result == mock_result

    def test_fetch_paper_metadata_not_found(self, arxiv_importer):
        """An empty client result raises ``ValueError``."""
        arxiv_importer.client.results = Mock(return_value=[])

        with pytest.raises(ValueError, match="Paper not found on arXiv"):
            arxiv_importer.fetch_paper_metadata("9999.99999")

    @patch("paperlib.importer.arxiv_importer.tempfile.NamedTemporaryFile")
    def test_download_pdf(self, mock_tempfile, arxiv_importer):
        """``download_pdf`` downloads to the temp file path and returns it."""
        # Make the patched NamedTemporaryFile hand out a known path.
        mock_temp_path = Path("./.tmp/mock_temp.pdf")
        mock_tempfile.return_value.__enter__.return_value.name = str(mock_temp_path)

        mock_result = Mock()

        # Create a real file at that path, since the download itself is mocked.
        with mock_temp_path.open("wb") as f:
            f.write(b"%PDF-1.4\n%%EOF\n")

        try:
            pdf_path = arxiv_importer.download_pdf(mock_result)
            assert pdf_path == mock_temp_path
            mock_result.download_pdf.assert_called_once_with(
                filename=str(mock_temp_path)
            )
        finally:
            if mock_temp_path.exists():
                mock_temp_path.unlink()

    @patch.object(ArxivImporter, "download_pdf")
    @patch.object(ArxivImporter, "fetch_paper_metadata")
    def test_import_arxiv_paper_success(
        self, mock_fetch, mock_download, arxiv_importer
    ):
        """A mocked arXiv result is imported with the expected metadata."""
        # File handed back by the mocked download.
        pdf_file = Path("./.tmp") / "test_arxiv.pdf"
        with pdf_file.open("wb") as f:
            f.write(b"%PDF-1.4\n%%EOF\n")

        try:
            # Give the mocked result real string values.
            mock_author = Mock()
            mock_author.name = "Alice Smith"

            mock_result = Mock()
            mock_result.title = "Test ArXiv Paper"
            mock_result.authors = [mock_author]
            mock_result.published = None
            mock_result.updated = None
            mock_result.categories = ["cs.AI"]

            mock_fetch.return_value = mock_result
            mock_download.return_value = pdf_file

            metadata = arxiv_importer.import_arxiv_paper(
                arxiv_input="2212.06340", notes="Test notes", tags=["test"]
            )

            assert metadata.source_type == SourceType.ARXIV
            assert metadata.source_id == "2212.06340"
            assert metadata.title == "Test ArXiv Paper"
            assert metadata.authors == ["Alice Smith"]
            assert metadata.categories == ["cs.AI"]
            assert metadata.notes == "Test notes"
            assert metadata.tags == ["test"]
        finally:
            if pdf_file.exists():
                pdf_file.unlink()

    @patch.object(ArxivImporter, "fetch_paper_metadata")
    def test_import_duplicate_arxiv_paper(self, mock_fetch, arxiv_importer):
        """Importing the same arXiv ID a second time raises ``ValueError``."""
        # File handed back by the mocked download.
        pdf_file = Path("./.tmp") / "test_arxiv_dup.pdf"
        with pdf_file.open("wb") as f:
            f.write(b"%PDF-1.4\n%%EOF\n")

        try:
            with patch.object(ArxivImporter, "download_pdf", return_value=pdf_file):
                mock_result = Mock()
                mock_result.title = "Test Paper"
                mock_result.authors = []
                mock_result.published = None
                mock_result.updated = None
                mock_result.categories = []
                mock_fetch.return_value = mock_result

                # First import should succeed
                arxiv_importer.import_arxiv_paper("2212.06340")

                # Second import should fail
                with pytest.raises(ValueError, match="Paper already imported"):
                    arxiv_importer.import_arxiv_paper("2212.06340")
        finally:
            if pdf_file.exists():
                pdf_file.unlink()
|
||||
@@ -0,0 +1,220 @@
|
||||
"""Integration tests for paperlib."""
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.converter import MinerUConverter
|
||||
from paperlib.importer import ArxivImporter, LocalImporter
|
||||
from paperlib.index import DatabaseManager
|
||||
from paperlib.models import ConversionStatus, SourceType
|
||||
from paperlib.storage import PaperStorageManager
|
||||
|
||||
|
||||
class TestIntegration:
|
||||
"""Test full integration workflows."""
|
||||
|
||||
@pytest.fixture
def temp_library(self):
    """Yield the ``LibraryPaths`` of a throwaway library under ``./.tmp``.

    The root directory name embeds ``hash(self)``. After the test has
    finished, the whole directory tree is removed if it still exists.
    """
    root = Path("./.tmp", f"test_integration_{hash(self)}")
    root.mkdir(parents=True, exist_ok=True)

    paths = LibraryPaths.from_root(root)
    paths.create_directories()

    yield paths

    # Teardown: drop the library tree created above.
    if root.exists():
        shutil.rmtree(root)
|
||||
|
||||
@pytest.fixture
def sample_pdf(self):
    """Yield the path of a minimal PDF-like file under ``./.tmp``.

    The file name embeds ``hash(self)``. The file is deleted after the test
    if it still exists at that path.
    """
    pdf_file = Path("./.tmp") / f"integration_test_{hash(self)}.pdf"
    # Do not depend on another fixture having created ./.tmp first.
    pdf_file.parent.mkdir(parents=True, exist_ok=True)

    # Minimal PDF content: header, catalog, page tree, one page, EOF marker.
    pdf_file.write_bytes(
        b"%PDF-1.4\n"
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
        b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n"
        b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n"
        b"%%EOF\n"
    )

    yield pdf_file

    # Cleanup
    if pdf_file.exists():
        pdf_file.unlink()
|
||||
|
||||
def test_complete_local_import_workflow(self, temp_library, sample_pdf):
    """Walk one local PDF through import, indexing, search, stats and update."""
    storage = PaperStorageManager(temp_library)
    database = DatabaseManager(temp_library)
    importer = LocalImporter(storage)

    database.initialize_database()

    # Import the sample PDF with a title, tags and notes.
    paper = importer.import_pdf(
        pdf_path=sample_pdf,
        title="Integration Test Paper",
        tags=["integration", "test"],
        notes="This is an integration test paper",
    )

    # Authors are set on the returned metadata and then persisted.
    paper.authors = ["Test Author"]
    storage.update_paper_metadata(paper)

    # The in-memory metadata reflects what was supplied.
    assert paper.source_type == SourceType.LOCAL
    assert paper.title == "Integration Test Paper"
    assert paper.authors == ["Test Author"]
    assert paper.tags == ["integration", "test"]

    # Index the paper and fetch it back by id.
    database.index_paper(paper)
    fetched = database.get_paper(paper.paper_id)
    assert fetched is not None
    assert fetched["title"] == "Integration Test Paper"

    # Free-text search finds exactly this paper.
    hits = list(database.search_papers("Integration Test"))
    assert len(hits) == 1
    assert hits[0]["paper_id"] == paper.paper_id

    # Field search on the author list finds one entry.
    by_author = list(database.search_by_field("author_list", "Test Author"))
    assert len(by_author) == 1

    # Listing returns the single indexed paper.
    listing = list(database.list_papers())
    assert len(listing) == 1
    assert listing[0]["paper_id"] == paper.paper_id

    # Statistics count one paper, of source type "local".
    statistics = database.get_statistics()
    assert statistics["total_papers"] == 1
    assert statistics["by_source_type"]["local"] == 1

    # Change the notes, persist, re-index, and check the indexed search text.
    paper.notes = "Updated notes"
    storage.update_paper_metadata(paper)
    database.index_paper(paper)
    refreshed = database.get_paper(paper.paper_id)
    assert "Updated notes" in refreshed["search_text"]
|
||||
|
||||
def test_multiple_papers_workflow(self, temp_library, sample_pdf):
    """Import three distinct PDFs and check listing, search, limit and reindex."""
    storage = PaperStorageManager(temp_library)
    database = DatabaseManager(temp_library)
    importer = LocalImporter(storage)

    database.initialize_database()

    papers = []
    for i in range(3):
        # Each PDF gets its own comment line so the three files differ in content.
        unique_pdf = Path("./.tmp") / f"unique_paper_{i}_{hash(self)}.pdf"
        unique_pdf.write_bytes(
            b"%PDF-1.4\n"
            + f"% Unique content {i}\n".encode()
            + b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            + b"%%EOF\n"
        )

        try:
            imported = importer.import_pdf(
                pdf_path=unique_pdf,
                title=f"Test Paper {i + 1}",
                tags=[f"tag{i + 1}", "common"],
                notes=f"Notes for paper {i + 1}",
            )
            # Authors are set after the import and then persisted.
            imported.authors = [f"Author {i + 1}"]
            storage.update_paper_metadata(imported)

            papers.append(imported)
            database.index_paper(imported)
        finally:
            # Remove the scratch PDF whether or not the import succeeded.
            if unique_pdf.exists():
                unique_pdf.unlink()

    # All three papers are listed.
    listing = list(database.list_papers())
    assert len(listing) == 3

    # Every paper carries the "common" tag, so the search returns all of them.
    shared_tag_hits = list(database.search_papers("common"))
    assert len(shared_tag_hits) == 3

    # A limit of two caps the listing at two rows.
    limited = list(database.list_papers(limit=2))
    assert len(limited) == 2

    # Reindexing from storage reports three successes and no errors.
    success_count, error_count = database.reindex_from_storage(storage)
    assert success_count == 3
    assert error_count == 0

    # The paper count is unchanged after the reindex.
    statistics = database.get_statistics()
    assert statistics["total_papers"] == 3
|
||||
|
||||
def test_storage_and_database_consistency(self, temp_library, sample_pdf):
    """Check that storage and the database agree, including after a reindex."""
    storage = PaperStorageManager(temp_library)
    database = DatabaseManager(temp_library)
    importer = LocalImporter(storage)

    database.initialize_database()

    # Import one paper and index it.
    paper = importer.import_pdf(
        pdf_path=sample_pdf,
        title="Consistency Test Paper",
    )
    database.index_paper(paper)

    # The paper is present in storage ...
    assert storage.paper_exists(paper.paper_id, paper.source_type)

    # ... and in the database.
    indexed = database.get_paper(paper.paper_id)
    assert indexed is not None

    # Metadata loaded from storage matches the database row.
    on_disk = storage.load_paper_metadata(paper.paper_id, paper.source_type)
    assert on_disk.title == indexed["title"]
    assert on_disk.paper_id == indexed["paper_id"]

    # Remove the row from the database only; it must no longer be found.
    database.remove_paper(paper.paper_id)
    assert database.get_paper(paper.paper_id) is None

    # Rebuilding the index from storage reports one success and no errors.
    success_count, error_count = database.reindex_from_storage(storage)
    assert success_count == 1
    assert error_count == 0

    # The row is back, with its title.
    restored = database.get_paper(paper.paper_id)
    assert restored is not None
    assert restored["title"] == "Consistency Test Paper"
|
||||
@@ -0,0 +1,230 @@
|
||||
"""Tests for paperlib data models."""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.models import (
|
||||
ConversionStatus,
|
||||
PaperMetadata,
|
||||
PaperSummary,
|
||||
SourceType,
|
||||
SummaryStatus,
|
||||
)
|
||||
|
||||
|
||||
class TestPaperMetadata:
    """Unit tests for the ``PaperMetadata`` data model."""

    def test_create_metadata(self):
        """Construct a record and check the given fields and status defaults."""
        record = PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.LOCAL,
            title="Test Paper",
            authors=["Alice Smith", "Bob Jones"],
            categories=["cs.AI", "stat.ML"],
            tags=["machine-learning", "ai"],
            notes="Test notes",
        )

        # Values passed to the constructor come back unchanged.
        assert record.paper_id == "test-paper-1"
        assert record.source_type == SourceType.LOCAL
        assert record.title == "Test Paper"
        assert record.authors == ["Alice Smith", "Bob Jones"]
        assert record.categories == ["cs.AI", "stat.ML"]
        assert record.tags == ["machine-learning", "ai"]
        assert record.notes == "Test notes"

        # The status fields were not supplied; these are the expected defaults.
        assert record.conversion_status == ConversionStatus.PENDING
        assert record.summary_status == SummaryStatus.NOT_REQUESTED

    def test_to_dict(self):
        """Serialise a record and check enum and datetime rendering."""
        record = PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Test Paper",
            published_date=datetime(2022, 12, 13, 2, 46, 55),
        )

        serialised = record.to_dict()

        # Expected string forms, checked key by key in this order.
        expected = {
            "paper_id": "test-paper-1",
            "source_type": "arxiv",
            "source_id": "2212.06340",
            "title": "Test Paper",
            "published_date": "2022-12-13T02:46:55",
        }
        for key, value in expected.items():
            assert serialised[key] == value

    def test_from_dict(self):
        """Build a record from a plain dict and check the parsed types."""
        payload = {
            "paper_id": "test-paper-1",
            "source_type": "local",
            "title": "Test Paper",
            "authors": ["Alice Smith"],
            "published_date": "2022-12-13T02:46:55",
            "categories": ["cs.AI"],
            "pdf_path": "papers/test.pdf",
            "imported_at": "2022-12-13T02:46:55",
            "conversion_status": "success",
            "summary_status": "pending",
            "tags": ["test"],
            "notes": "Test notes",
        }

        record = PaperMetadata.from_dict(payload)

        assert record.paper_id == "test-paper-1"
        assert record.source_type == SourceType.LOCAL
        assert record.title == "Test Paper"
        assert record.authors == ["Alice Smith"]
        # The ISO string is expected to come back as a datetime.
        assert record.published_date == datetime(2022, 12, 13, 2, 46, 55)
        # Status strings are expected to come back as enum members.
        assert record.conversion_status == ConversionStatus.SUCCESS
        assert record.summary_status == SummaryStatus.PENDING

    def test_save_and_load_file(self):
        """Round-trip a record through a JSON file on disk."""
        record = PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.LOCAL,
            title="Test Paper",
            authors=["Alice Smith"],
        )

        # Reserve a file name; delete=False keeps the file after the handle closes.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
            json_path = Path(handle.name)

        try:
            record.save_to_file(json_path)

            # The file exists and parses as JSON holding the paper id.
            assert json_path.exists()
            on_disk = json.loads(json_path.read_text())
            assert on_disk["paper_id"] == "test-paper-1"

            # Loading the file gives back the same identifying fields.
            reloaded = PaperMetadata.load_from_file(json_path)
            assert reloaded.paper_id == "test-paper-1"
            assert reloaded.title == "Test Paper"
            assert reloaded.source_type == SourceType.LOCAL
        finally:
            if json_path.exists():
                json_path.unlink()
|
||||
|
||||
|
||||
class TestPaperSummary:
    """Unit tests for the ``PaperSummary`` data model."""

    def test_create_summary(self):
        """Construct a summary and check the given fields and schema version."""
        digest = PaperSummary(
            one_sentence_summary="This paper introduces a new method.",
            problem_statement="Current methods are inefficient.",
            method_overview="We propose a novel approach.",
            main_results="Our method achieves 95% accuracy.",
            claimed_contributions=["Novel architecture", "Improved performance"],
            problem_tags=["classification", "optimization"],
            technique_tags=["neural-networks", "reinforcement-learning"],
        )

        # schema_version was not passed in; "1.0" is the expected default.
        assert digest.schema_version == "1.0"
        assert digest.one_sentence_summary == "This paper introduces a new method."
        assert digest.problem_statement == "Current methods are inefficient."
        assert digest.claimed_contributions == [
            "Novel architecture",
            "Improved performance",
        ]
        assert digest.problem_tags == ["classification", "optimization"]

    def test_to_dict(self):
        """Serialise a summary and check a few keys of the resulting dict."""
        digest = PaperSummary(
            one_sentence_summary="Test summary",
            relevance_to_user=0.85,
        )

        serialised = digest.to_dict()

        expected = {
            "schema_version": "1.0",
            "one_sentence_summary": "Test summary",
            "relevance_to_user": 0.85,
        }
        for key, value in expected.items():
            assert serialised[key] == value

    def test_from_dict(self):
        """Build a summary from a plain dict and check the copied fields."""
        payload = {
            "schema_version": "1.0",
            "one_sentence_summary": "Test summary",
            "problem_statement": "Test problem",
            "claimed_contributions": ["Test contribution"],
            "problem_tags": ["test"],
            "technique_tags": ["neural-networks"],
            "entities": ["Entity1", "Entity2"],
        }

        digest = PaperSummary.from_dict(payload)

        assert digest.schema_version == "1.0"
        assert digest.one_sentence_summary == "Test summary"
        assert digest.problem_statement == "Test problem"
        assert digest.claimed_contributions == ["Test contribution"]
        assert digest.entities == ["Entity1", "Entity2"]

    def test_save_and_load_file(self):
        """Round-trip a summary through a JSON file on disk."""
        digest = PaperSummary(
            one_sentence_summary="Test summary",
            problem_tags=["tag1", "tag2"],
        )

        # Reserve a file name; delete=False keeps the file after the handle closes.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
            json_path = Path(handle.name)

        try:
            digest.save_to_file(json_path)

            # The file exists and parses as JSON holding the summary sentence.
            assert json_path.exists()
            on_disk = json.loads(json_path.read_text())
            assert on_disk["one_sentence_summary"] == "Test summary"

            # Loading the file gives back the same content.
            reloaded = PaperSummary.load_from_file(json_path)
            assert reloaded.one_sentence_summary == "Test summary"
            assert reloaded.problem_tags == ["tag1", "tag2"]
        finally:
            if json_path.exists():
                json_path.unlink()
|
||||
|
||||
|
||||
class TestEnums:
    """Pin the string values that the model enums compare equal to."""

    def test_source_type_values(self):
        """Each ``SourceType`` member equals its lowercase string value."""
        pairs = [
            (SourceType.LOCAL, "local"),
            (SourceType.ARXIV, "arxiv"),
        ]
        for member, text in pairs:
            assert member == text

    def test_conversion_status_values(self):
        """Each ``ConversionStatus`` member equals its lowercase string value."""
        pairs = [
            (ConversionStatus.PENDING, "pending"),
            (ConversionStatus.PROCESSING, "processing"),
            (ConversionStatus.SUCCESS, "success"),
            (ConversionStatus.FAILED, "failed"),
        ]
        for member, text in pairs:
            assert member == text

    def test_summary_status_values(self):
        """Each ``SummaryStatus`` member equals its lowercase string value."""
        pairs = [
            (SummaryStatus.PENDING, "pending"),
            (SummaryStatus.PROCESSING, "processing"),
            (SummaryStatus.SUCCESS, "success"),
            (SummaryStatus.FAILED, "failed"),
            (SummaryStatus.NOT_REQUESTED, "not_requested"),
        ]
        for member, text in pairs:
            assert member == text
|
||||
@@ -0,0 +1,261 @@
|
||||
"""Tests for paperlib storage manager."""
|
||||
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.models import ConversionStatus, PaperMetadata, SourceType
|
||||
from paperlib.storage import PaperStorageManager
|
||||
|
||||
|
||||
class TestPaperStorageManager:
|
||||
"""Test PaperStorageManager functionality."""
|
||||
|
||||
@pytest.fixture
def temp_library(self):
    """Yield the ``LibraryPaths`` of a throwaway library under ``./.tmp``.

    The root directory name embeds ``hash(self)``. After the test has
    finished, the whole directory tree is removed if it still exists.
    """
    root = Path("./.tmp", f"test_library_{hash(self)}")
    root.mkdir(parents=True, exist_ok=True)

    paths = LibraryPaths.from_root(root)
    paths.create_directories()

    yield paths

    # Teardown: drop the library tree created above.
    if root.exists():
        shutil.rmtree(root)
|
||||
|
||||
@pytest.fixture
def storage_manager(self, temp_library):
    """Return a ``PaperStorageManager`` built on the temporary library paths."""
    manager = PaperStorageManager(temp_library)
    return manager
|
||||
|
||||
@pytest.fixture
def sample_pdf(self):
    """Yield the path of a minimal PDF-like file under ``./.tmp``.

    The file name embeds ``hash(self)``. The file is deleted after the test
    if it still exists at that path.
    """
    temp_file = Path("./.tmp") / f"test_paper_{hash(self)}.pdf"
    # Do not depend on another fixture having created ./.tmp first.
    temp_file.parent.mkdir(parents=True, exist_ok=True)

    # Minimal PDF-like content: header, catalog object, EOF marker.
    temp_file.write_bytes(
        b"%PDF-1.4\n"
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
        b"%%EOF\n"
    )

    yield temp_file

    # Cleanup
    if temp_file.exists():
        temp_file.unlink()
|
||||
|
||||
def test_generate_paper_id_local(self, storage_manager, sample_pdf):
    """A local paper id starts with ``local-`` and is 22 characters long."""
    generated = storage_manager.generate_paper_id(
        SourceType.LOCAL,
        pdf_path=sample_pdf,
    )

    prefix = "local-"
    assert generated.startswith(prefix)
    # The prefix plus a 16-character hash gives 22 characters in total.
    assert len(generated) == len(prefix) + 16
|
||||
|
||||
def test_generate_paper_id_arxiv(self, storage_manager):
    """An arXiv id ``2212.06340`` yields the paper id ``arxiv-2212_06340``."""
    generated = storage_manager.generate_paper_id(
        SourceType.ARXIV,
        source_id="2212.06340",
    )

    assert generated == "arxiv-2212_06340"
|
||||
|
||||
def test_get_paper_directory_arxiv(self, storage_manager):
    """An arXiv paper is expected under ``papers_dir/arxiv/2212/<paper_id>``."""
    papers_root = storage_manager.library_paths.papers_dir
    wanted = papers_root / "arxiv" / "2212" / "arxiv-2212_06340"

    actual = storage_manager.get_paper_directory(
        "arxiv-2212_06340",
        SourceType.ARXIV,
    )

    assert actual == wanted
|
||||
|
||||
def test_get_paper_directory_local(self, storage_manager):
    """A local paper is expected under ``papers_dir/local/<id without prefix>``."""
    papers_root = storage_manager.library_paths.papers_dir
    wanted = papers_root / "local" / "abcd1234efgh5678"

    actual = storage_manager.get_paper_directory(
        "local-abcd1234efgh5678",
        SourceType.LOCAL,
    )

    assert actual == wanted
|
||||
|
||||
def test_get_paper_paths(self, storage_manager):
    """The path mapping has every expected key and the expected file names."""
    locations = storage_manager.get_paper_paths("arxiv-2212_06340", SourceType.ARXIV)

    # Every one of these keys must be present in the mapping.
    required_keys = (
        "directory",
        "meta",
        "pdf",
        "markdown",
        "summary_json",
        "summary_md",
        "assets",
        "logs",
    )
    for key in required_keys:
        assert key in locations

    # Entries are Path objects with fixed file names.
    assert isinstance(locations["meta"], Path)
    assert locations["meta"].name == "meta.json"
    assert locations["pdf"].name == "source.pdf"
|
||||
|
||||
def test_store_paper_local(self, storage_manager, sample_pdf):
    """Storing a local PDF returns its metadata and creates the on-disk layout."""
    stored = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.LOCAL,
        title="Test Paper",
        authors=["Test Author"],
        tags=["test"],
    )

    # The returned metadata carries the supplied values and a pending status.
    assert stored.source_type == SourceType.LOCAL
    assert stored.title == "Test Paper"
    assert stored.authors == ["Test Author"]
    assert stored.tags == ["test"]
    assert stored.conversion_status == ConversionStatus.PENDING

    # Each of these entries of the paper's path mapping must exist on disk.
    locations = storage_manager.get_paper_paths(stored.paper_id, stored.source_type)
    for key in ("directory", "meta", "pdf", "assets", "logs"):
        assert locations[key].exists()
|
||||
|
||||
def test_store_paper_arxiv(self, storage_manager, sample_pdf):
    """Storing an arXiv paper keeps its source id and fills in the file paths."""
    stored = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.ARXIV,
        source_id="2212.06340",
        title="Test arXiv Paper",
        authors=["Alice Smith", "Bob Jones"],
        categories=["cs.AI"],
    )

    # The returned metadata carries the supplied values.
    assert stored.source_type == SourceType.ARXIV
    assert stored.source_id == "2212.06340"
    assert stored.title == "Test arXiv Paper"
    assert stored.authors == ["Alice Smith", "Bob Jones"]
    assert stored.categories == ["cs.AI"]

    # Each path attribute must be set to something truthy.
    assert stored.pdf_path
    assert stored.paper_md_path
    assert stored.summary_json_path
    assert stored.summary_md_path
|
||||
|
||||
def test_load_paper_metadata(self, storage_manager, sample_pdf):
    """Metadata loaded from storage matches what ``store_paper`` returned."""
    stored = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.LOCAL,
        title="Test Paper",
    )

    # Read the metadata back using the id and source type of the stored paper.
    reloaded = storage_manager.load_paper_metadata(
        stored.paper_id,
        stored.source_type,
    )

    assert reloaded is not None
    assert reloaded.paper_id == stored.paper_id
    assert reloaded.title == "Test Paper"
    assert reloaded.source_type == SourceType.LOCAL
|
||||
|
||||
def test_load_nonexistent_paper(self, storage_manager):
    """Loading an id that was never stored is expected to give ``None``."""
    missing = storage_manager.load_paper_metadata("nonexistent", SourceType.LOCAL)

    assert missing is None
|
||||
|
||||
def test_update_paper_metadata(self, storage_manager, sample_pdf):
    """Changes written with ``update_paper_metadata`` are seen on reload."""
    paper = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.LOCAL,
        title="Original Title",
    )

    # Change two fields on the metadata object and persist them.
    paper.title = "Updated Title"
    paper.conversion_status = ConversionStatus.SUCCESS
    storage_manager.update_paper_metadata(paper)

    # A fresh load must show both changes.
    reloaded = storage_manager.load_paper_metadata(paper.paper_id, paper.source_type)
    assert reloaded.title == "Updated Title"
    assert reloaded.conversion_status == ConversionStatus.SUCCESS
|
||||
|
||||
def test_list_all_papers(self, storage_manager, sample_pdf):
    """Listing is empty at first and returns every paper once some are stored."""
    # Nothing has been stored yet.
    assert len(list(storage_manager.list_all_papers())) == 0

    # Store the same PDF once as a local paper and once as an arXiv paper.
    local_paper = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.LOCAL,
        title="Paper 1",
    )
    arxiv_paper = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.ARXIV,
        source_id="2212.06340",
        title="Paper 2",
    )

    # Both papers are listed.
    listed = list(storage_manager.list_all_papers())
    assert len(listed) == 2

    listed_ids = {entry.paper_id for entry in listed}
    assert local_paper.paper_id in listed_ids
    assert arxiv_paper.paper_id in listed_ids
|
||||
|
||||
def test_paper_exists(self, storage_manager, sample_pdf):
    """``paper_exists`` is false for an unknown id and true after storing."""
    # An id that was never stored is not found.
    assert not storage_manager.paper_exists("nonexistent", SourceType.LOCAL)

    stored = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.LOCAL,
        title="Test Paper",
    )

    # The stored paper is found by its id and source type.
    assert storage_manager.paper_exists(stored.paper_id, stored.source_type)
|
||||
|
||||
def test_delete_paper(self, storage_manager, sample_pdf):
    """Deleting returns ``True`` once, then ``False`` when the paper is gone."""
    stored = storage_manager.store_paper(
        pdf_path=sample_pdf,
        source_type=SourceType.LOCAL,
        title="Test Paper",
    )
    paper_id = stored.paper_id
    source_type = stored.source_type

    # The paper is present before the delete.
    assert storage_manager.paper_exists(paper_id, source_type)

    # The first delete reports success.
    first_attempt = storage_manager.delete_paper(paper_id, source_type)
    assert first_attempt is True

    # The paper is no longer found.
    assert not storage_manager.paper_exists(paper_id, source_type)

    # A second delete of the same paper reports False.
    second_attempt = storage_manager.delete_paper(paper_id, source_type)
    assert second_attempt is False
|
||||
Reference in New Issue
Block a user