test: add tests
This commit is contained in:
@@ -31,3 +31,8 @@ select = ["E", "F", "I", "B", "UP"]
|
|||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
testpaths = ["tests"]
|
testpaths = ["tests"]
|
||||||
|
|
||||||
|
[dependency-groups]
|
||||||
|
dev = [
|
||||||
|
"pytest>=9.0.3",
|
||||||
|
]
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
"""Test package for paperlib."""
|
||||||
@@ -0,0 +1,242 @@
|
|||||||
|
"""Tests for paperlib CLI functionality."""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestCLI:
    """End-to-end tests that run the ``paperlib`` command line through ``uv run``.

    Each test spawns the CLI in a subprocess and checks its exit code and
    captured stdout. Scratch libraries and PDFs are created under ``./.tmp``
    (relative to the current working directory) and removed after each test.
    """

    @staticmethod
    def _scratch_path(prefix, suffix=""):
        """Return a uniquely named, not-yet-created path under ``./.tmp``.

        A random UUID is used instead of ``hash(self)``: the default object
        hash is derived from the object's identity, which can repeat across
        processes, so parallel runs sharing ``./.tmp`` could collide.
        """
        import uuid  # function-scope import; only this helper needs it

        scratch_root = Path("./.tmp")
        # Fixtures must not depend on each other to create the scratch root.
        scratch_root.mkdir(parents=True, exist_ok=True)
        return scratch_root / f"{prefix}{uuid.uuid4().hex}{suffix}"

    @pytest.fixture
    def temp_library(self):
        """Yield an empty directory to use as a library root; delete it afterwards."""
        temp_dir = self._scratch_path("test_cli_")
        temp_dir.mkdir(parents=True, exist_ok=True)

        yield temp_dir

        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def sample_pdf(self):
        """Yield the path of a minimal PDF-like file; delete it afterwards."""
        pdf_file = self._scratch_path("cli_test_", ".pdf")
        # Minimal PDF content: header, one catalog object, end-of-file marker.
        pdf_file.write_bytes(
            b"%PDF-1.4\n"
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            b"%%EOF\n"
        )

        yield pdf_file

        # Cleanup
        if pdf_file.exists():
            pdf_file.unlink()

    def run_paperlib_cmd(self, *args):
        """Run ``uv run paperlib <args>`` and return the ``CompletedProcess``.

        stdout and stderr are captured as text; the command runs in the
        current working directory.
        """
        cmd = ["uv", "run", "paperlib", *args]
        return subprocess.run(cmd, capture_output=True, text=True, cwd=Path.cwd())

    def _run_ok(self, *args):
        """Run a setup command and fail right away, showing its output, if it exits non-zero.

        Used for preparatory steps so that a broken setup is reported as such
        instead of surfacing later as a confusing assertion failure.
        """
        result = self.run_paperlib_cmd(*args)
        assert result.returncode == 0, (
            f"setup command {args!r} failed: {result.stderr or result.stdout}"
        )
        return result

    def test_cli_help(self):
        """``--help`` exits 0 and mentions the program, its tagline and key commands."""
        result = self.run_paperlib_cmd("--help")

        assert result.returncode == 0
        for fragment in (
            "paperlib",
            "Local-first paper library engine",
            "init",
            "import",
            "convert",
        ):
            assert fragment in result.stdout

    def test_cli_version(self):
        """``--version`` exits 0 and prints the program name and version."""
        result = self.run_paperlib_cmd("--version")

        assert result.returncode == 0
        assert "paperlib" in result.stdout
        assert "0.1.0" in result.stdout

    def test_init_command(self, temp_library):
        """``init`` reports success and creates the expected sub-directories."""
        result = self.run_paperlib_cmd("init", str(temp_library))

        assert result.returncode == 0
        assert "Initialized paper library" in result.stdout

        # Check directory structure was created
        for subdir in ("config", "papers", "inbox", "db", "cache"):
            assert (temp_library / subdir).exists()

    def test_status_command(self, temp_library):
        """``status`` prints the resolved library path and its section labels."""
        # Initialize library first
        self._run_ok("init", str(temp_library))

        result = self.run_paperlib_cmd("status", "--library", str(temp_library))

        assert result.returncode == 0
        # The CLI reports the absolute path, so compare against the resolved one.
        assert str(temp_library.resolve()) in result.stdout
        assert "config:" in result.stdout
        assert "database:" in result.stdout
        assert "papers:" in result.stdout

    def test_import_local_pdf_command(self, temp_library, sample_pdf):
        """``import --pdf`` with a title and tags succeeds and echoes the title."""
        # Initialize library
        self._run_ok("init", str(temp_library))

        # Import PDF
        result = self.run_paperlib_cmd(
            "import",
            "--pdf",
            str(sample_pdf),
            "--title",
            "Test CLI Paper",
            "--tags",
            "test",
            "cli",
            "--library",
            str(temp_library),
        )

        assert result.returncode == 0
        assert "Successfully imported local PDF" in result.stdout
        assert "Test CLI Paper" in result.stdout

    def test_list_command_empty(self, temp_library):
        """``list`` on a freshly initialised library reports that nothing was found."""
        self._run_ok("init", str(temp_library))

        result = self.run_paperlib_cmd("list", "--library", str(temp_library))

        assert result.returncode == 0
        assert "No papers found" in result.stdout

    def test_list_command_with_papers(self, temp_library, sample_pdf):
        """``list`` shows the single imported paper and its title."""
        # Initialize and import
        self._run_ok("init", str(temp_library))
        self._run_ok(
            "import",
            "--pdf",
            str(sample_pdf),
            "--title",
            "Test Paper for List",
            "--library",
            str(temp_library),
        )

        result = self.run_paperlib_cmd("list", "--library", str(temp_library))

        assert result.returncode == 0
        assert "Found 1 papers" in result.stdout
        assert "Test Paper for List" in result.stdout

    def test_show_command(self, temp_library, sample_pdf):
        """``show <paper_id>`` prints the ID, title and source of an imported paper."""
        # Initialize and import
        self._run_ok("init", str(temp_library))
        import_result = self._run_ok(
            "import",
            "--pdf",
            str(sample_pdf),
            "--title",
            "Test Paper for Show",
            "--library",
            str(temp_library),
        )

        # The paper ID is whatever follows the last colon on the success line.
        paper_id = next(
            (
                line.rpartition(":")[2].strip()
                for line in import_result.stdout.split("\n")
                if "Successfully imported local PDF:" in line
            ),
            None,
        )

        assert paper_id is not None

        # Show paper details
        result = self.run_paperlib_cmd("show", paper_id, "--library", str(temp_library))

        assert result.returncode == 0
        assert f"Paper ID: {paper_id}" in result.stdout
        assert "Test Paper for Show" in result.stdout
        assert "Source: local" in result.stdout

    def test_show_nonexistent_paper(self, temp_library):
        """``show`` with an unknown ID exits with status 1 and says the paper is missing."""
        self._run_ok("init", str(temp_library))

        result = self.run_paperlib_cmd(
            "show", "nonexistent", "--library", str(temp_library)
        )

        assert result.returncode == 1
        assert "Paper not found" in result.stdout

    def test_reindex_command(self, temp_library, sample_pdf):
        """``reindex`` rebuilds the index and reports one paper."""
        # Initialize and import
        self._run_ok("init", str(temp_library))
        self._run_ok(
            "import", "--pdf", str(sample_pdf), "--library", str(temp_library)
        )

        # Reindex
        result = self.run_paperlib_cmd("reindex", "--library", str(temp_library))

        assert result.returncode == 0
        assert "Rebuilding search index" in result.stdout
        assert "papers indexed" in result.stdout
        assert "Total papers: 1" in result.stdout

    def test_convert_command_no_papers(self, temp_library):
        """``convert`` on an empty library finishes with zero successes and failures."""
        self._run_ok("init", str(temp_library))

        result = self.run_paperlib_cmd("convert", "--library", str(temp_library))

        assert result.returncode == 0
        assert "Complete: 0 successful, 0 failed" in result.stdout

    def test_convert_command_with_papers_no_mineru(self, temp_library, sample_pdf):
        """``convert`` prints a completion summary even if the converter is unavailable."""
        # Initialize and import
        self._run_ok("init", str(temp_library))
        self._run_ok(
            "import", "--pdf", str(sample_pdf), "--library", str(temp_library)
        )

        # Conversion itself may fail when MinerU is not set up, so the exit
        # code is deliberately not checked; only the summary line is required.
        result = self.run_paperlib_cmd("convert", "--library", str(temp_library))

        assert "Complete:" in result.stdout

    def test_invalid_command(self):
        """An unknown sub-command exits with a non-zero status."""
        result = self.run_paperlib_cmd("invalid-command")

        assert result.returncode != 0

    def test_missing_required_arguments(self):
        """Sub-commands invoked without their required arguments exit non-zero."""
        # Import without PDF or arXiv
        result = self.run_paperlib_cmd("import")
        assert result.returncode != 0

        # Show without paper ID
        result = self.run_paperlib_cmd("show")
        assert result.returncode != 0
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
"""Tests for paperlib configuration."""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from paperlib.config import LibraryPaths
|
||||||
|
|
||||||
|
|
||||||
|
class TestLibraryPaths:
    """Tests for how ``LibraryPaths`` derives and creates the library layout."""

    def test_from_root(self):
        """``from_root`` resolves the root and derives every sub-path from it."""
        root = Path("./.tmp/test_config")
        paths = LibraryPaths.from_root(root)
        resolved_root = root.resolve()

        # Check root path
        assert paths.root == resolved_root

        # Check default subdirectories
        assert paths.config_dir == resolved_root / "config"
        assert paths.papers_dir == resolved_root / "papers"
        assert paths.inbox_dir == resolved_root / "inbox"
        assert paths.db_dir == resolved_root / "db"
        assert paths.cache_dir == resolved_root / "cache"

        # Check specific files
        assert paths.db_path == resolved_root / "db" / "paperlib.sqlite3"
        assert paths.config_path == resolved_root / "config" / "config.toml"

    def test_create_directories(self):
        """``create_directories`` creates the root and all standard sub-directories."""
        root = Path("./.tmp/test_create_dirs")
        # A run that was killed before its cleanup would leave this directory
        # behind and make the "does not exist yet" precondition fail.
        shutil.rmtree(root, ignore_errors=True)

        try:
            paths = LibraryPaths.from_root(root)

            # Directories shouldn't exist initially
            assert not paths.root.exists()

            # Create directories
            paths.create_directories()

            # All directories should now exist
            for directory in (
                paths.root,
                paths.config_dir,
                paths.papers_dir,
                paths.inbox_dir,
                paths.db_dir,
                paths.cache_dir,
            ):
                assert directory.exists()

        finally:
            # Cleanup
            if root.exists():
                shutil.rmtree(root)

    def test_expanduser(self):
        """A leading ``~`` is expanded, leaving an absolute root."""
        # Test with tilde path
        paths = LibraryPaths.from_root(Path("~/.tmp/test_tilde"))

        # Compare path components rather than the raw string, so an unrelated
        # "~" inside a directory name cannot cause a false failure.
        assert "~" not in paths.root.parts
        assert paths.root.is_absolute()

    def test_resolve_relative_paths(self):
        """A relative root is turned into an absolute path ending in the same components."""
        # Use relative path
        paths = LibraryPaths.from_root(Path("./relative/path"))

        # Should be absolute
        assert paths.root.is_absolute()
        # Component comparison is independent of the platform's path separator.
        assert paths.root.parts[-2:] == ("relative", "path")
|
||||||
@@ -0,0 +1,312 @@
|
|||||||
|
"""Tests for paperlib database manager."""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperlib.config import LibraryPaths
|
||||||
|
from paperlib.index import DatabaseManager
|
||||||
|
from paperlib.models import ConversionStatus, PaperMetadata, SourceType, SummaryStatus
|
||||||
|
|
||||||
|
|
||||||
|
class TestDatabaseManager:
    """Tests for ``DatabaseManager``: schema setup, indexing, lookup, search and statistics."""

    @pytest.fixture
    def temp_library(self):
        """Yield ``LibraryPaths`` for a fresh library under ``./.tmp``; delete it afterwards."""
        import uuid  # function-scope import; only scratch naming needs it

        # Random suffix instead of hash(self): the default object hash comes
        # from object identity and can repeat across processes sharing ./.tmp.
        temp_dir = Path("./.tmp") / f"test_db_{uuid.uuid4().hex}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()

        yield library_paths

        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def db_manager(self, temp_library):
        """Return a ``DatabaseManager`` whose database has already been initialised."""
        manager = DatabaseManager(temp_library)
        manager.initialize_database()
        return manager

    @pytest.fixture
    def sample_metadata(self):
        """Return fully populated ``PaperMetadata`` for a local paper."""
        return PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.LOCAL,
            source_id=None,
            title="A Test Paper on Machine Learning",
            authors=["Alice Smith", "Bob Jones", "Charlie Brown"],
            categories=["cs.AI", "stat.ML"],
            tags=["machine-learning", "neural-networks", "test"],
            notes="This is a test paper for unit testing.",
            pdf_path="papers/local/test-paper-1/source.pdf",
            paper_md_path="papers/local/test-paper-1/paper.md",
            summary_json_path="papers/local/test-paper-1/summary.json",
            summary_md_path="papers/local/test-paper-1/summary.md",
        )

    def test_initialize_database(self, temp_library):
        """Initialisation creates the database file with the expected tables."""
        db_manager = DatabaseManager(temp_library)

        # Database file shouldn't exist initially
        assert not db_manager.db_path.exists()

        # Initialize database
        db_manager.initialize_database()

        # Database file should now exist
        assert db_manager.db_path.exists()

        # Should be able to connect and query
        with db_manager._get_connection() as conn:
            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tables = [row[0] for row in cursor.fetchall()]
            assert "papers" in tables
            assert "papers_fts" in tables

    def test_index_paper(self, db_manager, sample_metadata):
        """An indexed paper can be read back with its key fields intact."""
        # Index the paper
        db_manager.index_paper(sample_metadata)

        # Verify it was indexed
        paper = db_manager.get_paper(sample_metadata.paper_id)
        assert paper is not None
        assert paper["paper_id"] == "test-paper-1"
        assert paper["title"] == "A Test Paper on Machine Learning"
        assert paper["source_type"] == "local"

    def test_get_paper(self, db_manager, sample_metadata):
        """``get_paper`` returns ``None`` for unknown IDs and the row once indexed."""
        # Initially not found
        assert db_manager.get_paper("nonexistent") is None

        # Index a paper
        db_manager.index_paper(sample_metadata)

        # Now it should be found
        paper = db_manager.get_paper(sample_metadata.paper_id)
        assert paper is not None
        assert paper["paper_id"] == sample_metadata.paper_id
        assert paper["title"] == sample_metadata.title

    def test_remove_paper(self, db_manager, sample_metadata):
        """``remove_paper`` returns ``True`` once, then ``False`` for the same ID."""
        paper_id = sample_metadata.paper_id

        # Index a paper
        db_manager.index_paper(sample_metadata)
        assert db_manager.get_paper(paper_id) is not None

        # Remove it
        assert db_manager.remove_paper(paper_id) is True

        # Verify it's gone
        assert db_manager.get_paper(paper_id) is None

        # Removing again should return False
        assert db_manager.remove_paper(paper_id) is False

    def test_list_papers(self, db_manager):
        """``list_papers`` supports filtering by source type and status, and a limit."""
        # Create multiple test papers
        paper1 = PaperMetadata(
            paper_id="paper-1",
            source_type=SourceType.LOCAL,
            title="Local Paper",
            conversion_status=ConversionStatus.PENDING,
            summary_status=SummaryStatus.NOT_REQUESTED,
        )

        paper2 = PaperMetadata(
            paper_id="paper-2",
            source_type=SourceType.ARXIV,
            title="ArXiv Paper",
            conversion_status=ConversionStatus.SUCCESS,
            summary_status=SummaryStatus.PENDING,
        )

        # Index papers
        db_manager.index_paper(paper1)
        db_manager.index_paper(paper2)

        # List all papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 2

        # Filter by source type
        local_papers = list(db_manager.list_papers(source_type=SourceType.LOCAL))
        assert len(local_papers) == 1
        assert local_papers[0]["source_type"] == "local"

        arxiv_papers = list(db_manager.list_papers(source_type=SourceType.ARXIV))
        assert len(arxiv_papers) == 1
        assert arxiv_papers[0]["source_type"] == "arxiv"

        # Filter by conversion status
        pending_papers = list(
            db_manager.list_papers(conversion_status=ConversionStatus.PENDING)
        )
        assert len(pending_papers) == 1
        assert pending_papers[0]["conversion_status"] == "pending"

        # Limit caps the number of returned rows (offset is not exercised here).
        limited_papers = list(db_manager.list_papers(limit=1))
        assert len(limited_papers) == 1

    def test_search_papers_fts(self, db_manager, sample_metadata):
        """Full-text search matches title words, authors and quoted tags."""
        # Index a paper
        db_manager.index_paper(sample_metadata)

        # Search by title words
        results = list(db_manager.search_papers("Machine Learning"))
        assert len(results) == 1
        assert results[0]["paper_id"] == sample_metadata.paper_id

        # Search by author
        results = list(db_manager.search_papers("Alice Smith"))
        assert len(results) == 1

        # Search by tag (quoted for FTS)
        results = list(db_manager.search_papers('"neural-networks"'))
        assert len(results) == 1

        # Search for non-existent term
        results = list(db_manager.search_papers("nonexistent"))
        assert len(results) == 0

    def test_search_by_field(self, db_manager, sample_metadata):
        """Field search supports partial and exact matching and rejects unknown fields."""
        # Index a paper
        db_manager.index_paper(sample_metadata)

        # Search by title
        results = list(db_manager.search_by_field("title", "Machine Learning"))
        assert len(results) == 1

        # Search by author list
        results = list(db_manager.search_by_field("author_list", "Alice"))
        assert len(results) == 1

        # Exact match
        results = list(
            db_manager.search_by_field(
                "title", "A Test Paper on Machine Learning", exact_match=True
            )
        )
        assert len(results) == 1

        results = list(
            db_manager.search_by_field("title", "Partial Title", exact_match=True)
        )
        assert len(results) == 0

        # Invalid field should raise error
        with pytest.raises(ValueError):
            list(db_manager.search_by_field("invalid_field", "test"))

    def test_get_statistics(self, db_manager):
        """Statistics start empty and then reflect counts per source type and status."""
        # Initially empty
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 0
        assert stats["by_source_type"] == {}

        # Add some papers
        paper1 = PaperMetadata(
            paper_id="paper-1",
            source_type=SourceType.LOCAL,
            title="Local Paper",
            conversion_status=ConversionStatus.PENDING,
        )

        paper2 = PaperMetadata(
            paper_id="paper-2",
            source_type=SourceType.ARXIV,
            title="ArXiv Paper 1",
            conversion_status=ConversionStatus.SUCCESS,
        )

        paper3 = PaperMetadata(
            paper_id="paper-3",
            source_type=SourceType.ARXIV,
            title="ArXiv Paper 2",
            conversion_status=ConversionStatus.FAILED,
        )

        for paper in (paper1, paper2, paper3):
            db_manager.index_paper(paper)

        # Check updated statistics
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 3
        assert stats["by_source_type"]["local"] == 1
        assert stats["by_source_type"]["arxiv"] == 2
        assert stats["by_conversion_status"]["pending"] == 1
        assert stats["by_conversion_status"]["success"] == 1
        assert stats["by_conversion_status"]["failed"] == 1

    def test_reindex_from_storage(self, db_manager, temp_library):
        """Papers that exist only in storage are picked up by ``reindex_from_storage``."""
        import uuid  # function-scope import; only scratch naming needs it

        from paperlib.storage import PaperStorageManager

        # Create storage manager and add some papers
        storage_manager = PaperStorageManager(temp_library)

        # Uniquely named stand-in PDF, so that concurrent runs sharing ./.tmp
        # cannot overwrite or delete each other's file.
        pdf_file = Path("./.tmp") / f"reindex_{uuid.uuid4().hex}.pdf"

        try:
            # Created inside the try so the finally clause always cleans it up.
            pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

            # Store papers in storage
            metadata1 = storage_manager.store_paper(
                pdf_path=pdf_file, source_type=SourceType.LOCAL, title="Paper 1"
            )

            metadata2 = storage_manager.store_paper(
                pdf_path=pdf_file,
                source_type=SourceType.ARXIV,
                source_id="2212.06340",
                title="Paper 2",
            )

            # Database should initially be empty
            stats = db_manager.get_statistics()
            assert stats["total_papers"] == 0

            # Reindex from storage
            success_count, error_count = db_manager.reindex_from_storage(
                storage_manager
            )

            # Check results
            assert success_count == 2
            assert error_count == 0

            # Verify papers are now in database
            stats = db_manager.get_statistics()
            assert stats["total_papers"] == 2

            paper1 = db_manager.get_paper(metadata1.paper_id)
            assert paper1 is not None
            assert paper1["title"] == "Paper 1"

            paper2 = db_manager.get_paper(metadata2.paper_id)
            assert paper2 is not None
            assert paper2["title"] == "Paper 2"

        finally:
            if pdf_file.exists():
                pdf_file.unlink()
|
||||||
@@ -0,0 +1,273 @@
|
|||||||
|
"""Tests for paperlib import functionality."""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperlib.config import LibraryPaths
|
||||||
|
from paperlib.importer import ArxivImporter, LocalImporter
|
||||||
|
from paperlib.models import SourceType
|
||||||
|
from paperlib.storage import PaperStorageManager
|
||||||
|
|
||||||
|
|
||||||
|
class TestLocalImporter:
    """Tests for importing PDF files from disk with ``LocalImporter``."""

    @staticmethod
    def _scratch_path(prefix, suffix=""):
        """Return a uniquely named, not-yet-created path under ``./.tmp``.

        A random UUID replaces ``hash(self)``: the default object hash comes
        from object identity and can repeat across processes sharing ``./.tmp``.
        """
        import uuid  # function-scope import; only this helper needs it

        scratch_root = Path("./.tmp")
        # Do not rely on another fixture having created the scratch root.
        scratch_root.mkdir(parents=True, exist_ok=True)
        return scratch_root / f"{prefix}{uuid.uuid4().hex}{suffix}"

    @pytest.fixture
    def temp_library(self):
        """Yield ``LibraryPaths`` for a fresh library under ``./.tmp``; delete it afterwards."""
        temp_dir = self._scratch_path("test_import_")
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()

        yield library_paths

        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def local_importer(self, temp_library):
        """Return a ``LocalImporter`` that stores into the temporary library."""
        storage_manager = PaperStorageManager(temp_library)
        return LocalImporter(storage_manager)

    @pytest.fixture
    def sample_pdf(self):
        """Yield the path of a minimal PDF-like file; delete it afterwards."""
        pdf_file = self._scratch_path("sample_", ".pdf")
        # Minimal PDF content: header, one catalog object, end-of-file marker.
        pdf_file.write_bytes(
            b"%PDF-1.4\n"
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            b"%%EOF\n"
        )

        yield pdf_file

        # Cleanup
        if pdf_file.exists():
            pdf_file.unlink()

    def test_import_pdf_success(self, local_importer, sample_pdf):
        """Explicit title, notes and tags are carried into the returned metadata."""
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Test Paper",
            notes="Test notes",
            tags=["test", "sample"],
        )

        # Check metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Test Paper"
        assert metadata.notes == "Test notes"
        assert metadata.tags == ["test", "sample"]
        assert metadata.paper_id.startswith("local-")

    def test_import_pdf_auto_title(self, local_importer, sample_pdf):
        """Without an explicit title, the title is derived from the file name."""
        # The file name must be exactly this, so isolate it in its own unique
        # directory instead of a fixed location shared by every run.
        scratch_dir = self._scratch_path("auto_title_")
        meaningful_pdf = scratch_dir / "Machine_Learning-Paper.pdf"

        try:
            scratch_dir.mkdir(parents=True)
            sample_pdf.rename(meaningful_pdf)

            metadata = local_importer.import_pdf(pdf_path=meaningful_pdf)

            # Title should be auto-generated from filename
            assert metadata.title == "Machine Learning Paper"

        finally:
            shutil.rmtree(scratch_dir, ignore_errors=True)

    def test_import_nonexistent_pdf(self, local_importer):
        """Importing a path that does not exist raises ``FileNotFoundError``."""
        nonexistent = Path("./.tmp/nonexistent.pdf")

        with pytest.raises(FileNotFoundError):
            local_importer.import_pdf(pdf_path=nonexistent)

    def test_import_non_pdf_file(self, local_importer):
        """Importing a file that is not a PDF raises ``ValueError``."""
        text_file = self._scratch_path("not_a_pdf_", ".txt")

        try:
            # Created inside the try so the finally clause always cleans it up.
            text_file.write_text("This is not a PDF", encoding="utf-8")

            with pytest.raises(ValueError, match="File is not a PDF"):
                local_importer.import_pdf(pdf_path=text_file)
        finally:
            if text_file.exists():
                text_file.unlink()

    def test_import_duplicate_pdf(self, local_importer, sample_pdf):
        """Importing the same PDF a second time raises ``ValueError``."""
        # Import once; the returned metadata is not needed for this check.
        local_importer.import_pdf(pdf_path=sample_pdf)

        # Try to import again
        with pytest.raises(ValueError, match="Paper already imported"):
            local_importer.import_pdf(pdf_path=sample_pdf)
|
||||||
|
|
||||||
|
|
||||||
|
class TestArxivImporter:
|
||||||
|
"""Test ArxivImporter functionality."""
|
||||||
|
|
||||||
|
@pytest.fixture
def temp_library(self):
    """Yield ``LibraryPaths`` for a fresh library under ``./.tmp``; delete it afterwards."""
    import uuid  # function-scope import; only scratch naming needs it

    # Random suffix instead of hash(self): the default object hash comes from
    # object identity and can repeat across processes sharing ./.tmp.
    temp_dir = Path("./.tmp") / f"test_arxiv_{uuid.uuid4().hex}"
    temp_dir.mkdir(parents=True, exist_ok=True)
    library_paths = LibraryPaths.from_root(temp_dir)
    library_paths.create_directories()

    yield library_paths

    # Cleanup
    if temp_dir.exists():
        shutil.rmtree(temp_dir)
|
||||||
|
|
||||||
|
@pytest.fixture
def arxiv_importer(self, temp_library):
    """Provide an ``ArxivImporter`` whose storage is rooted at the temporary library."""
    return ArxivImporter(PaperStorageManager(temp_library))
|
||||||
|
|
||||||
|
def test_extract_arxiv_id_clean(self, arxiv_importer):
    """Check ``extract_arxiv_id`` on plain, prefixed, versioned and old-style IDs."""
    # Input -> expected identifier, checked in this order.
    expected_by_input = {
        "2212.06340": "2212.06340",
        "arxiv:2212.06340": "2212.06340",
        "2212.06340v1": "2212.06340v1",
        "math-ph/0701002": "math-ph/0701002",
    }

    for raw_input, expected_id in expected_by_input.items():
        assert arxiv_importer.extract_arxiv_id(raw_input) == expected_id
|
||||||
|
|
||||||
|
def test_extract_arxiv_id_from_url(self, arxiv_importer):
    """Check that the ID is pulled out of an ``arxiv.org/abs/...`` URL."""
    assert (
        arxiv_importer.extract_arxiv_id("https://arxiv.org/abs/2212.06340")
        == "2212.06340"
    )
|
||||||
|
|
||||||
|
def test_fetch_paper_metadata_success(self, arxiv_importer):
    """Test that ``fetch_paper_metadata`` returns the result the client yields."""
    # Build author stand-ins whose ``.name`` is a real string. Passing
    # ``name=`` to the Mock constructor only names the mock itself (for its
    # repr); it does not create a ``name`` attribute holding that string.
    mock_authors = []
    for author_name in ("Alice Smith", "Bob Jones"):
        mock_author = Mock()
        mock_author.name = author_name
        mock_authors.append(mock_author)

    # Mock arXiv result
    mock_result = Mock()
    mock_result.title = "Test Paper"
    mock_result.authors = mock_authors
    mock_result.published = Mock()
    mock_result.updated = Mock()
    mock_result.categories = ["cs.AI", "stat.ML"]

    # Replace the client's results method so no network request is made.
    arxiv_importer.client.results = Mock(return_value=[mock_result])

    # Test
    result = arxiv_importer.fetch_paper_metadata("2212.06340")
    assert result == mock_result
|
||||||
|
|
||||||
|
def test_fetch_paper_metadata_not_found(self, arxiv_importer):
    """An empty result list from the client is reported as ``ValueError``."""
    # Make the client yield nothing for any search.
    empty_results = Mock(return_value=[])
    arxiv_importer.client.results = empty_results

    expected_error = pytest.raises(ValueError, match="Paper not found on arXiv")
    with expected_error:
        arxiv_importer.fetch_paper_metadata("9999.99999")
|
||||||
|
|
||||||
|
@patch("paperlib.importer.arxiv_importer.tempfile.NamedTemporaryFile")
def test_download_pdf(self, mock_tempfile, arxiv_importer):
    """Test PDF downloading.

    ``NamedTemporaryFile`` is patched in the importer module so that the
    temporary file's ``name`` is a known path under ``./.tmp``. The test
    then checks that ``download_pdf`` returns that path and asks the arXiv
    result to download into it.
    """
    # Mock temporary file: the patched context manager reports this path
    # as the temporary file's name.
    mock_temp_path = Path("./.tmp/mock_temp.pdf")
    mock_tempfile.return_value.__enter__.return_value.name = str(mock_temp_path)

    # Mock arXiv result; its download_pdf call is only recorded, not executed.
    mock_result = Mock()

    # Create actual temp file for test, since the mocked download writes nothing.
    with mock_temp_path.open("wb") as f:
        f.write(b"%PDF-1.4\n%%EOF\n")

    try:
        pdf_path = arxiv_importer.download_pdf(mock_result)
        assert pdf_path == mock_temp_path
        # The importer must pass the temporary file's name as the target.
        mock_result.download_pdf.assert_called_once_with(
            filename=str(mock_temp_path)
        )
    finally:
        # Remove the stand-in file whether or not the assertions passed.
        if mock_temp_path.exists():
            mock_temp_path.unlink()
|
||||||
|
|
||||||
|
@patch.object(ArxivImporter, "download_pdf")
@patch.object(ArxivImporter, "fetch_paper_metadata")
def test_import_arxiv_paper_success(
    self, mock_fetch, mock_download, arxiv_importer
):
    """Test successful arXiv paper import.

    Both network-facing methods of ``ArxivImporter`` are patched: metadata
    fetching returns a stub result and downloading returns a local PDF-like
    file, so only the import logic itself is exercised.
    """
    # Mock PDF file. Create the scratch directory explicitly rather than
    # relying on some other fixture having made ``./.tmp`` already.
    pdf_file = Path("./.tmp") / "test_arxiv.pdf"
    pdf_file.parent.mkdir(parents=True, exist_ok=True)
    pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

    try:
        # Mock arXiv result with proper string values. ``name`` is assigned
        # after construction because ``Mock(name=...)`` would not set it.
        mock_author = Mock()
        mock_author.name = "Alice Smith"

        mock_result = Mock()
        mock_result.title = "Test ArXiv Paper"
        mock_result.authors = [mock_author]
        mock_result.published = None
        mock_result.updated = None
        mock_result.categories = ["cs.AI"]

        mock_fetch.return_value = mock_result
        mock_download.return_value = pdf_file

        # Test import
        metadata = arxiv_importer.import_arxiv_paper(
            arxiv_input="2212.06340", notes="Test notes", tags=["test"]
        )

        # Check results
        assert metadata.source_type == SourceType.ARXIV
        assert metadata.source_id == "2212.06340"
        assert metadata.title == "Test ArXiv Paper"
        assert metadata.authors == ["Alice Smith"]
        assert metadata.categories == ["cs.AI"]
        assert metadata.notes == "Test notes"
        assert metadata.tags == ["test"]

    finally:
        # Remove the scratch PDF if the importer left it in place.
        pdf_file.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
@patch.object(ArxivImporter, "fetch_paper_metadata")
def test_import_duplicate_arxiv_paper(self, mock_fetch, arxiv_importer):
    """Test importing the same arXiv paper twice.

    The first import must succeed; repeating it with the same identifier
    must raise ``ValueError`` matching "Paper already imported".
    """
    # Mock first import. Create the scratch directory explicitly rather than
    # relying on some other fixture having made ``./.tmp`` already.
    pdf_file = Path("./.tmp") / "test_arxiv_dup.pdf"
    pdf_file.parent.mkdir(parents=True, exist_ok=True)
    pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

    try:
        with patch.object(ArxivImporter, "download_pdf", return_value=pdf_file):
            mock_result = Mock()
            mock_result.title = "Test Paper"
            mock_result.authors = []
            mock_result.published = None
            mock_result.updated = None
            mock_result.categories = []
            mock_fetch.return_value = mock_result

            # First import should succeed
            arxiv_importer.import_arxiv_paper("2212.06340")

            # Second import should fail
            with pytest.raises(ValueError, match="Paper already imported"):
                arxiv_importer.import_arxiv_paper("2212.06340")

    finally:
        # Remove the scratch PDF if the importer left it in place.
        pdf_file.unlink(missing_ok=True)
|
||||||
@@ -0,0 +1,220 @@
|
|||||||
|
"""Integration tests for paperlib."""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperlib.config import LibraryPaths
|
||||||
|
from paperlib.converter import MinerUConverter
|
||||||
|
from paperlib.importer import ArxivImporter, LocalImporter
|
||||||
|
from paperlib.index import DatabaseManager
|
||||||
|
from paperlib.models import ConversionStatus, SourceType
|
||||||
|
from paperlib.storage import PaperStorageManager
|
||||||
|
|
||||||
|
|
||||||
|
class TestIntegration:
    """Test full integration workflows.

    Each test builds a throw-away library under ``./.tmp`` and drives the
    storage manager, the database manager and the local importer together.
    """

    @pytest.fixture
    def temp_library(self):
        """Yield ``LibraryPaths`` rooted in a scratch directory.

        The directory tree is removed again once the test has finished.
        """
        temp_dir = Path("./.tmp") / f"test_integration_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()

        yield library_paths

        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def sample_pdf(self):
        """Yield the path of a small PDF-like file, deleted after the test."""
        pdf_file = Path("./.tmp") / f"integration_test_{hash(self)}.pdf"
        # Create the scratch directory here so this fixture works no matter
        # whether ``temp_library`` was set up before it.
        pdf_file.parent.mkdir(parents=True, exist_ok=True)
        with pdf_file.open("wb") as f:
            # Minimal PDF content
            f.write(b"%PDF-1.4\n")
            f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")
            f.write(b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n")
            f.write(b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n")
            f.write(b"%%EOF\n")

        yield pdf_file

        # Cleanup
        if pdf_file.exists():
            pdf_file.unlink()

    def test_complete_local_import_workflow(self, temp_library, sample_pdf):
        """Test complete workflow for importing and managing a local PDF.

        Covers import, metadata update, indexing, lookup, free-text search,
        field search, listing, statistics and re-indexing after an update.
        """
        # Set up components
        storage_manager = PaperStorageManager(temp_library)
        db_manager = DatabaseManager(temp_library)
        local_importer = LocalImporter(storage_manager)

        # Initialize database
        db_manager.initialize_database()

        # Import PDF
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Integration Test Paper",
            tags=["integration", "test"],
            notes="This is an integration test paper",
        )

        # Update metadata with authors after import
        metadata.authors = ["Test Author"]
        storage_manager.update_paper_metadata(metadata)

        # Verify metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Integration Test Paper"
        assert metadata.authors == ["Test Author"]
        assert metadata.tags == ["integration", "test"]

        # Index in database
        db_manager.index_paper(metadata)

        # Test retrieval from database
        retrieved_paper = db_manager.get_paper(metadata.paper_id)
        assert retrieved_paper is not None
        assert retrieved_paper["title"] == "Integration Test Paper"

        # Test search functionality
        search_results = list(db_manager.search_papers("Integration Test"))
        assert len(search_results) == 1
        assert search_results[0]["paper_id"] == metadata.paper_id

        # Test field search
        author_results = list(db_manager.search_by_field("author_list", "Test Author"))
        assert len(author_results) == 1

        # Test listing papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 1
        assert all_papers[0]["paper_id"] == metadata.paper_id

        # Test statistics
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 1
        assert stats["by_source_type"]["local"] == 1

        # Test updating metadata
        metadata.notes = "Updated notes"
        storage_manager.update_paper_metadata(metadata)

        # Re-index and verify update
        db_manager.index_paper(metadata)
        updated_paper = db_manager.get_paper(metadata.paper_id)
        # Fail with a clear assertion instead of a TypeError on subscripting
        # if the paper vanished from the index.
        assert updated_paper is not None
        assert "Updated notes" in updated_paper["search_text"]

    def test_multiple_papers_workflow(self, temp_library, sample_pdf):
        """Test workflow with multiple papers.

        Three PDFs with distinct content are imported and indexed, then
        listing, searching, limiting and re-indexing are checked.
        """
        # Set up components
        storage_manager = PaperStorageManager(temp_library)
        db_manager = DatabaseManager(temp_library)
        local_importer = LocalImporter(storage_manager)

        # Initialize database
        db_manager.initialize_database()

        # Import multiple papers (create unique PDFs)
        papers = []
        for i in range(3):
            # Create unique PDF for each import; the differing comment line
            # makes the file contents differ between iterations.
            unique_pdf = Path("./.tmp") / f"unique_paper_{i}_{hash(self)}.pdf"
            unique_pdf.parent.mkdir(parents=True, exist_ok=True)
            with unique_pdf.open("wb") as f:
                f.write(b"%PDF-1.4\n")
                f.write(f"% Unique content {i}\n".encode())
                f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")
                f.write(b"%%EOF\n")

            try:
                metadata = local_importer.import_pdf(
                    pdf_path=unique_pdf,
                    title=f"Test Paper {i + 1}",
                    tags=[f"tag{i + 1}", "common"],
                    notes=f"Notes for paper {i + 1}",
                )
                # Update metadata with authors after import
                metadata.authors = [f"Author {i + 1}"]
                storage_manager.update_paper_metadata(metadata)

                papers.append(metadata)
                db_manager.index_paper(metadata)

            finally:
                if unique_pdf.exists():
                    unique_pdf.unlink()

        # Test listing all papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 3

        # Test search across papers
        common_tag_results = list(db_manager.search_papers("common"))
        assert len(common_tag_results) == 3

        # Test filtering
        filtered_results = list(db_manager.list_papers(limit=2))
        assert len(filtered_results) == 2

        # Test reindexing
        success_count, error_count = db_manager.reindex_from_storage(storage_manager)
        assert success_count == 3
        assert error_count == 0

        # Verify papers still exist after reindex
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 3

    def test_storage_and_database_consistency(self, temp_library, sample_pdf):
        """Test consistency between storage and database.

        A paper is imported and indexed, compared between both back ends,
        removed from the index and then restored by re-indexing from storage.
        """
        # Set up components
        storage_manager = PaperStorageManager(temp_library)
        db_manager = DatabaseManager(temp_library)
        local_importer = LocalImporter(storage_manager)

        # Initialize database
        db_manager.initialize_database()

        # Import paper
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Consistency Test Paper",
        )

        # Index in database
        db_manager.index_paper(metadata)

        # Verify file exists in storage
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

        # Verify paper exists in database
        db_paper = db_manager.get_paper(metadata.paper_id)
        assert db_paper is not None

        # Load from storage and compare
        storage_metadata = storage_manager.load_paper_metadata(
            metadata.paper_id, metadata.source_type
        )
        # Guard before attribute access so a missing record fails as a plain
        # assertion rather than an AttributeError.
        assert storage_metadata is not None
        assert storage_metadata.title == db_paper["title"]
        assert storage_metadata.paper_id == db_paper["paper_id"]

        # Test reindexing maintains consistency
        db_manager.remove_paper(metadata.paper_id)
        assert db_manager.get_paper(metadata.paper_id) is None

        # Reindex from storage
        success_count, error_count = db_manager.reindex_from_storage(storage_manager)
        assert success_count == 1
        assert error_count == 0

        # Verify paper is back in database
        restored_paper = db_manager.get_paper(metadata.paper_id)
        assert restored_paper is not None
        assert restored_paper["title"] == "Consistency Test Paper"
|
||||||
@@ -0,0 +1,230 @@
|
|||||||
|
"""Tests for paperlib data models."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperlib.models import (
|
||||||
|
ConversionStatus,
|
||||||
|
PaperMetadata,
|
||||||
|
PaperSummary,
|
||||||
|
SourceType,
|
||||||
|
SummaryStatus,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestPaperMetadata:
    """Exercise construction and (de)serialisation of ``PaperMetadata``."""

    def test_create_metadata(self):
        """Build an instance from keyword arguments and read every field back."""
        supplied = {
            "paper_id": "test-paper-1",
            "source_type": SourceType.LOCAL,
            "title": "Test Paper",
            "authors": ["Alice Smith", "Bob Jones"],
            "categories": ["cs.AI", "stat.ML"],
            "tags": ["machine-learning", "ai"],
            "notes": "Test notes",
        }
        paper = PaperMetadata(**supplied)

        # Everything passed in must be readable back unchanged.
        assert paper.paper_id == supplied["paper_id"]
        assert paper.source_type == supplied["source_type"]
        assert paper.title == supplied["title"]
        assert paper.authors == supplied["authors"]
        assert paper.categories == supplied["categories"]
        assert paper.tags == supplied["tags"]
        assert paper.notes == supplied["notes"]

        # The two status fields were not supplied; check their initial values.
        assert paper.conversion_status == ConversionStatus.PENDING
        assert paper.summary_status == SummaryStatus.NOT_REQUESTED

    def test_to_dict(self):
        """Serialise an instance and inspect the resulting plain dictionary."""
        paper = PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Test Paper",
            published_date=datetime(2022, 12, 13, 2, 46, 55),
        )
        serialised = paper.to_dict()

        # The enum and the datetime are expected as plain strings here.
        expected_entries = (
            ("paper_id", "test-paper-1"),
            ("source_type", "arxiv"),
            ("source_id", "2212.06340"),
            ("title", "Test Paper"),
            ("published_date", "2022-12-13T02:46:55"),
        )
        for key, wanted in expected_entries:
            assert serialised[key] == wanted

    def test_from_dict(self):
        """Rebuild an instance from a plain dictionary of string values."""
        timestamp = "2022-12-13T02:46:55"
        payload = {
            "paper_id": "test-paper-1",
            "source_type": "local",
            "title": "Test Paper",
            "authors": ["Alice Smith"],
            "published_date": timestamp,
            "categories": ["cs.AI"],
            "pdf_path": "papers/test.pdf",
            "imported_at": timestamp,
            "conversion_status": "success",
            "summary_status": "pending",
            "tags": ["test"],
            "notes": "Test notes",
        }
        paper = PaperMetadata.from_dict(payload)

        assert paper.paper_id == "test-paper-1"
        assert paper.source_type == SourceType.LOCAL
        assert paper.title == "Test Paper"
        assert paper.authors == ["Alice Smith"]
        # String fields must come back as a datetime and as enum members.
        assert paper.published_date == datetime(2022, 12, 13, 2, 46, 55)
        assert paper.conversion_status == ConversionStatus.SUCCESS
        assert paper.summary_status == SummaryStatus.PENDING

    def test_save_and_load_file(self):
        """Write an instance to a JSON file and load it back again."""
        paper = PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.LOCAL,
            title="Test Paper",
            authors=["Alice Smith"],
        )

        # Only the file name is needed; the handle is closed on leaving the block.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
            json_path = Path(handle.name)

        try:
            paper.save_to_file(json_path)

            # The file on disk must be valid JSON carrying the identifier.
            assert json_path.exists()
            with json_path.open() as stream:
                on_disk = json.load(stream)
            assert on_disk["paper_id"] == "test-paper-1"

            # Round trip through the loader.
            restored = PaperMetadata.load_from_file(json_path)
            assert restored.paper_id == "test-paper-1"
            assert restored.title == "Test Paper"
            assert restored.source_type == SourceType.LOCAL
        finally:
            if json_path.exists():
                json_path.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
class TestPaperSummary:
    """Exercise construction and (de)serialisation of ``PaperSummary``."""

    def test_create_summary(self):
        """Build a summary from keyword arguments and read fields back."""
        contributions = ["Novel architecture", "Improved performance"]
        problems = ["classification", "optimization"]
        digest = PaperSummary(
            one_sentence_summary="This paper introduces a new method.",
            problem_statement="Current methods are inefficient.",
            method_overview="We propose a novel approach.",
            main_results="Our method achieves 95% accuracy.",
            claimed_contributions=list(contributions),
            problem_tags=list(problems),
            technique_tags=["neural-networks", "reinforcement-learning"],
        )

        # ``schema_version`` was not passed in; check the value it starts with.
        assert digest.schema_version == "1.0"
        assert digest.one_sentence_summary == "This paper introduces a new method."
        assert digest.problem_statement == "Current methods are inefficient."
        assert digest.claimed_contributions == contributions
        assert digest.problem_tags == problems

    def test_to_dict(self):
        """Serialise a summary and inspect the resulting dictionary."""
        digest = PaperSummary(
            one_sentence_summary="Test summary",
            relevance_to_user=0.85,
        )
        serialised = digest.to_dict()

        expected_entries = (
            ("schema_version", "1.0"),
            ("one_sentence_summary", "Test summary"),
            ("relevance_to_user", 0.85),
        )
        for key, wanted in expected_entries:
            assert serialised[key] == wanted

    def test_from_dict(self):
        """Rebuild a summary from a plain dictionary."""
        payload = {
            "schema_version": "1.0",
            "one_sentence_summary": "Test summary",
            "problem_statement": "Test problem",
            "claimed_contributions": ["Test contribution"],
            "problem_tags": ["test"],
            "technique_tags": ["neural-networks"],
            "entities": ["Entity1", "Entity2"],
        }
        digest = PaperSummary.from_dict(payload)

        assert digest.schema_version == "1.0"
        assert digest.one_sentence_summary == "Test summary"
        assert digest.problem_statement == "Test problem"
        assert digest.claimed_contributions == ["Test contribution"]
        assert digest.entities == ["Entity1", "Entity2"]

    def test_save_and_load_file(self):
        """Write a summary to a JSON file and load it back again."""
        digest = PaperSummary(
            one_sentence_summary="Test summary",
            problem_tags=["tag1", "tag2"],
        )

        # Only the file name is needed; the handle is closed on leaving the block.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as handle:
            json_path = Path(handle.name)

        try:
            digest.save_to_file(json_path)

            # The file on disk must be valid JSON carrying the summary text.
            assert json_path.exists()
            with json_path.open() as stream:
                on_disk = json.load(stream)
            assert on_disk["one_sentence_summary"] == "Test summary"

            # Round trip through the loader.
            restored = PaperSummary.load_from_file(json_path)
            assert restored.one_sentence_summary == "Test summary"
            assert restored.problem_tags == ["tag1", "tag2"]
        finally:
            if json_path.exists():
                json_path.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnums:
    """Pin the string value each enum member compares equal to."""

    def test_source_type_values(self):
        """Check the string values of ``SourceType`` members."""
        pairs = (
            (SourceType.LOCAL, "local"),
            (SourceType.ARXIV, "arxiv"),
        )
        for member, text in pairs:
            assert member == text

    def test_conversion_status_values(self):
        """Check the string values of ``ConversionStatus`` members."""
        pairs = (
            (ConversionStatus.PENDING, "pending"),
            (ConversionStatus.PROCESSING, "processing"),
            (ConversionStatus.SUCCESS, "success"),
            (ConversionStatus.FAILED, "failed"),
        )
        for member, text in pairs:
            assert member == text

    def test_summary_status_values(self):
        """Check the string values of ``SummaryStatus`` members."""
        pairs = (
            (SummaryStatus.PENDING, "pending"),
            (SummaryStatus.PROCESSING, "processing"),
            (SummaryStatus.SUCCESS, "success"),
            (SummaryStatus.FAILED, "failed"),
            (SummaryStatus.NOT_REQUESTED, "not_requested"),
        )
        for member, text in pairs:
            assert member == text
|
||||||
@@ -0,0 +1,261 @@
|
|||||||
|
"""Tests for paperlib storage manager."""
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperlib.config import LibraryPaths
|
||||||
|
from paperlib.models import ConversionStatus, PaperMetadata, SourceType
|
||||||
|
from paperlib.storage import PaperStorageManager
|
||||||
|
|
||||||
|
|
||||||
|
class TestPaperStorageManager:
    """Test PaperStorageManager functionality.

    Every test works against a throw-away library created under ``./.tmp``.
    """

    @pytest.fixture
    def temp_library(self):
        """Yield ``LibraryPaths`` rooted in a scratch directory.

        The directory tree is removed again once the test has finished.
        """
        temp_dir = Path("./.tmp") / f"test_library_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()

        yield library_paths

        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def storage_manager(self, temp_library):
        """Create a storage manager bound to the temporary library."""
        return PaperStorageManager(temp_library)

    @pytest.fixture
    def sample_pdf(self):
        """Yield the path of a small PDF-like file, deleted after the test."""
        # Create a minimal PDF-like file
        temp_file = Path("./.tmp") / f"test_paper_{hash(self)}.pdf"
        # Create the scratch directory here so this fixture works no matter
        # whether ``temp_library`` was set up before it.
        temp_file.parent.mkdir(parents=True, exist_ok=True)
        with temp_file.open("wb") as f:
            # Minimal PDF header
            f.write(b"%PDF-1.4\n")
            f.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")
            f.write(b"%%EOF\n")

        yield temp_file

        # Cleanup
        if temp_file.exists():
            temp_file.unlink()

    def test_generate_paper_id_local(self, storage_manager, sample_pdf):
        """Test generating paper ID for local files."""
        paper_id = storage_manager.generate_paper_id(
            SourceType.LOCAL, pdf_path=sample_pdf
        )

        assert paper_id.startswith("local-")
        assert len(paper_id) == 22  # "local-" + 16 chars hash

    def test_generate_paper_id_arxiv(self, storage_manager):
        """Test generating paper ID for arXiv papers."""
        paper_id = storage_manager.generate_paper_id(
            SourceType.ARXIV, source_id="2212.06340"
        )

        # The expected ID has the dot of the arXiv identifier replaced by "_".
        assert paper_id == "arxiv-2212_06340"

    def test_get_paper_directory_arxiv(self, storage_manager):
        """Test getting paper directory for arXiv papers."""
        paper_dir = storage_manager.get_paper_directory(
            "arxiv-2212_06340", SourceType.ARXIV
        )

        # Expected layout: <papers_dir>/arxiv/<first part of the ID>/<paper ID>.
        expected = (
            storage_manager.library_paths.papers_dir
            / "arxiv"
            / "2212"
            / "arxiv-2212_06340"
        )
        assert paper_dir == expected

    def test_get_paper_directory_local(self, storage_manager):
        """Test getting paper directory for local papers."""
        paper_dir = storage_manager.get_paper_directory(
            "local-abcd1234efgh5678", SourceType.LOCAL
        )

        # Expected layout: <papers_dir>/local/<ID without the "local-" prefix>.
        expected = (
            storage_manager.library_paths.papers_dir / "local" / "abcd1234efgh5678"
        )
        assert paper_dir == expected

    def test_get_paper_paths(self, storage_manager):
        """Test getting all paper paths."""
        paths = storage_manager.get_paper_paths("arxiv-2212_06340", SourceType.ARXIV)

        assert "directory" in paths
        assert "meta" in paths
        assert "pdf" in paths
        assert "markdown" in paths
        assert "summary_json" in paths
        assert "summary_md" in paths
        assert "assets" in paths
        assert "logs" in paths

        # Check that paths are Path objects
        assert isinstance(paths["meta"], Path)
        assert paths["meta"].name == "meta.json"
        assert paths["pdf"].name == "source.pdf"

    def test_store_paper_local(self, storage_manager, sample_pdf):
        """Test storing a local PDF paper."""
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.LOCAL,
            title="Test Paper",
            authors=["Test Author"],
            tags=["test"],
        )

        # Check metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Test Paper"
        assert metadata.authors == ["Test Author"]
        assert metadata.tags == ["test"]
        assert metadata.conversion_status == ConversionStatus.PENDING

        # Check file structure was created
        paths = storage_manager.get_paper_paths(metadata.paper_id, metadata.source_type)
        assert paths["directory"].exists()
        assert paths["meta"].exists()
        assert paths["pdf"].exists()
        assert paths["assets"].exists()
        assert paths["logs"].exists()

    def test_store_paper_arxiv(self, storage_manager, sample_pdf):
        """Test storing an arXiv paper."""
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Test arXiv Paper",
            authors=["Alice Smith", "Bob Jones"],
            categories=["cs.AI"],
        )

        # Check metadata
        assert metadata.source_type == SourceType.ARXIV
        assert metadata.source_id == "2212.06340"
        assert metadata.title == "Test arXiv Paper"
        assert metadata.authors == ["Alice Smith", "Bob Jones"]
        assert metadata.categories == ["cs.AI"]

        # Check file paths are set correctly (only truthiness is verified here)
        assert metadata.pdf_path
        assert metadata.paper_md_path
        assert metadata.summary_json_path
        assert metadata.summary_md_path

    def test_load_paper_metadata(self, storage_manager, sample_pdf):
        """Test loading paper metadata."""
        # First store a paper
        original_metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )

        # Load it back
        loaded_metadata = storage_manager.load_paper_metadata(
            original_metadata.paper_id, original_metadata.source_type
        )

        assert loaded_metadata is not None
        assert loaded_metadata.paper_id == original_metadata.paper_id
        assert loaded_metadata.title == "Test Paper"
        assert loaded_metadata.source_type == SourceType.LOCAL

    def test_load_nonexistent_paper(self, storage_manager):
        """Test loading metadata for nonexistent paper."""
        metadata = storage_manager.load_paper_metadata("nonexistent", SourceType.LOCAL)
        assert metadata is None

    def test_update_paper_metadata(self, storage_manager, sample_pdf):
        """Test updating paper metadata."""
        # Store initial paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Original Title"
        )

        # Update metadata
        metadata.title = "Updated Title"
        metadata.conversion_status = ConversionStatus.SUCCESS
        storage_manager.update_paper_metadata(metadata)

        # Load and verify update
        loaded_metadata = storage_manager.load_paper_metadata(
            metadata.paper_id, metadata.source_type
        )
        # ``load_paper_metadata`` returns None for a missing paper; guard so
        # that case fails as an assertion rather than an AttributeError.
        assert loaded_metadata is not None
        assert loaded_metadata.title == "Updated Title"
        assert loaded_metadata.conversion_status == ConversionStatus.SUCCESS

    def test_list_all_papers(self, storage_manager, sample_pdf):
        """Test listing all papers in library."""
        # Initially empty
        papers = list(storage_manager.list_all_papers())
        assert len(papers) == 0

        # Add some papers (the same PDF is stored once per source type)
        metadata1 = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Paper 1"
        )

        metadata2 = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Paper 2",
        )

        # List papers
        papers = list(storage_manager.list_all_papers())
        assert len(papers) == 2

        paper_ids = {p.paper_id for p in papers}
        assert metadata1.paper_id in paper_ids
        assert metadata2.paper_id in paper_ids

    def test_paper_exists(self, storage_manager, sample_pdf):
        """Test checking if paper exists."""
        # Initially doesn't exist
        assert not storage_manager.paper_exists("nonexistent", SourceType.LOCAL)

        # Store a paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )

        # Now it exists
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

    def test_delete_paper(self, storage_manager, sample_pdf):
        """Test deleting a paper."""
        # Store a paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )

        # Verify it exists
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

        # Delete it
        result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type)
        assert result is True

        # Verify it's gone
        assert not storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

        # Deleting again should return False
        result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type)
        assert result is False
|
||||||
@@ -764,6 +764,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646, upload-time = "2026-03-09T11:31:10.771Z" },
|
{ url = "https://files.pythonhosted.org/packages/49/fa/391e437a34e55095173dca5f24070d89cbc233ff85bf1c29c93248c6588d/imageio-2.37.3-py3-none-any.whl", hash = "sha256:46f5bb8522cd421c0f5ae104d8268f569d856b29eb1a13b92829d1970f32c9f0", size = 317646, upload-time = "2026-03-09T11:31:10.771Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iniconfig"
|
||||||
|
version = "2.3.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jinja2"
|
name = "jinja2"
|
||||||
version = "3.1.6"
|
version = "3.1.6"
|
||||||
@@ -1413,6 +1422,11 @@ dependencies = [
|
|||||||
{ name = "typer" },
|
{ name = "typer" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.dev-dependencies]
|
||||||
|
dev = [
|
||||||
|
{ name = "pytest" },
|
||||||
|
]
|
||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "arxiv", specifier = ">=2.0.0" },
|
{ name = "arxiv", specifier = ">=2.0.0" },
|
||||||
@@ -1421,6 +1435,9 @@ requires-dist = [
|
|||||||
{ name = "typer", specifier = ">=0.24.1" },
|
{ name = "typer", specifier = ">=0.24.1" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.metadata.requires-dev]
|
||||||
|
dev = [{ name = "pytest", specifier = ">=9.0.3" }]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pdfminer-six"
|
name = "pdfminer-six"
|
||||||
version = "20260107"
|
version = "20260107"
|
||||||
@@ -1482,6 +1499,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" },
|
{ url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pluggy"
|
||||||
|
version = "1.6.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "protobuf"
|
name = "protobuf"
|
||||||
version = "7.34.1"
|
version = "7.34.1"
|
||||||
@@ -1665,6 +1691,22 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/fb/d8/940fcaa6a1f3763d72751b6bc8054f40beeacd6e9e5b19069c6c73dab5af/pypptx_with_oxml-1.0.3-py3-none-any.whl", hash = "sha256:4b3ccf51185e0f9e60ebf2884e74153d7fcb00e7e4f0461404e96e0260d7bba1", size = 493041, upload-time = "2026-01-30T08:51:25.797Z" },
|
{ url = "https://files.pythonhosted.org/packages/fb/d8/940fcaa6a1f3763d72751b6bc8054f40beeacd6e9e5b19069c6c73dab5af/pypptx_with_oxml-1.0.3-py3-none-any.whl", hash = "sha256:4b3ccf51185e0f9e60ebf2884e74153d7fcb00e7e4f0461404e96e0260d7bba1", size = 493041, upload-time = "2026-01-30T08:51:25.797Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytest"
|
||||||
|
version = "9.0.3"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||||
|
{ name = "iniconfig" },
|
||||||
|
{ name = "packaging" },
|
||||||
|
{ name = "pluggy" },
|
||||||
|
{ name = "pygments" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "python-dateutil"
|
name = "python-dateutil"
|
||||||
version = "2.9.0.post0"
|
version = "2.9.0.post0"
|
||||||
|
|||||||
Reference in New Issue
Block a user