test: add tests
This commit is contained in:
@@ -0,0 +1,261 @@
|
||||
"""Tests for paperlib storage manager."""
|
||||
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.models import ConversionStatus, PaperMetadata, SourceType
|
||||
from paperlib.storage import PaperStorageManager
|
||||
|
||||
|
||||
class TestPaperStorageManager:
    """Exercise ``PaperStorageManager`` against a throwaway on-disk library.

    Each test receives its own library root and sample PDF underneath
    pytest's per-test ``tmp_path`` directory, so tests never share state and
    nothing is written into the current working directory.
    """

    @pytest.fixture
    def temp_library(self, tmp_path):
        """Return ``LibraryPaths`` for a fresh library rooted under ``tmp_path``."""
        library_root = tmp_path / "test_library"
        library_root.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(library_root)
        library_paths.create_directories()
        # No explicit teardown: pytest owns and prunes ``tmp_path``.
        return library_paths

    @pytest.fixture
    def storage_manager(self, temp_library):
        """Return a ``PaperStorageManager`` bound to the temporary library."""
        return PaperStorageManager(temp_library)

    @pytest.fixture
    def sample_pdf(self, tmp_path):
        """Write a minimal PDF-like file and return its path.

        The file lives next to (not inside) the library root, so it is never
        picked up as a stored paper before a test stores it.
        """
        pdf_file = tmp_path / "test_paper.pdf"
        pdf_file.write_bytes(
            # Minimal PDF header, one catalog object, and the EOF marker.
            b"%PDF-1.4\n"
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            b"%%EOF\n"
        )
        return pdf_file

    def test_generate_paper_id_local(self, storage_manager, sample_pdf):
        """Local paper IDs are ``local-`` followed by a 16-character hash."""
        paper_id = storage_manager.generate_paper_id(
            SourceType.LOCAL, pdf_path=sample_pdf
        )

        assert paper_id.startswith("local-")
        assert len(paper_id) == 22  # "local-" + 16 chars hash

    def test_generate_paper_id_arxiv(self, storage_manager):
        """arXiv paper IDs are ``arxiv-`` plus the source ID with ``.`` -> ``_``."""
        paper_id = storage_manager.generate_paper_id(
            SourceType.ARXIV, source_id="2212.06340"
        )

        assert paper_id == "arxiv-2212_06340"

    def test_get_paper_directory_arxiv(self, storage_manager):
        """arXiv papers live under ``papers/arxiv/<first ID part>/<paper_id>``."""
        paper_dir = storage_manager.get_paper_directory(
            "arxiv-2212_06340", SourceType.ARXIV
        )

        expected = (
            storage_manager.library_paths.papers_dir
            / "arxiv"
            / "2212"
            / "arxiv-2212_06340"
        )
        assert paper_dir == expected

    def test_get_paper_directory_local(self, storage_manager):
        """Local papers live under ``papers/local/<id without the prefix>``."""
        paper_dir = storage_manager.get_paper_directory(
            "local-abcd1234efgh5678", SourceType.LOCAL
        )

        expected = (
            storage_manager.library_paths.papers_dir / "local" / "abcd1234efgh5678"
        )
        assert paper_dir == expected

    def test_get_paper_paths(self, storage_manager):
        """``get_paper_paths`` exposes every expected key with ``Path`` values."""
        paths = storage_manager.get_paper_paths("arxiv-2212_06340", SourceType.ARXIV)

        expected_keys = (
            "directory",
            "meta",
            "pdf",
            "markdown",
            "summary_json",
            "summary_md",
            "assets",
            "logs",
        )
        for key in expected_keys:
            assert key in paths

        # Check that paths are Path objects with the expected file names.
        assert isinstance(paths["meta"], Path)
        assert paths["meta"].name == "meta.json"
        assert paths["pdf"].name == "source.pdf"

    def test_store_paper_local(self, storage_manager, sample_pdf):
        """Storing a local PDF records metadata and creates the paper layout."""
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.LOCAL,
            title="Test Paper",
            authors=["Test Author"],
            tags=["test"],
        )

        # Check metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Test Paper"
        assert metadata.authors == ["Test Author"]
        assert metadata.tags == ["test"]
        assert metadata.conversion_status == ConversionStatus.PENDING

        # Check file structure was created
        paths = storage_manager.get_paper_paths(metadata.paper_id, metadata.source_type)
        for key in ("directory", "meta", "pdf", "assets", "logs"):
            assert paths[key].exists()

    def test_store_paper_arxiv(self, storage_manager, sample_pdf):
        """Storing an arXiv paper keeps its source fields and sets file paths."""
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Test arXiv Paper",
            authors=["Alice Smith", "Bob Jones"],
            categories=["cs.AI"],
        )

        # Check metadata
        assert metadata.source_type == SourceType.ARXIV
        assert metadata.source_id == "2212.06340"
        assert metadata.title == "Test arXiv Paper"
        assert metadata.authors == ["Alice Smith", "Bob Jones"]
        assert metadata.categories == ["cs.AI"]

        # Check file paths are set correctly
        assert metadata.pdf_path
        assert metadata.paper_md_path
        assert metadata.summary_json_path
        assert metadata.summary_md_path

    def test_load_paper_metadata(self, storage_manager, sample_pdf):
        """Metadata written by ``store_paper`` can be loaded back unchanged."""
        # First store a paper
        original_metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )

        # Load it back
        loaded_metadata = storage_manager.load_paper_metadata(
            original_metadata.paper_id, original_metadata.source_type
        )

        assert loaded_metadata is not None
        assert loaded_metadata.paper_id == original_metadata.paper_id
        assert loaded_metadata.title == "Test Paper"
        assert loaded_metadata.source_type == SourceType.LOCAL

    def test_load_nonexistent_paper(self, storage_manager):
        """Loading an unknown paper ID returns ``None``."""
        metadata = storage_manager.load_paper_metadata("nonexistent", SourceType.LOCAL)
        assert metadata is None

    def test_update_paper_metadata(self, storage_manager, sample_pdf):
        """Updated metadata fields are persisted and visible on reload."""
        # Store initial paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Original Title"
        )

        # Update metadata
        metadata.title = "Updated Title"
        metadata.conversion_status = ConversionStatus.SUCCESS
        storage_manager.update_paper_metadata(metadata)

        # Load and verify update
        loaded_metadata = storage_manager.load_paper_metadata(
            metadata.paper_id, metadata.source_type
        )
        # Fail with a clear assertion rather than an AttributeError below.
        assert loaded_metadata is not None
        assert loaded_metadata.title == "Updated Title"
        assert loaded_metadata.conversion_status == ConversionStatus.SUCCESS

    def test_list_all_papers(self, storage_manager, sample_pdf):
        """``list_all_papers`` yields every stored paper across source types."""
        # Initially empty
        papers = list(storage_manager.list_all_papers())
        assert len(papers) == 0

        # Add some papers
        metadata1 = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Paper 1"
        )

        metadata2 = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Paper 2",
        )

        # List papers
        papers = list(storage_manager.list_all_papers())
        assert len(papers) == 2

        paper_ids = {p.paper_id for p in papers}
        assert metadata1.paper_id in paper_ids
        assert metadata2.paper_id in paper_ids

    def test_paper_exists(self, storage_manager, sample_pdf):
        """``paper_exists`` is false before storing and true afterwards."""
        # Initially doesn't exist
        assert not storage_manager.paper_exists("nonexistent", SourceType.LOCAL)

        # Store a paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )

        # Now it exists
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

    def test_delete_paper(self, storage_manager, sample_pdf):
        """Deleting returns ``True`` once, then ``False`` for the same paper."""
        # Store a paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )

        # Verify it exists
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

        # Delete it
        result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type)
        assert result is True

        # Verify it's gone
        assert not storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

        # Deleting again should return False
        result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type)
        assert result is False
|
||||
Reference in New Issue
Block a user