Files
2026-04-17 17:03:59 -04:00

262 lines
9.1 KiB
Python

"""Tests for paperlib storage manager."""
import shutil
import tempfile
from pathlib import Path

import pytest

from paperlib.config import LibraryPaths
from paperlib.models import ConversionStatus, SourceType
from paperlib.storage import PaperStorageManager
class TestPaperStorageManager:
    """Test PaperStorageManager functionality.

    Each test works against a throwaway library and a throwaway PDF that are
    created under ``./.tmp`` by the fixtures below and removed afterwards.
    """

    @pytest.fixture
    def temp_library(self):
        """Create a temporary library for testing.

        Yields:
            The ``LibraryPaths`` built from a freshly created, uniquely named
            directory under ``./.tmp``. The directory is removed on teardown.
        """
        scratch_root = Path("./.tmp")
        scratch_root.mkdir(parents=True, exist_ok=True)
        # mkdtemp always creates a new, uniquely named directory, so repeated
        # or parallel runs never end up sharing (or deleting) each other's
        # library.
        temp_dir = Path(tempfile.mkdtemp(prefix="test_library_", dir=scratch_root))
        try:
            library_paths = LibraryPaths.from_root(temp_dir)
            library_paths.create_directories()
            yield library_paths
        finally:
            # Cleanup (also runs when setup above fails part-way through).
            if temp_dir.exists():
                shutil.rmtree(temp_dir)

    @pytest.fixture
    def storage_manager(self, temp_library):
        """Create a storage manager for testing."""
        return PaperStorageManager(temp_library)

    @pytest.fixture
    def sample_pdf(self):
        """Create a sample PDF file for testing.

        Yields:
            Path to a small PDF-like file under ``./.tmp``. The file is
            removed on teardown.
        """
        scratch_root = Path("./.tmp")
        # Create the scratch directory here as well so this fixture does not
        # depend on ``temp_library`` having been set up first.
        scratch_root.mkdir(parents=True, exist_ok=True)
        # Create a minimal PDF-like file with a unique name; delete=False
        # keeps it on disk after the handle is closed so tests can read it.
        with tempfile.NamedTemporaryFile(
            mode="wb",
            dir=scratch_root,
            prefix="test_paper_",
            suffix=".pdf",
            delete=False,
        ) as handle:
            # Minimal PDF header
            handle.write(b"%PDF-1.4\n")
            handle.write(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")
            handle.write(b"%%EOF\n")
        temp_file = Path(handle.name)
        try:
            yield temp_file
        finally:
            # Cleanup
            if temp_file.exists():
                temp_file.unlink()

    def test_generate_paper_id_local(self, storage_manager, sample_pdf):
        """Test generating paper ID for local files."""
        paper_id = storage_manager.generate_paper_id(
            SourceType.LOCAL, pdf_path=sample_pdf
        )
        assert paper_id.startswith("local-")
        assert len(paper_id) == 22  # "local-" + 16 chars hash

    def test_generate_paper_id_arxiv(self, storage_manager):
        """Test generating paper ID for arXiv papers."""
        paper_id = storage_manager.generate_paper_id(
            SourceType.ARXIV, source_id="2212.06340"
        )
        assert paper_id == "arxiv-2212_06340"

    def test_get_paper_directory_arxiv(self, storage_manager):
        """Test getting paper directory for arXiv papers."""
        paper_dir = storage_manager.get_paper_directory(
            "arxiv-2212_06340", SourceType.ARXIV
        )
        # Should extract year 2022 from 2212.06340 (22 -> 2022)
        expected = (
            storage_manager.library_paths.papers_dir
            / "arxiv"
            / "2022"
            / "arxiv-2212_06340"
        )
        assert paper_dir == expected

    def test_get_paper_directory_local(self, storage_manager):
        """Test getting paper directory for local papers."""
        paper_dir = storage_manager.get_paper_directory(
            "local-abcd1234efgh5678", SourceType.LOCAL
        )
        expected = (
            storage_manager.library_paths.papers_dir / "local" / "abcd1234efgh5678"
        )
        assert paper_dir == expected

    def test_get_paper_paths(self, storage_manager):
        """Test getting all paper paths."""
        paths = storage_manager.get_paper_paths("arxiv-2212_06340", SourceType.ARXIV)
        assert "directory" in paths
        assert "meta" in paths
        assert "pdf" in paths
        assert "markdown" in paths
        assert "summary_json" in paths
        assert "summary_md" in paths
        assert "assets" in paths
        assert "logs" in paths
        # Check that paths are Path objects
        assert isinstance(paths["meta"], Path)
        assert paths["meta"].name == "meta.json"
        assert paths["pdf"].name == "source.pdf"

    def test_store_paper_local(self, storage_manager, sample_pdf):
        """Test storing a local PDF paper."""
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.LOCAL,
            title="Test Paper",
            authors=["Test Author"],
            tags=["test"],
        )
        # Check metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Test Paper"
        assert metadata.authors == ["Test Author"]
        assert metadata.tags == ["test"]
        assert metadata.conversion_status == ConversionStatus.PENDING
        # Check file structure was created
        paths = storage_manager.get_paper_paths(metadata.paper_id, metadata.source_type)
        assert paths["directory"].exists()
        assert paths["meta"].exists()
        assert paths["pdf"].exists()
        assert paths["assets"].exists()
        assert paths["logs"].exists()

    def test_store_paper_arxiv(self, storage_manager, sample_pdf):
        """Test storing an arXiv paper."""
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Test arXiv Paper",
            authors=["Alice Smith", "Bob Jones"],
            categories=["cs.AI"],
        )
        # Check metadata
        assert metadata.source_type == SourceType.ARXIV
        assert metadata.source_id == "2212.06340"
        assert metadata.title == "Test arXiv Paper"
        assert metadata.authors == ["Alice Smith", "Bob Jones"]
        assert metadata.categories == ["cs.AI"]
        # Check file paths are set correctly
        assert metadata.pdf_path
        assert metadata.paper_md_path
        assert metadata.summary_json_path
        assert metadata.summary_md_path

    def test_load_paper_metadata(self, storage_manager, sample_pdf):
        """Test loading paper metadata."""
        # First store a paper
        original_metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )
        # Load it back
        loaded_metadata = storage_manager.load_paper_metadata(
            original_metadata.paper_id, original_metadata.source_type
        )
        assert loaded_metadata is not None
        assert loaded_metadata.paper_id == original_metadata.paper_id
        assert loaded_metadata.title == "Test Paper"
        assert loaded_metadata.source_type == SourceType.LOCAL

    def test_load_nonexistent_paper(self, storage_manager):
        """Test loading metadata for nonexistent paper."""
        metadata = storage_manager.load_paper_metadata("nonexistent", SourceType.LOCAL)
        assert metadata is None

    def test_update_paper_metadata(self, storage_manager, sample_pdf):
        """Test updating paper metadata."""
        # Store initial paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Original Title"
        )
        # Update metadata
        metadata.title = "Updated Title"
        metadata.conversion_status = ConversionStatus.SUCCESS
        storage_manager.update_paper_metadata(metadata)
        # Load and verify update
        loaded_metadata = storage_manager.load_paper_metadata(
            metadata.paper_id, metadata.source_type
        )
        # Fail with a clear assertion (not an AttributeError) if the load
        # came back empty.
        assert loaded_metadata is not None
        assert loaded_metadata.title == "Updated Title"
        assert loaded_metadata.conversion_status == ConversionStatus.SUCCESS

    def test_list_all_papers(self, storage_manager, sample_pdf):
        """Test listing all papers in library."""
        # Initially empty
        papers = list(storage_manager.list_all_papers())
        assert papers == []
        # Add some papers
        metadata1 = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Paper 1"
        )
        metadata2 = storage_manager.store_paper(
            pdf_path=sample_pdf,
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Paper 2",
        )
        # List papers
        papers = list(storage_manager.list_all_papers())
        assert len(papers) == 2
        paper_ids = {p.paper_id for p in papers}
        assert metadata1.paper_id in paper_ids
        assert metadata2.paper_id in paper_ids

    def test_paper_exists(self, storage_manager, sample_pdf):
        """Test checking if paper exists."""
        # Initially doesn't exist
        assert not storage_manager.paper_exists("nonexistent", SourceType.LOCAL)
        # Store a paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )
        # Now it exists
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

    def test_delete_paper(self, storage_manager, sample_pdf):
        """Test deleting a paper."""
        # Store a paper
        metadata = storage_manager.store_paper(
            pdf_path=sample_pdf, source_type=SourceType.LOCAL, title="Test Paper"
        )
        # Verify it exists
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)
        # Delete it
        result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type)
        assert result is True
        # Verify it's gone
        assert not storage_manager.paper_exists(metadata.paper_id, metadata.source_type)
        # Deleting again should return False
        result = storage_manager.delete_paper(metadata.paper_id, metadata.source_type)
        assert result is False