# File: paperlib/tests/test_integration.py
# Listing metadata: 2026-04-17 16:54:30 -04:00, 220 lines, 7.7 KiB, Python

"""Integration tests for paperlib."""
import shutil
from pathlib import Path
import pytest
from paperlib.config import LibraryPaths
from paperlib.importer import LocalImporter
from paperlib.index import DatabaseManager
from paperlib.models import SourceType
from paperlib.storage import PaperStorageManager
class TestIntegration:
    """Test full integration workflows.

    Each test wires a ``PaperStorageManager``, a ``DatabaseManager`` and a
    ``LocalImporter`` to a throw-away library directory and exercises them
    together: import, metadata update, indexing, search, listing, statistics
    and re-indexing from storage.
    """

    # Scratch area for test artefacts, relative to the current working
    # directory. Every helper that writes below it creates it on demand.
    _SCRATCH_ROOT = Path("./.tmp")

    @staticmethod
    def _write_pdf(pdf_path, *chunks):
        """Write a tiny PDF-like file: header, the given raw chunks, EOF marker.

        The parent directory is created first so that callers do not depend
        on some other fixture having created it already.

        Args:
            pdf_path: Destination ``Path`` of the file to create.
            *chunks: ``bytes`` objects written verbatim between the
                ``%PDF-1.4`` header and the ``%%EOF`` trailer.
        """
        pdf_path.parent.mkdir(parents=True, exist_ok=True)
        with pdf_path.open("wb") as f:
            f.write(b"%PDF-1.4\n")
            f.writelines(chunks)
            f.write(b"%%EOF\n")

    @staticmethod
    def _build_components(library_paths):
        """Create the storage manager, database manager and local importer.

        The database is initialized before returning.

        Args:
            library_paths: The library paths object yielded by
                ``temp_library``.

        Returns:
            A ``(storage_manager, db_manager, local_importer)`` tuple.
        """
        storage_manager = PaperStorageManager(library_paths)
        db_manager = DatabaseManager(library_paths)
        local_importer = LocalImporter(storage_manager)
        db_manager.initialize_database()
        return storage_manager, db_manager, local_importer

    @pytest.fixture
    def temp_library(self):
        """Create a temporary library for testing.

        Yields the ``LibraryPaths`` built from a fresh directory under the
        scratch root; the directory is removed afterwards, including when
        the set-up itself fails part-way.
        """
        temp_dir = self._SCRATCH_ROOT / f"test_integration_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        try:
            library_paths = LibraryPaths.from_root(temp_dir)
            library_paths.create_directories()
            yield library_paths
        finally:
            # Cleanup
            if temp_dir.exists():
                shutil.rmtree(temp_dir)

    @pytest.fixture
    def sample_pdf(self):
        """Create a sample PDF file for testing.

        Yields the path of a minimal one-page PDF skeleton under the scratch
        root; the file is deleted afterwards.
        """
        pdf_file = self._SCRATCH_ROOT / f"integration_test_{hash(self)}.pdf"
        try:
            # Minimal PDF content
            self._write_pdf(
                pdf_file,
                b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
                b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n",
                b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n",
            )
            yield pdf_file
        finally:
            # Cleanup
            if pdf_file.exists():
                pdf_file.unlink()

    def test_complete_local_import_workflow(self, temp_library, sample_pdf):
        """Test complete workflow for importing and managing a local PDF."""
        # Set up components and initialize the database
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import PDF
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Integration Test Paper",
            tags=["integration", "test"],
            notes="This is an integration test paper",
        )

        # Update metadata with authors after import
        metadata.authors = ["Test Author"]
        storage_manager.update_paper_metadata(metadata)

        # Verify metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Integration Test Paper"
        assert metadata.authors == ["Test Author"]
        assert metadata.tags == ["integration", "test"]

        # Index in database
        db_manager.index_paper(metadata)

        # Test retrieval from database
        retrieved_paper = db_manager.get_paper(metadata.paper_id)
        assert retrieved_paper is not None
        assert retrieved_paper["title"] == "Integration Test Paper"

        # Test search functionality
        search_results = list(db_manager.search_papers("Integration Test"))
        assert len(search_results) == 1
        assert search_results[0]["paper_id"] == metadata.paper_id

        # Test field search
        author_results = list(
            db_manager.search_by_field("author_list", "Test Author")
        )
        assert len(author_results) == 1

        # Test listing papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 1
        assert all_papers[0]["paper_id"] == metadata.paper_id

        # Test statistics
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 1
        assert stats["by_source_type"]["local"] == 1

        # Test updating metadata
        metadata.notes = "Updated notes"
        storage_manager.update_paper_metadata(metadata)

        # Re-index and verify update
        db_manager.index_paper(metadata)
        updated_paper = db_manager.get_paper(metadata.paper_id)
        # Fail with a clear assertion instead of a TypeError on subscripting
        # if the paper went missing after re-indexing.
        assert updated_paper is not None
        assert "Updated notes" in updated_paper["search_text"]

    def test_multiple_papers_workflow(self, temp_library, sample_pdf):
        """Test workflow with multiple papers.

        ``sample_pdf`` is requested but not read by this test; each import
        uses its own freshly written file with distinct content.
        """
        # Set up components and initialize the database
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import multiple papers (create unique PDFs)
        for i in range(3):
            unique_pdf = self._SCRATCH_ROOT / f"unique_paper_{i}_{hash(self)}.pdf"
            try:
                # Create unique PDF for each import. Written inside the
                # ``try`` so a partial file is removed if the write fails.
                self._write_pdf(
                    unique_pdf,
                    f"% Unique content {i}\n".encode(),
                    b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
                )
                metadata = local_importer.import_pdf(
                    pdf_path=unique_pdf,
                    title=f"Test Paper {i + 1}",
                    tags=[f"tag{i + 1}", "common"],
                    notes=f"Notes for paper {i + 1}",
                )

                # Update metadata with authors after import
                metadata.authors = [f"Author {i + 1}"]
                storage_manager.update_paper_metadata(metadata)
                db_manager.index_paper(metadata)
            finally:
                if unique_pdf.exists():
                    unique_pdf.unlink()

        # Test listing all papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 3

        # Test search across papers
        common_tag_results = list(db_manager.search_papers("common"))
        assert len(common_tag_results) == 3

        # Test filtering
        filtered_results = list(db_manager.list_papers(limit=2))
        assert len(filtered_results) == 2

        # Test reindexing
        success_count, error_count = db_manager.reindex_from_storage(
            storage_manager
        )
        assert success_count == 3
        assert error_count == 0

        # Verify papers still exist after reindex
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 3

    def test_storage_and_database_consistency(self, temp_library, sample_pdf):
        """Test consistency between storage and database."""
        # Set up components and initialize the database
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import paper
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Consistency Test Paper",
        )

        # Index in database
        db_manager.index_paper(metadata)

        # Verify file exists in storage
        assert storage_manager.paper_exists(
            metadata.paper_id, metadata.source_type
        )

        # Verify paper exists in database
        db_paper = db_manager.get_paper(metadata.paper_id)
        assert db_paper is not None

        # Load from storage and compare
        storage_metadata = storage_manager.load_paper_metadata(
            metadata.paper_id, metadata.source_type
        )
        assert storage_metadata.title == db_paper["title"]
        assert storage_metadata.paper_id == db_paper["paper_id"]

        # Test reindexing maintains consistency
        db_manager.remove_paper(metadata.paper_id)
        assert db_manager.get_paper(metadata.paper_id) is None

        # Reindex from storage
        success_count, error_count = db_manager.reindex_from_storage(
            storage_manager
        )
        assert success_count == 1
        assert error_count == 0

        # Verify paper is back in database
        restored_paper = db_manager.get_paper(metadata.paper_id)
        assert restored_paper is not None
        assert restored_paper["title"] == "Consistency Test Paper"