220 lines
7.7 KiB
Python
220 lines
7.7 KiB
Python
"""Integration tests for paperlib."""
|
|
|
|
import shutil
import tempfile
from pathlib import Path

import pytest

from paperlib.config import LibraryPaths
from paperlib.importer import LocalImporter
from paperlib.index import DatabaseManager
from paperlib.models import SourceType
from paperlib.storage import PaperStorageManager
|
|
|
|
|
|
class TestIntegration:
    """Test full integration workflows.

    Each test wires a ``PaperStorageManager``, a ``DatabaseManager`` and a
    ``LocalImporter`` over a throw-away library and drives them together:
    import a PDF, update its metadata, index it, query it, and reindex from
    storage.
    """

    # Scratch area (relative to the current working directory) that holds
    # every artifact these tests create.
    _SCRATCH_ROOT = Path("./.tmp")

    # Minimal PDF content used by the ``sample_pdf`` fixture.
    _SAMPLE_PDF_BYTES = (
        b"%PDF-1.4\n"
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
        b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n"
        b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n"
        b"%%EOF\n"
    )

    @staticmethod
    def _build_components(library_paths):
        """Create the storage manager, index and importer for *library_paths*.

        The database is initialized before returning so callers can index
        immediately.

        Returns:
            A ``(storage_manager, db_manager, local_importer)`` tuple.
        """
        storage_manager = PaperStorageManager(library_paths)
        db_manager = DatabaseManager(library_paths)
        local_importer = LocalImporter(storage_manager)
        db_manager.initialize_database()
        return storage_manager, db_manager, local_importer

    @pytest.fixture
    def temp_library(self):
        """Create a temporary library for testing.

        Yields:
            The ``LibraryPaths`` rooted at a freshly created directory under
            the scratch area. The directory is removed on teardown.
        """
        self._SCRATCH_ROOT.mkdir(parents=True, exist_ok=True)
        # mkdtemp guarantees a brand-new directory, so leftovers from an
        # interrupted run (or another process) can never be reused and skew
        # the exact-count assertions in the tests.
        temp_dir = Path(
            tempfile.mkdtemp(prefix="test_integration_", dir=self._SCRATCH_ROOT)
        )
        try:
            library_paths = LibraryPaths.from_root(temp_dir)
            library_paths.create_directories()
            yield library_paths
        finally:
            # Also runs when setup fails before the yield.
            if temp_dir.exists():
                shutil.rmtree(temp_dir)

    @pytest.fixture
    def sample_pdf(self):
        """Create a sample PDF file for testing.

        Yields:
            Path to a minimal PDF file inside the scratch area. The file is
            deleted on teardown.
        """
        pdf_file = self._SCRATCH_ROOT / f"integration_test_{hash(self)}.pdf"
        # Do not rely on another fixture having created the scratch area.
        pdf_file.parent.mkdir(parents=True, exist_ok=True)
        try:
            # write_bytes truncates, so a stale file of the same name is
            # harmlessly overwritten.
            pdf_file.write_bytes(self._SAMPLE_PDF_BYTES)
            yield pdf_file
        finally:
            if pdf_file.exists():
                pdf_file.unlink()

    def test_complete_local_import_workflow(self, temp_library, sample_pdf):
        """Test complete workflow for importing and managing a local PDF."""
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import PDF
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Integration Test Paper",
            tags=["integration", "test"],
            notes="This is an integration test paper",
        )

        # Update metadata with authors after import
        metadata.authors = ["Test Author"]
        storage_manager.update_paper_metadata(metadata)

        # Verify metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Integration Test Paper"
        assert metadata.authors == ["Test Author"]
        assert metadata.tags == ["integration", "test"]

        # Index in database
        db_manager.index_paper(metadata)

        # Test retrieval from database
        retrieved_paper = db_manager.get_paper(metadata.paper_id)
        assert retrieved_paper is not None
        assert retrieved_paper["title"] == "Integration Test Paper"

        # Test search functionality
        search_results = list(db_manager.search_papers("Integration Test"))
        assert len(search_results) == 1
        assert search_results[0]["paper_id"] == metadata.paper_id

        # Test field search
        author_results = list(
            db_manager.search_by_field("author_list", "Test Author")
        )
        assert len(author_results) == 1

        # Test listing papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 1
        assert all_papers[0]["paper_id"] == metadata.paper_id

        # Test statistics
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 1
        assert stats["by_source_type"]["local"] == 1

        # Test updating metadata
        metadata.notes = "Updated notes"
        storage_manager.update_paper_metadata(metadata)

        # Re-index and verify update
        db_manager.index_paper(metadata)
        updated_paper = db_manager.get_paper(metadata.paper_id)
        # Fail with a clear assertion rather than a TypeError on subscript.
        assert updated_paper is not None
        assert "Updated notes" in updated_paper["search_text"]

    def test_multiple_papers_workflow(self, temp_library, sample_pdf):
        """Test workflow with multiple papers.

        ``sample_pdf`` is requested but not read here; every imported PDF is
        generated inside the loop.
        """
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import multiple papers (create unique PDFs)
        papers = []
        for i in range(3):
            # Each file carries a per-iteration comment line so its content
            # differs from the others (presumably so the importer does not
            # treat them as duplicates -- confirm against LocalImporter).
            unique_pdf = self._SCRATCH_ROOT / f"unique_paper_{i}_{hash(self)}.pdf"
            try:
                unique_pdf.write_bytes(
                    b"%PDF-1.4\n"
                    + f"% Unique content {i}\n".encode()
                    + b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
                    + b"%%EOF\n"
                )

                metadata = local_importer.import_pdf(
                    pdf_path=unique_pdf,
                    title=f"Test Paper {i + 1}",
                    tags=[f"tag{i + 1}", "common"],
                    notes=f"Notes for paper {i + 1}",
                )

                # Update metadata with authors after import
                metadata.authors = [f"Author {i + 1}"]
                storage_manager.update_paper_metadata(metadata)

                papers.append(metadata)
                db_manager.index_paper(metadata)
            finally:
                # Covers a failed write as well as a failed import.
                if unique_pdf.exists():
                    unique_pdf.unlink()

        # Test listing all papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 3
        # The listed rows must be exactly the papers imported above.
        assert {row["paper_id"] for row in all_papers} == {
            paper.paper_id for paper in papers
        }

        # Test search across papers
        common_tag_results = list(db_manager.search_papers("common"))
        assert len(common_tag_results) == 3

        # Test filtering
        filtered_results = list(db_manager.list_papers(limit=2))
        assert len(filtered_results) == 2

        # Test reindexing
        success_count, error_count = db_manager.reindex_from_storage(
            storage_manager
        )
        assert success_count == 3
        assert error_count == 0

        # Verify papers still exist after reindex
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 3

    def test_storage_and_database_consistency(self, temp_library, sample_pdf):
        """Test consistency between storage and database."""
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import paper
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Consistency Test Paper",
        )

        # Index in database
        db_manager.index_paper(metadata)

        # Verify file exists in storage
        assert storage_manager.paper_exists(
            metadata.paper_id, metadata.source_type
        )

        # Verify paper exists in database
        db_paper = db_manager.get_paper(metadata.paper_id)
        assert db_paper is not None

        # Load from storage and compare
        storage_metadata = storage_manager.load_paper_metadata(
            metadata.paper_id, metadata.source_type
        )
        assert storage_metadata.title == db_paper["title"]
        assert storage_metadata.paper_id == db_paper["paper_id"]

        # Test reindexing maintains consistency
        db_manager.remove_paper(metadata.paper_id)
        assert db_manager.get_paper(metadata.paper_id) is None

        # Reindex from storage
        success_count, error_count = db_manager.reindex_from_storage(
            storage_manager
        )
        assert success_count == 1
        assert error_count == 0

        # Verify paper is back in database
        restored_paper = db_manager.get_paper(metadata.paper_id)
        assert restored_paper is not None
        assert restored_paper["title"] == "Consistency Test Paper"