test: add tests
This commit is contained in:
@@ -0,0 +1,220 @@
|
||||
"""Integration tests for paperlib."""
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.converter import MinerUConverter
|
||||
from paperlib.importer import ArxivImporter, LocalImporter
|
||||
from paperlib.index import DatabaseManager
|
||||
from paperlib.models import ConversionStatus, SourceType
|
||||
from paperlib.storage import PaperStorageManager
|
||||
|
||||
|
||||
class TestIntegration:
    """Test full integration workflows.

    Each test wires a ``PaperStorageManager``, a ``DatabaseManager`` and a
    ``LocalImporter`` onto a throwaway library directory and drives them
    together: import a PDF, index it, query it, and reindex from storage.
    """

    # Scratch root for everything these tests write to disk. Both fixtures
    # create it on demand so neither depends on the other having run first.
    _TMP_ROOT = Path("./.tmp")

    @staticmethod
    def _build_components(library_paths):
        """Build the storage manager, database manager and local importer.

        The database is initialised before returning, so callers can index
        papers immediately.

        Returns:
            A ``(storage_manager, db_manager, local_importer)`` tuple.
        """
        storage_manager = PaperStorageManager(library_paths)
        db_manager = DatabaseManager(library_paths)
        local_importer = LocalImporter(storage_manager)
        db_manager.initialize_database()
        return storage_manager, db_manager, local_importer

    @pytest.fixture
    def temp_library(self):
        """Create a temporary library for testing.

        Yields the ``LibraryPaths`` built from a per-instance directory under
        the scratch root, and removes that directory afterwards.
        """
        temp_dir = self._TMP_ROOT / f"test_integration_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        try:
            library_paths = LibraryPaths.from_root(temp_dir)
            library_paths.create_directories()
            yield library_paths
        finally:
            # Runs after the test and also when setup above raises, so a
            # half-created library is never left behind.
            if temp_dir.exists():
                shutil.rmtree(temp_dir)

    @pytest.fixture
    def sample_pdf(self):
        """Create a sample PDF file for testing.

        Yields the path of a minimal PDF written under the scratch root and
        deletes the file afterwards.
        """
        pdf_file = self._TMP_ROOT / f"integration_test_{hash(self)}.pdf"
        # Do not rely on another fixture having created the scratch root.
        pdf_file.parent.mkdir(parents=True, exist_ok=True)
        try:
            # Minimal PDF content
            pdf_file.write_bytes(
                b"%PDF-1.4\n"
                b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
                b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n"
                b"3 0 obj\n<< /Type /Page /Parent 2 0 R >>\nendobj\n"
                b"%%EOF\n"
            )
            yield pdf_file
        finally:
            if pdf_file.exists():
                pdf_file.unlink()

    def test_complete_local_import_workflow(self, temp_library, sample_pdf):
        """Test complete workflow for importing and managing a local PDF."""
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import PDF
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Integration Test Paper",
            tags=["integration", "test"],
            notes="This is an integration test paper",
        )

        # Update metadata with authors after import
        metadata.authors = ["Test Author"]
        storage_manager.update_paper_metadata(metadata)

        # Verify metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Integration Test Paper"
        assert metadata.authors == ["Test Author"]
        assert metadata.tags == ["integration", "test"]

        # Index in database
        db_manager.index_paper(metadata)

        # Test retrieval from database
        retrieved_paper = db_manager.get_paper(metadata.paper_id)
        assert retrieved_paper is not None
        assert retrieved_paper["title"] == "Integration Test Paper"

        # Test search functionality
        search_results = list(db_manager.search_papers("Integration Test"))
        assert len(search_results) == 1
        assert search_results[0]["paper_id"] == metadata.paper_id

        # Test field search
        author_results = list(db_manager.search_by_field("author_list", "Test Author"))
        assert len(author_results) == 1

        # Test listing papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 1
        assert all_papers[0]["paper_id"] == metadata.paper_id

        # Test statistics
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 1
        assert stats["by_source_type"]["local"] == 1

        # Test updating metadata
        metadata.notes = "Updated notes"
        storage_manager.update_paper_metadata(metadata)

        # Re-index and verify update
        db_manager.index_paper(metadata)
        updated_paper = db_manager.get_paper(metadata.paper_id)
        # Fail with a clear assertion rather than a TypeError on subscripting
        # None if the re-indexed row cannot be found.
        assert updated_paper is not None
        assert "Updated notes" in updated_paper["search_text"]

    def test_multiple_papers_workflow(self, temp_library, sample_pdf):
        """Test workflow with multiple papers."""
        # NOTE(review): sample_pdf is requested but never read in this test;
        # every paper gets its own file below. Kept to leave the test's
        # fixture list unchanged.
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import multiple papers (create unique PDFs)
        papers = []
        for i in range(3):
            # Create unique PDF for each import; the per-index comment line
            # makes the three files differ in content.
            unique_pdf = self._TMP_ROOT / f"unique_paper_{i}_{hash(self)}.pdf"
            unique_pdf.write_bytes(
                b"%PDF-1.4\n"
                + f"% Unique content {i}\n".encode()
                + b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
                + b"%%EOF\n"
            )

            try:
                metadata = local_importer.import_pdf(
                    pdf_path=unique_pdf,
                    title=f"Test Paper {i + 1}",
                    tags=[f"tag{i + 1}", "common"],
                    notes=f"Notes for paper {i + 1}",
                )
                # Update metadata with authors after import
                metadata.authors = [f"Author {i + 1}"]
                storage_manager.update_paper_metadata(metadata)

                papers.append(metadata)
                db_manager.index_paper(metadata)
            finally:
                # The source file is only needed for the import itself.
                if unique_pdf.exists():
                    unique_pdf.unlink()

        # Test listing all papers
        all_papers = list(db_manager.list_papers())
        assert len(all_papers) == 3

        # Test search across papers
        common_tag_results = list(db_manager.search_papers("common"))
        assert len(common_tag_results) == 3

        # Test that limit caps the number of listed papers
        filtered_results = list(db_manager.list_papers(limit=2))
        assert len(filtered_results) == 2

        # Test reindexing
        success_count, error_count = db_manager.reindex_from_storage(storage_manager)
        assert success_count == 3
        assert error_count == 0

        # Verify papers still exist after reindex
        stats = db_manager.get_statistics()
        assert stats["total_papers"] == 3

    def test_storage_and_database_consistency(self, temp_library, sample_pdf):
        """Test consistency between storage and database."""
        storage_manager, db_manager, local_importer = self._build_components(
            temp_library
        )

        # Import paper
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Consistency Test Paper",
        )

        # Index in database
        db_manager.index_paper(metadata)

        # Verify file exists in storage
        assert storage_manager.paper_exists(metadata.paper_id, metadata.source_type)

        # Verify paper exists in database
        db_paper = db_manager.get_paper(metadata.paper_id)
        assert db_paper is not None

        # Load from storage and compare
        storage_metadata = storage_manager.load_paper_metadata(
            metadata.paper_id, metadata.source_type
        )
        assert storage_metadata.title == db_paper["title"]
        assert storage_metadata.paper_id == db_paper["paper_id"]

        # Test reindexing maintains consistency
        db_manager.remove_paper(metadata.paper_id)
        assert db_manager.get_paper(metadata.paper_id) is None

        # Reindex from storage
        success_count, error_count = db_manager.reindex_from_storage(storage_manager)
        assert success_count == 1
        assert error_count == 0

        # Verify paper is back in database
        restored_paper = db_manager.get_paper(metadata.paper_id)
        assert restored_paper is not None
        assert restored_paper["title"] == "Consistency Test Paper"
|
||||
Reference in New Issue
Block a user