test: add tests

This commit is contained in:
2026-04-17 15:56:04 -04:00
parent 088e07dee8
commit 74d140e5f8
10 changed files with 1659 additions and 0 deletions
+273
View File
@@ -0,0 +1,273 @@
"""Tests for paperlib import functionality."""
import shutil
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from paperlib.config import LibraryPaths
from paperlib.importer import ArxivImporter, LocalImporter
from paperlib.models import SourceType
from paperlib.storage import PaperStorageManager
class TestLocalImporter:
    """Tests for importing local PDF files through ``LocalImporter``.

    Each test runs against a throwaway library created under ``./.tmp`` and
    removes the files it creates.
    """

    @pytest.fixture
    def temp_library(self):
        """Create a temporary library and delete it once the test is done.

        Yields:
            The ``LibraryPaths`` built from a per-instance directory under
            ``./.tmp``.
        """
        temp_dir = Path("./.tmp") / f"test_import_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()
        yield library_paths
        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def local_importer(self, temp_library):
        """Return a ``LocalImporter`` backed by the temporary library."""
        storage_manager = PaperStorageManager(temp_library)
        return LocalImporter(storage_manager)

    @pytest.fixture
    def sample_pdf(self):
        """Write a minimal PDF-like file and delete it once the test is done.

        Yields:
            Path of the file that was written.
        """
        pdf_file = Path("./.tmp") / f"sample_{hash(self)}.pdf"
        # Create the parent directory here so this fixture does not depend on
        # another fixture having been set up first.
        pdf_file.parent.mkdir(parents=True, exist_ok=True)
        # Minimal PDF content
        pdf_file.write_bytes(
            b"%PDF-1.4\n"
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            b"%%EOF\n"
        )
        yield pdf_file
        # Cleanup (a test may already have renamed or removed the file)
        if pdf_file.exists():
            pdf_file.unlink()

    def test_import_pdf_success(self, local_importer, sample_pdf):
        """Importing with explicit title, notes and tags keeps those values."""
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Test Paper",
            notes="Test notes",
            tags=["test", "sample"],
        )

        # Check metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Test Paper"
        assert metadata.notes == "Test notes"
        assert metadata.tags == ["test", "sample"]
        assert metadata.paper_id.startswith("local-")

    def test_import_pdf_auto_title(self, local_importer, sample_pdf):
        """Importing without a title derives one from the file name."""
        # Rename PDF to have a meaningful name
        meaningful_pdf = sample_pdf.parent / "Machine_Learning-Paper.pdf"
        sample_pdf.rename(meaningful_pdf)

        try:
            metadata = local_importer.import_pdf(pdf_path=meaningful_pdf)
            # Title should be auto-generated from filename
            assert metadata.title == "Machine Learning Paper"
        finally:
            # The fixture only knows the old name, so remove the renamed file.
            if meaningful_pdf.exists():
                meaningful_pdf.unlink()

    def test_import_nonexistent_pdf(self, local_importer):
        """Importing a path that does not exist raises ``FileNotFoundError``."""
        nonexistent = Path("./.tmp/nonexistent.pdf")

        with pytest.raises(FileNotFoundError):
            local_importer.import_pdf(pdf_path=nonexistent)

    def test_import_non_pdf_file(self, local_importer):
        """Importing a non-PDF file raises ``ValueError``."""
        text_file = Path("./.tmp") / "not_a_pdf.txt"
        text_file.parent.mkdir(parents=True, exist_ok=True)
        text_file.write_text("This is not a PDF", encoding="utf-8")

        try:
            with pytest.raises(ValueError, match="File is not a PDF"):
                local_importer.import_pdf(pdf_path=text_file)
        finally:
            if text_file.exists():
                text_file.unlink()

    def test_import_duplicate_pdf(self, local_importer, sample_pdf):
        """Importing the same PDF a second time raises ``ValueError``."""
        # Import once; the returned metadata is not needed here.
        local_importer.import_pdf(pdf_path=sample_pdf)

        # Try to import again
        with pytest.raises(ValueError, match="Paper already imported"):
            local_importer.import_pdf(pdf_path=sample_pdf)
class TestArxivImporter:
    """Tests for ``ArxivImporter``.

    The arXiv client, metadata lookup and PDF download are replaced by mocks
    in these tests. Files are written under ``./.tmp`` and removed afterwards.
    """

    @pytest.fixture
    def temp_library(self):
        """Create a temporary library and delete it once the test is done.

        Yields:
            The ``LibraryPaths`` built from a per-instance directory under
            ``./.tmp``.
        """
        temp_dir = Path("./.tmp") / f"test_arxiv_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()
        yield library_paths
        # Cleanup
        if temp_dir.exists():
            shutil.rmtree(temp_dir)

    @pytest.fixture
    def arxiv_importer(self, temp_library):
        """Return an ``ArxivImporter`` backed by the temporary library."""
        storage_manager = PaperStorageManager(temp_library)
        return ArxivImporter(storage_manager)

    def test_extract_arxiv_id_clean(self, arxiv_importer):
        """Bare, prefixed, versioned and old-style IDs are extracted."""
        # Test various formats
        assert arxiv_importer.extract_arxiv_id("2212.06340") == "2212.06340"
        assert arxiv_importer.extract_arxiv_id("arxiv:2212.06340") == "2212.06340"
        assert arxiv_importer.extract_arxiv_id("2212.06340v1") == "2212.06340v1"
        assert (
            arxiv_importer.extract_arxiv_id("math-ph/0701002") == "math-ph/0701002"
        )

    def test_extract_arxiv_id_from_url(self, arxiv_importer):
        """An ``arxiv.org/abs/`` URL yields the bare ID."""
        url = "https://arxiv.org/abs/2212.06340"
        extracted = arxiv_importer.extract_arxiv_id(url)
        assert extracted == "2212.06340"

    def test_fetch_paper_metadata_success(self, arxiv_importer):
        """The result produced by the client is returned to the caller."""
        # ``Mock(name=...)`` only names the mock for its repr; the ``name``
        # attribute has to be assigned after construction.
        authors = []
        for author_name in ("Alice Smith", "Bob Jones"):
            author = Mock()
            author.name = author_name
            authors.append(author)

        # Mock arXiv result
        mock_result = Mock()
        mock_result.title = "Test Paper"
        mock_result.authors = authors
        mock_result.published = Mock()
        mock_result.updated = Mock()
        mock_result.categories = ["cs.AI", "stat.ML"]

        # Mock the client's results method directly
        arxiv_importer.client.results = Mock(return_value=[mock_result])

        # Test
        result = arxiv_importer.fetch_paper_metadata("2212.06340")
        assert result == mock_result

    def test_fetch_paper_metadata_not_found(self, arxiv_importer):
        """An empty result list from the client raises ``ValueError``."""
        # Mock empty results
        arxiv_importer.client.results = Mock(return_value=[])

        with pytest.raises(ValueError, match="Paper not found on arXiv"):
            arxiv_importer.fetch_paper_metadata("9999.99999")

    @patch("paperlib.importer.arxiv_importer.tempfile.NamedTemporaryFile")
    def test_download_pdf(self, mock_tempfile, arxiv_importer):
        """``download_pdf`` downloads to the temp file name and returns it."""
        # Mock temporary file: the context manager hands out a fixed name.
        mock_temp_path = Path("./.tmp/mock_temp.pdf")
        mock_tempfile.return_value.__enter__.return_value.name = str(mock_temp_path)

        # Mock arXiv result
        mock_result = Mock()

        # Create actual temp file for test
        mock_temp_path.write_bytes(b"%PDF-1.4\n%%EOF\n")

        try:
            pdf_path = arxiv_importer.download_pdf(mock_result)
            assert pdf_path == mock_temp_path
            mock_result.download_pdf.assert_called_once_with(
                filename=str(mock_temp_path)
            )
        finally:
            if mock_temp_path.exists():
                mock_temp_path.unlink()

    @patch.object(ArxivImporter, "download_pdf")
    @patch.object(ArxivImporter, "fetch_paper_metadata")
    def test_import_arxiv_paper_success(
        self, mock_fetch, mock_download, arxiv_importer
    ):
        """A full import copies the mocked arXiv fields into the metadata."""
        # Mock PDF file
        pdf_file = Path("./.tmp") / "test_arxiv.pdf"
        pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

        try:
            # Mock arXiv result with proper string values
            mock_author = Mock()
            mock_author.name = "Alice Smith"

            mock_result = Mock()
            mock_result.title = "Test ArXiv Paper"
            mock_result.authors = [mock_author]
            mock_result.published = None
            mock_result.updated = None
            mock_result.categories = ["cs.AI"]

            mock_fetch.return_value = mock_result
            mock_download.return_value = pdf_file

            # Test import
            metadata = arxiv_importer.import_arxiv_paper(
                arxiv_input="2212.06340", notes="Test notes", tags=["test"]
            )

            # Check results
            assert metadata.source_type == SourceType.ARXIV
            assert metadata.source_id == "2212.06340"
            assert metadata.title == "Test ArXiv Paper"
            assert metadata.authors == ["Alice Smith"]
            assert metadata.categories == ["cs.AI"]
            assert metadata.notes == "Test notes"
            assert metadata.tags == ["test"]
        finally:
            if pdf_file.exists():
                pdf_file.unlink()

    @patch.object(ArxivImporter, "fetch_paper_metadata")
    def test_import_duplicate_arxiv_paper(self, mock_fetch, arxiv_importer):
        """Importing the same arXiv ID a second time raises ``ValueError``."""
        # Mock first import
        pdf_file = Path("./.tmp") / "test_arxiv_dup.pdf"
        pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

        try:
            with patch.object(ArxivImporter, "download_pdf", return_value=pdf_file):
                mock_result = Mock()
                mock_result.title = "Test Paper"
                mock_result.authors = []
                mock_result.published = None
                mock_result.updated = None
                mock_result.categories = []
                mock_fetch.return_value = mock_result

                # First import should succeed
                arxiv_importer.import_arxiv_paper("2212.06340")

                # Second import should fail
                with pytest.raises(ValueError, match="Paper already imported"):
                    arxiv_importer.import_arxiv_paper("2212.06340")
        finally:
            if pdf_file.exists():
                pdf_file.unlink()