test: add tests
This commit is contained in:
@@ -0,0 +1,273 @@
|
||||
"""Tests for paperlib import functionality."""
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.importer import ArxivImporter, LocalImporter
|
||||
from paperlib.models import SourceType
|
||||
from paperlib.storage import PaperStorageManager
|
||||
|
||||
|
||||
class TestLocalImporter:
    """Tests for LocalImporter (importing PDFs from local files)."""

    @pytest.fixture
    def temp_library(self):
        """Yield a LibraryPaths rooted in a scratch directory under ./.tmp.

        The scratch directory is removed on teardown, and also when building
        the library layout fails before the fixture value is handed out.
        """
        temp_dir = Path("./.tmp") / f"test_import_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        try:
            library_paths = LibraryPaths.from_root(temp_dir)
            library_paths.create_directories()

            yield library_paths
        finally:
            # Cleanup
            if temp_dir.exists():
                shutil.rmtree(temp_dir)

    @pytest.fixture
    def local_importer(self, temp_library):
        """Return a LocalImporter backed by storage in the temporary library."""
        storage_manager = PaperStorageManager(temp_library)
        return LocalImporter(storage_manager)

    @pytest.fixture
    def sample_pdf(self):
        """Yield the path of a minimal PDF file created under ./.tmp.

        The file is deleted on teardown if it still exists; tests are free to
        rename or remove it themselves.
        """
        pdf_file = Path("./.tmp") / f"sample_{hash(self)}.pdf"
        # Do not rely on another fixture having created ./.tmp beforehand.
        pdf_file.parent.mkdir(parents=True, exist_ok=True)
        # Minimal PDF content
        pdf_file.write_bytes(
            b"%PDF-1.4\n"
            b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
            b"%%EOF\n"
        )

        yield pdf_file

        # Cleanup
        if pdf_file.exists():
            pdf_file.unlink()

    def test_import_pdf_success(self, local_importer, sample_pdf):
        """Importing with explicit title, notes and tags stores them as given."""
        metadata = local_importer.import_pdf(
            pdf_path=sample_pdf,
            title="Test Paper",
            notes="Test notes",
            tags=["test", "sample"],
        )

        # Check metadata
        assert metadata.source_type == SourceType.LOCAL
        assert metadata.title == "Test Paper"
        assert metadata.notes == "Test notes"
        assert metadata.tags == ["test", "sample"]
        assert metadata.paper_id.startswith("local-")

    def test_import_pdf_auto_title(self, local_importer, sample_pdf):
        """Importing without a title derives one from the file name."""
        # Rename PDF to have a meaningful name
        meaningful_pdf = sample_pdf.parent / "Machine_Learning-Paper.pdf"
        sample_pdf.rename(meaningful_pdf)

        try:
            metadata = local_importer.import_pdf(pdf_path=meaningful_pdf)

            # Title should be auto-generated from filename
            assert metadata.title == "Machine Learning Paper"
        finally:
            # The sample_pdf fixture only cleans up the original name.
            if meaningful_pdf.exists():
                meaningful_pdf.unlink()

    def test_import_nonexistent_pdf(self, local_importer):
        """Importing a path that does not exist raises FileNotFoundError."""
        nonexistent = Path("./.tmp/nonexistent.pdf")

        with pytest.raises(FileNotFoundError):
            local_importer.import_pdf(pdf_path=nonexistent)

    def test_import_non_pdf_file(self, local_importer):
        """Importing a non-PDF file raises ValueError."""
        text_file = Path("./.tmp") / "not_a_pdf.txt"
        text_file.write_text("This is not a PDF")

        try:
            with pytest.raises(ValueError, match="File is not a PDF"):
                local_importer.import_pdf(pdf_path=text_file)
        finally:
            if text_file.exists():
                text_file.unlink()

    def test_import_duplicate_pdf(self, local_importer, sample_pdf):
        """Importing the same PDF a second time raises ValueError."""
        # Import once; only the side effect matters, not the returned metadata.
        local_importer.import_pdf(pdf_path=sample_pdf)

        # Try to import again
        with pytest.raises(ValueError, match="Paper already imported"):
            local_importer.import_pdf(pdf_path=sample_pdf)
|
||||
|
||||
|
||||
class TestArxivImporter:
    """Tests for ArxivImporter, with all arXiv access replaced by mocks."""

    @staticmethod
    def _make_author(name):
        """Return a mock author whose ``name`` attribute is the given string.

        ``Mock(name=...)`` cannot be used for this: the ``name`` keyword only
        labels the mock itself and does not create a ``name`` attribute, so
        the attribute has to be assigned after construction.
        """
        author = Mock()
        author.name = name
        return author

    @pytest.fixture
    def temp_library(self):
        """Yield a LibraryPaths rooted in a scratch directory under ./.tmp.

        The scratch directory is removed on teardown, and also when building
        the library layout fails before the fixture value is handed out.
        """
        temp_dir = Path("./.tmp") / f"test_arxiv_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        try:
            library_paths = LibraryPaths.from_root(temp_dir)
            library_paths.create_directories()

            yield library_paths
        finally:
            # Cleanup
            if temp_dir.exists():
                shutil.rmtree(temp_dir)

    @pytest.fixture
    def arxiv_importer(self, temp_library):
        """Return an ArxivImporter backed by storage in the temporary library."""
        storage_manager = PaperStorageManager(temp_library)
        return ArxivImporter(storage_manager)

    def test_extract_arxiv_id_clean(self, arxiv_importer):
        """Bare, prefixed, versioned and old-style IDs are extracted."""
        # Test various formats
        assert arxiv_importer.extract_arxiv_id("2212.06340") == "2212.06340"
        assert arxiv_importer.extract_arxiv_id("arxiv:2212.06340") == "2212.06340"
        assert arxiv_importer.extract_arxiv_id("2212.06340v1") == "2212.06340v1"
        assert arxiv_importer.extract_arxiv_id("math-ph/0701002") == "math-ph/0701002"

    def test_extract_arxiv_id_from_url(self, arxiv_importer):
        """An arXiv abstract URL yields the ID it contains."""
        url = "https://arxiv.org/abs/2212.06340"
        extracted = arxiv_importer.extract_arxiv_id(url)
        assert extracted == "2212.06340"

    def test_fetch_paper_metadata_success(self, arxiv_importer):
        """fetch_paper_metadata returns the result produced by the client."""
        # Mock arXiv result
        mock_result = Mock()
        mock_result.title = "Test Paper"
        mock_result.authors = [
            self._make_author("Alice Smith"),
            self._make_author("Bob Jones"),
        ]
        mock_result.published = Mock()
        mock_result.updated = Mock()
        mock_result.categories = ["cs.AI", "stat.ML"]

        # Mock the client's results method directly
        arxiv_importer.client.results = Mock(return_value=[mock_result])

        # Test
        result = arxiv_importer.fetch_paper_metadata("2212.06340")
        assert result == mock_result

    def test_fetch_paper_metadata_not_found(self, arxiv_importer):
        """An empty result list from the client raises ValueError."""
        # Mock empty results
        arxiv_importer.client.results = Mock(return_value=[])

        with pytest.raises(ValueError, match="Paper not found on arXiv"):
            arxiv_importer.fetch_paper_metadata("9999.99999")

    @patch("paperlib.importer.arxiv_importer.tempfile.NamedTemporaryFile")
    def test_download_pdf(self, mock_tempfile, arxiv_importer):
        """download_pdf downloads into the temp file and returns its path."""
        # Mock temporary file
        mock_temp_path = Path("./.tmp/mock_temp.pdf")
        mock_tempfile.return_value.__enter__.return_value.name = str(mock_temp_path)

        # Mock arXiv result
        mock_result = Mock()

        # Create actual temp file for test
        mock_temp_path.write_bytes(b"%PDF-1.4\n%%EOF\n")

        try:
            pdf_path = arxiv_importer.download_pdf(mock_result)
            assert pdf_path == mock_temp_path
            mock_result.download_pdf.assert_called_once_with(
                filename=str(mock_temp_path)
            )
        finally:
            if mock_temp_path.exists():
                mock_temp_path.unlink()

    @patch.object(ArxivImporter, "download_pdf")
    @patch.object(ArxivImporter, "fetch_paper_metadata")
    def test_import_arxiv_paper_success(
        self, mock_fetch, mock_download, arxiv_importer
    ):
        """A successful import copies the arXiv fields into the metadata."""
        # Mock PDF file
        pdf_file = Path("./.tmp") / "test_arxiv.pdf"
        pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

        try:
            # Mock arXiv result with proper string values
            mock_result = Mock()
            mock_result.title = "Test ArXiv Paper"
            mock_result.authors = [self._make_author("Alice Smith")]
            mock_result.published = None
            mock_result.updated = None
            mock_result.categories = ["cs.AI"]

            mock_fetch.return_value = mock_result
            mock_download.return_value = pdf_file

            # Test import
            metadata = arxiv_importer.import_arxiv_paper(
                arxiv_input="2212.06340", notes="Test notes", tags=["test"]
            )

            # Check results
            assert metadata.source_type == SourceType.ARXIV
            assert metadata.source_id == "2212.06340"
            assert metadata.title == "Test ArXiv Paper"
            assert metadata.authors == ["Alice Smith"]
            assert metadata.categories == ["cs.AI"]
            assert metadata.notes == "Test notes"
            assert metadata.tags == ["test"]
        finally:
            if pdf_file.exists():
                pdf_file.unlink()

    @patch.object(ArxivImporter, "fetch_paper_metadata")
    def test_import_duplicate_arxiv_paper(self, mock_fetch, arxiv_importer):
        """Importing the same arXiv ID a second time raises ValueError."""
        # Mock first import
        pdf_file = Path("./.tmp") / "test_arxiv_dup.pdf"
        pdf_file.write_bytes(b"%PDF-1.4\n%%EOF\n")

        try:
            with patch.object(ArxivImporter, "download_pdf", return_value=pdf_file):
                mock_result = Mock()
                mock_result.title = "Test Paper"
                mock_result.authors = []
                mock_result.published = None
                mock_result.updated = None
                mock_result.categories = []
                mock_fetch.return_value = mock_result

                # First import should succeed
                arxiv_importer.import_arxiv_paper("2212.06340")

                # Second import should fail
                with pytest.raises(ValueError, match="Paper already imported"):
                    arxiv_importer.import_arxiv_paper("2212.06340")
        finally:
            if pdf_file.exists():
                pdf_file.unlink()
|
||||
Reference in New Issue
Block a user