feat: add rich ui for converting
This commit is contained in:
+11
-5
@@ -205,10 +205,12 @@ class TestCLI:
|
||||
"""Test convert command with no papers."""
|
||||
self.run_paperlib_cmd("init", str(temp_library))
|
||||
|
||||
result = self.run_paperlib_cmd("convert", "--library", str(temp_library))
|
||||
result = self.run_paperlib_cmd(
|
||||
"convert", "--no-ui", "--library", str(temp_library)
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert "Complete: 0 successful, 0 failed" in result.stdout
|
||||
assert "Converted pending: 0 successful, 0 failed" in result.stdout
|
||||
|
||||
def test_convert_command_with_papers_no_mineru(self, temp_library, sample_pdf):
|
||||
"""Test convert command with papers when MinerU is not available."""
|
||||
@@ -218,11 +220,15 @@ class TestCLI:
|
||||
"import", "--pdf", str(sample_pdf), "--library", str(temp_library)
|
||||
)
|
||||
|
||||
# Convert (will fail because MinerU command may not be properly set up)
|
||||
result = self.run_paperlib_cmd("convert", "--library", str(temp_library))
|
||||
# Convert without UI (will fail because MinerU command may not be properly set up)
|
||||
result = self.run_paperlib_cmd(
|
||||
"convert", "--no-ui", "--library", str(temp_library)
|
||||
)
|
||||
|
||||
# Should complete but may have failures due to MinerU setup
|
||||
assert "Complete:" in result.stdout
|
||||
assert ("Converted pending:" in result.stdout) or (
|
||||
"Converting papers" in result.stdout
|
||||
)
|
||||
|
||||
def test_invalid_command(self):
|
||||
"""Test invalid command."""
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
"""Tests for converter UI functionality."""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from rich.console import Console
|
||||
|
||||
from paperlib.ui import ConversionUI
|
||||
|
||||
|
||||
class TestConversionUI:
    """Tests for ConversionUI: MinerU line formatting, table creation and output buffering."""

    @pytest.fixture
    def ui(self):
        """Yield a ConversionUI whose console output is discarded.

        The null-device sink is opened in a ``with`` block so the file handle
        is closed once the test finishes instead of being leaked.
        """
        import os

        with open(os.devnull, "w", encoding="utf-8") as sink:
            # force_terminal=True is kept from the original setup so the
            # console is treated as a terminal even though the sink is not one.
            console = Console(file=sink, force_terminal=True)
            yield ConversionUI(console=console)

    @pytest.fixture
    def mock_papers(self):
        """Return three Mock papers exposing ``paper_id`` and ``title``."""
        return [
            Mock(paper_id=f"test-paper-{n}", title=f"Test Paper Title {n}")
            for n in range(1, 4)
        ]

    def test_format_mineru_output_line(self, ui):
        """Each kind of MinerU output line is wrapped in the expected colour tag."""
        # (input line, acceptable markup tags) -- any one tag is sufficient.
        cases = [
            # INFO line
            (
                "2026-04-17 17:46:01.450 | INFO | Processing started",
                ("[dim]",),
            ),
            # ERROR line
            ("ERROR: Conversion failed", ("[red]",)),
            # WARNING line
            ("WARNING: Low memory", ("[yellow]",)),
            # Progress line
            (
                "Layout Predict: 50%|█████ | 22/44 [00:15<00:15, 1.44it/s]",
                ("[blue]",),
            ),
            # Fetching line (may be coloured blue due to the % character),
            # so either colour is fine.
            ("Fetching 7 files: 100%|██████████| 7/7", ("[cyan]", "[blue]")),
        ]

        for line, accepted_tags in cases:
            formatted = ui._format_mineru_output_line(line)
            assert any(tag in formatted for tag in accepted_tags), (
                f"{line!r} formatted as {formatted!r}, "
                f"expected one of {accepted_tags}"
            )

    @patch("threading.Thread")
    @patch("time.sleep")
    def test_run_conversion_with_ui_empty(self, mock_sleep, mock_thread, ui):
        """With no papers to convert the UI reports zero successes and zero failures."""
        # Decorators apply bottom-up: time.sleep -> mock_sleep, Thread -> mock_thread.
        result = ui.run_conversion_with_ui([], lambda x: True)
        assert result == (0, 0)

    def test_create_display_table(self, ui):
        """A display table is produced both with and without a current paper."""
        task_id = ui.progress.add_task("test", total=1)

        # Without a current paper
        table = ui.create_display_table(task_id)
        assert table is not None

        # With a current paper
        table = ui.create_display_table(task_id, "test-paper-1 - Sample Title")
        assert table is not None

    def test_output_line_management(self, ui):
        """Appending keeps every line; slicing by max_output_lines yields the display window."""
        # Add many lines
        for i in range(60):
            ui.output_lines.append(f"Line {i}")

        # Appending directly does not truncate the buffer.
        assert len(ui.output_lines) == 60

        # Only the most recent max_output_lines entries form the display window.
        # NOTE(review): assumes ui.max_output_lines <= 60 -- confirm in ConversionUI.
        recent_lines = ui.output_lines[-ui.max_output_lines :]
        assert len(recent_lines) == ui.max_output_lines
|
||||
@@ -0,0 +1,219 @@
|
||||
"""Tests for MinerU markdown post-processing."""
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from paperlib.config import LibraryPaths
|
||||
from paperlib.converter import MinerUConverter
|
||||
from paperlib.storage import PaperStorageManager
|
||||
|
||||
|
||||
class TestMinerUPostProcess:
    """Tests for MinerUConverter markdown post-processing.

    Covers rewriting of ``images/`` references to ``assets/images/``, content
    cleaning, and tolerance of missing input files.
    """

    @pytest.fixture
    def temp_library(self):
        """Yield LibraryPaths rooted in a scratch directory under ``./.tmp``.

        The directory is removed after the test so repeated runs do not
        accumulate ``test_postprocess_*`` folders.
        """
        import shutil

        temp_dir = Path("./.tmp") / f"test_postprocess_{hash(self)}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        library_paths = LibraryPaths.from_root(temp_dir)
        library_paths.create_directories()
        yield library_paths
        # Teardown: best-effort removal, never fail a test because of cleanup.
        shutil.rmtree(temp_dir, ignore_errors=True)

    @pytest.fixture
    def converter(self, temp_library):
        """Return a MinerUConverter backed by storage in the temporary library."""
        storage_manager = PaperStorageManager(temp_library)
        return MinerUConverter(storage_manager)

    @staticmethod
    def _post_process_text(converter, content):
        """Write ``content`` to a temp ``.md`` file, post-process it in place, return the result.

        The temporary file is always deleted, even if post-processing raises.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".md", delete=False, encoding="utf-8"
        ) as tmp:
            tmp.write(content)
            tmp_path = Path(tmp.name)

        try:
            converter._post_process_markdown(tmp_path)
            return tmp_path.read_text(encoding="utf-8")
        finally:
            if tmp_path.exists():
                tmp_path.unlink()

    def test_image_reference_replacement(self, converter):
        """Relative ``images/`` references are rewritten; external and ``./`` ones are kept."""
        # Markdown with various image reference formats
        test_content = """# Test Document

Here's an image with alt text:


Here's an image without alt text:


Some text content.

Here's another image:


This should not be changed:


And this local reference should not change:

"""

        result_content = self._post_process_text(converter, test_content)

        # Verify image references were updated correctly
        assert "" in result_content
        assert "" in result_content
        assert (
            ""
            in result_content
        )

        # Verify external and local references were NOT changed
        assert "https://example.com/image.jpg" in result_content
        assert "./local_images/test.png" in result_content

        # Verify no "images/" references remain
        assert "](images/" not in result_content

    def test_markdown_content_cleaning(self, converter):
        """Cleaned markdown has no runs of spaces outside indented code blocks."""
        # NOTE(review): this fixture presumably contained runs of spaces, tabs
        # and extra blank lines; as written here it holds single spaces only --
        # confirm against the intended input.
        test_content = """# Title with Extra Spaces


Here's a paragraph with multiple spaces.

Indented line with tabs and spaces.


Another paragraph.



Too many blank lines above.
"""

        result = converter._clean_markdown_content(test_content)

        # Check that excessive whitespace within lines is cleaned
        for line in result.split("\n"):
            if line.strip():  # Non-empty lines
                # Should not have multiple consecutive spaces (two-space probe),
                # except for code blocks, which start with a four-space indent.
                assert "  " not in line or line.startswith("    ")

    def test_post_process_error_handling(self, converter):
        """Post-processing a missing file must not raise."""
        # Path that never existed
        fake_path = Path("./.tmp/nonexistent.md")

        # Should not raise exception
        converter._post_process_markdown(fake_path)

        # Path that existed briefly and was deleted before processing
        with tempfile.NamedTemporaryFile(suffix=".md", delete=False) as tmp:
            tmp_path = Path(tmp.name)

        try:
            # Remove the file so the converter sees a vanished input
            tmp_path.unlink()

            # Should handle gracefully
            converter._post_process_markdown(tmp_path)
        finally:
            # Cleanup if file somehow still exists
            if tmp_path.exists():
                tmp_path.unlink()

    def test_complex_image_patterns(self, converter):
        """Image references with unusual alt text or file names are rewritten; look-alikes are kept."""
        # NOTE(review): the "code block" line below presumably carried a
        # leading indent in the intended input -- confirm.
        test_content = """
Various image patterns:







Non-image patterns that should not change:
[Link text](images/not-an-image)
`code with images/path`
code block with images/reference
"""

        result = self._post_process_text(converter, test_content)

        # Verify all image references were updated
        assert "" in result
        assert "" in result
        assert "" in result
        assert "" in result
        assert ".jpg)" in result

        # Verify non-image patterns were preserved
        assert "[Link text](images/not-an-image)" in result
        assert "`code with images/path`" in result
        assert (
            "code block with images/reference" in result
        )  # Leading spaces may be removed by cleaning
|
||||
Reference in New Issue
Block a user