feat: add --json

This commit is contained in:
2026-04-17 19:27:16 -04:00
parent 832312297c
commit 227484e975
4 changed files with 584 additions and 65 deletions
+229 -65
View File
@@ -11,6 +11,7 @@ from paperlib.converter import MinerUConverter
from paperlib.importer import ArxivImporter, LocalImporter
from paperlib.index import DatabaseManager
from paperlib.storage import PaperStorageManager
from paperlib.utils import JSONOutputMixin
def _resolve_library_root(path: Path | None) -> Path:
@@ -53,10 +54,16 @@ def _build_parser() -> argparse.ArgumentParser:
default=".",
help="Library root to inspect. Defaults to the current directory.",
)
status_parser.add_argument(
"--json", action="store_true", help="Output in JSON format"
)
status_parser.set_defaults(handler=_handle_status)
list_parser = subparsers.add_parser("list", help="List imported papers.")
list_parser.add_argument("--library", "-L", default=".", help="Library root")
list_parser.add_argument(
"--json", action="store_true", help="Output in JSON format"
)
list_parser.set_defaults(handler=_handle_list)
show_parser = subparsers.add_parser(
@@ -65,6 +72,9 @@ def _build_parser() -> argparse.ArgumentParser:
)
show_parser.add_argument("paper_id", help="Paper ID to show")
show_parser.add_argument("--library", "-L", default=".", help="Library root")
show_parser.add_argument(
"--json", action="store_true", help="Output in JSON format"
)
show_parser.set_defaults(handler=_handle_show)
search_parser = subparsers.add_parser(
@@ -85,6 +95,9 @@ def _build_parser() -> argparse.ArgumentParser:
import_parser.add_argument("--notes", type=str, default="", help="Notes")
import_parser.add_argument("--tags", nargs="*", default=[], help="Tags")
import_parser.add_argument("--library", "-L", default=".", help="Library root")
import_parser.add_argument(
"--json", action="store_true", help="Output in JSON format"
)
import_parser.set_defaults(handler=_handle_import)
# Convert command
@@ -103,6 +116,9 @@ def _build_parser() -> argparse.ArgumentParser:
convert_parser.add_argument(
"--no-ui", action="store_true", help="Disable rich UI (useful for scripting)"
)
convert_parser.add_argument(
"--json", action="store_true", help="Output in JSON format"
)
convert_parser.set_defaults(handler=_handle_convert)
# Reindex command
@@ -111,6 +127,9 @@ def _build_parser() -> argparse.ArgumentParser:
help="Rebuild the search index from stored papers.",
)
reindex_parser.add_argument("--library", "-L", default=".", help="Library root")
reindex_parser.add_argument(
"--json", action="store_true", help="Output in JSON format"
)
reindex_parser.set_defaults(handler=_handle_reindex)
return parser
@@ -140,8 +159,22 @@ def _handle_init(args: argparse.Namespace) -> int:
def _handle_status(args: argparse.Namespace) -> int:
    """Show the resolved library layout for a selected root.

    When ``args.json`` is set the layout is emitted as a JSON document via
    ``JSONOutputMixin.output_json``; otherwise the text produced by
    ``_format_paths`` is printed.

    Args:
        args: Parsed CLI arguments; reads ``args.library`` and ``args.json``.

    Returns:
        Always ``0``.
    """
    library_root = _resolve_library_root(Path(args.library))
    paths = LibraryPaths.from_root(library_root)

    if args.json:
        # Paths are stringified explicitly so the payload is JSON-serializable.
        JSONOutputMixin.output_json(
            {
                "library_root": str(paths.root),
                "config_path": str(paths.config_path),
                "database_path": str(paths.db_path),
                "papers_dir": str(paths.papers_dir),
                "inbox_dir": str(paths.inbox_dir),
                "cache_dir": str(paths.cache_dir),
            }
        )
    else:
        print(_format_paths(paths))
    return 0
@@ -162,6 +195,12 @@ def _handle_list(args: argparse.Namespace) -> int:
# List all papers from storage (more reliable than index)
papers = list(storage_manager.list_all_papers())
if args.json:
JSONOutputMixin.output_json(
JSONOutputMixin.format_papers_list_for_json(papers)
)
return 0
if not papers:
print("No papers found in library.")
return 0
@@ -213,47 +252,75 @@ def _handle_show(args: argparse.Namespace) -> int:
# Find paper by ID
for metadata in storage_manager.list_all_papers():
if metadata.paper_id == args.paper_id:
print(f"Paper ID: {metadata.paper_id}")
print(f"Source: {metadata.source_type.value}")
if metadata.source_id:
print(f"Source ID: {metadata.source_id}")
print(f"Title: {metadata.title}")
if metadata.authors:
print(f"Authors: {', '.join(metadata.authors)}")
if metadata.published_date:
print(f"Published: {metadata.published_date.strftime('%Y-%m-%d')}")
if metadata.categories:
print(f"Categories: {', '.join(metadata.categories)}")
if metadata.tags:
print(f"Tags: {', '.join(metadata.tags)}")
print(f"Imported: {metadata.imported_at.strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Conversion Status: {metadata.conversion_status.value}")
print(f"Summary Status: {metadata.summary_status.value}")
if metadata.notes:
print(f"Notes: {metadata.notes}")
if args.json:
# Add file existence information
paper_data = JSONOutputMixin.format_metadata_for_json(metadata)
# Show file paths
print("\nFiles:")
if metadata.pdf_path:
pdf_path = paths.root / metadata.pdf_path
exists = "" if pdf_path.exists() else ""
print(f" PDF: {exists} {metadata.pdf_path}")
if metadata.paper_md_path:
md_path = paths.root / metadata.paper_md_path
exists = "" if md_path.exists() else ""
print(f" Markdown: {exists} {metadata.paper_md_path}")
if metadata.summary_json_path:
summary_path = paths.root / metadata.summary_json_path
exists = "" if summary_path.exists() else ""
print(f" Summary: {exists} {metadata.summary_json_path}")
# Add file status information
files_status = {}
if metadata.pdf_path:
pdf_path = paths.root / metadata.pdf_path
files_status["pdf_exists"] = pdf_path.exists()
if metadata.paper_md_path:
md_path = paths.root / metadata.paper_md_path
files_status["markdown_exists"] = md_path.exists()
if metadata.summary_json_path:
summary_path = paths.root / metadata.summary_json_path
files_status["summary_exists"] = summary_path.exists()
paper_data["files_status"] = files_status
JSONOutputMixin.output_json({"paper": paper_data})
else:
print(f"Paper ID: {metadata.paper_id}")
print(f"Source: {metadata.source_type.value}")
if metadata.source_id:
print(f"Source ID: {metadata.source_id}")
print(f"Title: {metadata.title}")
if metadata.authors:
print(f"Authors: {', '.join(metadata.authors)}")
if metadata.published_date:
print(
f"Published: {metadata.published_date.strftime('%Y-%m-%d')}"
)
if metadata.categories:
print(f"Categories: {', '.join(metadata.categories)}")
if metadata.tags:
print(f"Tags: {', '.join(metadata.tags)}")
imported_str = metadata.imported_at.strftime("%Y-%m-%d %H:%M:%S")
print(f"Imported: {imported_str}")
print(f"Conversion Status: {metadata.conversion_status.value}")
print(f"Summary Status: {metadata.summary_status.value}")
if metadata.notes:
print(f"Notes: {metadata.notes}")
# Show file paths
print("\nFiles:")
if metadata.pdf_path:
pdf_path = paths.root / metadata.pdf_path
exists = "" if pdf_path.exists() else ""
print(f" PDF: {exists} {metadata.pdf_path}")
if metadata.paper_md_path:
md_path = paths.root / metadata.paper_md_path
exists = "" if md_path.exists() else ""
print(f" Markdown: {exists} {metadata.paper_md_path}")
if metadata.summary_json_path:
summary_path = paths.root / metadata.summary_json_path
exists = "" if summary_path.exists() else ""
print(f" Summary: {exists} {metadata.summary_json_path}")
return 0
print(f"Paper not found: {args.paper_id}")
if args.json:
JSONOutputMixin.output_json_error(f"Paper not found: {args.paper_id}")
else:
print(f"Paper not found: {args.paper_id}")
return 1
except Exception as e:
print(f"Error showing paper: {e}")
if args.json:
JSONOutputMixin.output_json_error(f"Error showing paper: {e}")
else:
print(f"Error showing paper: {e}")
return 1
@@ -286,8 +353,19 @@ def _handle_import(args: argparse.Namespace) -> int:
# Index the paper
db_manager.index_paper(metadata)
print(f"Successfully imported local PDF: {metadata.paper_id}")
print(f"Title: {metadata.title}")
if args.json:
JSONOutputMixin.output_json(
{
"paper_id": metadata.paper_id,
"title": metadata.title,
"source_type": metadata.source_type.value,
"message": "Successfully imported local PDF",
"paper": JSONOutputMixin.format_metadata_for_json(metadata),
}
)
else:
print(f"Successfully imported local PDF: {metadata.paper_id}")
print(f"Title: {metadata.title}")
elif args.arxiv:
# Import from arXiv
@@ -300,14 +378,30 @@ def _handle_import(args: argparse.Namespace) -> int:
# Index the paper
db_manager.index_paper(metadata)
print(f"Successfully imported arXiv paper: {metadata.paper_id}")
print(f"Title: {metadata.title}")
print(f"Authors: {', '.join(metadata.authors)}")
if args.json:
JSONOutputMixin.output_json(
{
"paper_id": metadata.paper_id,
"title": metadata.title,
"source_type": metadata.source_type.value,
"source_id": metadata.source_id,
"authors": metadata.authors,
"message": "Successfully imported arXiv paper",
"paper": JSONOutputMixin.format_metadata_for_json(metadata),
}
)
else:
print(f"Successfully imported arXiv paper: {metadata.paper_id}")
print(f"Title: {metadata.title}")
print(f"Authors: {', '.join(metadata.authors)}")
return 0
except Exception as e:
print(f"Error importing paper: {e}")
if args.json:
JSONOutputMixin.output_json_error(f"Error importing paper: {e}")
else:
print(f"Error importing paper: {e}")
return 1
@@ -323,35 +417,88 @@ def _handle_convert(args: argparse.Namespace) -> int:
# Convert specific paper
for metadata in storage_manager.list_all_papers():
if metadata.paper_id == args.paper_id:
if converter.convert_paper(metadata):
print(f"Successfully converted paper: {metadata.paper_id}")
conversion_success = converter.convert_paper(metadata)
if args.json:
# Get updated metadata after conversion
updated_metadata = storage_manager.load_paper_metadata(
metadata.paper_id, metadata.source_type
)
status_val = (
updated_metadata.conversion_status.value
if updated_metadata
else "unknown"
)
msg = (
"Successfully converted paper"
if conversion_success
else "Failed to convert paper"
)
JSONOutputMixin.output_json(
{
"paper_id": metadata.paper_id,
"conversion_success": conversion_success,
"conversion_status": status_val,
"message": msg,
}
)
else:
print(f"Failed to convert paper: {metadata.paper_id}")
return 0
print(f"Paper not found: {args.paper_id}")
if conversion_success:
print(f"Successfully converted paper: {metadata.paper_id}")
else:
print(f"Failed to convert paper: {metadata.paper_id}")
return 0 if conversion_success else 1
if args.json:
JSONOutputMixin.output_json_error(f"Paper not found: {args.paper_id}")
else:
print(f"Paper not found: {args.paper_id}")
return 1
else:
# Convert papers based on flags
use_ui = not args.no_ui # Use UI unless explicitly disabled
use_ui = not (args.no_ui or args.json) # Disable UI for JSON output
success_count, failure_count = converter.convert_all_pending(
retry_failed=args.retry_failed, force=args.force, use_ui=use_ui
)
# Show what was attempted (if not using UI, UI will show its own summary)
if args.no_ui or (success_count == 0 and failure_count == 0):
if args.json:
# Determine action type
if args.force:
action = "Force converted"
action_type = "force_convert"
elif args.retry_failed:
action = "Converted pending and retried failed"
action_type = "convert_with_retry"
else:
action = "Converted pending"
action_type = "convert_pending"
msg = f"{action}: {success_count} successful, {failure_count} failed"
print(msg)
JSONOutputMixin.output_json(
{
"action": action_type,
"success_count": success_count,
"failure_count": failure_count,
"total_attempted": success_count + failure_count,
}
)
else:
# Show what was attempted (if not using UI)
if args.no_ui or (success_count == 0 and failure_count == 0):
if args.force:
action = "Force converted"
elif args.retry_failed:
action = "Converted pending and retried failed"
else:
action = "Converted pending"
msg = (
f"{action}: {success_count} successful, {failure_count} failed"
)
print(msg)
return 0 if failure_count == 0 else 1
except Exception as e:
print(f"Error during conversion: {e}")
if args.json:
JSONOutputMixin.output_json_error(f"Error during conversion: {e}")
else:
print(f"Error during conversion: {e}")
return 1
@@ -362,7 +509,8 @@ def _handle_reindex(args: argparse.Namespace) -> int:
storage_manager = PaperStorageManager(paths)
db_manager = DatabaseManager(paths)
print("Rebuilding search index...")
if not args.json:
print("Rebuilding search index...")
# Initialize database schema
db_manager.initialize_database()
@@ -370,21 +518,37 @@ def _handle_reindex(args: argparse.Namespace) -> int:
# Rebuild index from storage
success_count, error_count = db_manager.reindex_from_storage(storage_manager)
print(f"Reindex complete: {success_count} papers indexed, {error_count} errors")
# Show statistics
stats = db_manager.get_statistics()
print(f"Total papers: {stats['total_papers']}")
if stats.get("by_source_type"):
by_source = ", ".join(
f"{k}: {v}" for k, v in stats["by_source_type"].items()
if args.json:
JSONOutputMixin.output_json(
{
"reindex_complete": True,
"papers_indexed": success_count,
"errors": error_count,
"statistics": stats,
}
)
print(f"By source: {by_source}")
else:
reindex_msg = (
f"Complete: {success_count} papers indexed, {error_count} errors"
)
print(reindex_msg)
print(f"Total papers: {stats['total_papers']}")
if stats.get("by_source_type"):
by_source = ", ".join(
f"{k}: {v}" for k, v in stats["by_source_type"].items()
)
print(f"By source: {by_source}")
return 0 if error_count == 0 else 1
except Exception as e:
print(f"Error during reindex: {e}")
if args.json:
JSONOutputMixin.output_json_error(f"Error during reindex: {e}")
else:
print(f"Error during reindex: {e}")
return 1
+5
View File
@@ -0,0 +1,5 @@
"""Utility functions for paperlib."""
from .json_output import JSONOutputMixin
__all__ = ["JSONOutputMixin"]
+60
View File
@@ -0,0 +1,60 @@
"""JSON output utilities for CLI commands."""
import json
from datetime import datetime
from typing import Any
class JSONOutputMixin:
    """Static helpers for CLI commands that support JSON output.

    Every document printed by this class is a JSON object whose first two
    keys are ``success`` and ``timestamp`` (``datetime.now()`` in ISO 8601
    form), followed by the command-specific payload.
    """

    @staticmethod
    def output_json(data: dict[str, Any], success: bool = True) -> None:
        """Print *data* to stdout wrapped in the standard JSON envelope.

        Args:
            data: Payload entries merged in after ``success`` and
                ``timestamp``. A payload key named ``success`` or
                ``timestamp`` replaces the envelope value, so callers
                should avoid those two names.
            success: Value reported under the ``success`` key.
        """
        envelope: dict[str, Any] = {
            "success": success,
            "timestamp": datetime.now().isoformat(),
        }
        envelope.update(data)
        print(json.dumps(envelope, indent=2, ensure_ascii=False))

    @staticmethod
    def output_json_error(error_message: str, error_code: int = 1) -> None:
        """Print a failure document to stdout.

        The document has ``success`` set to ``False`` and carries the
        message under ``error`` and the code under ``error_code``.

        Args:
            error_message: Human-readable description of the failure.
            error_code: Numeric code reported alongside the message.
        """
        # Delegate so the envelope layout is defined in exactly one place.
        JSONOutputMixin.output_json(
            {"error": error_message, "error_code": error_code},
            success=False,
        )

    @staticmethod
    def format_metadata_for_json(metadata: Any) -> dict[str, Any]:
        """Convert paper metadata to a JSON-serializable dict.

        Args:
            metadata: A ``PaperMetadata`` instance or an already-built dict.

        Returns:
            The dict itself when *metadata* is a dict, the result of
            ``metadata.to_dict()`` for a ``PaperMetadata``, and
            ``{"error": "Unknown metadata format"}`` for anything else.
        """
        # Already a dict (e.g. from a database query): pass it through
        # before touching the model module at all.
        if isinstance(metadata, dict):
            return metadata

        # Imported lazily rather than at module level -- presumably to avoid
        # an import cycle with paperlib.models; confirm before hoisting.
        from paperlib.models import PaperMetadata

        if isinstance(metadata, PaperMetadata):
            return metadata.to_dict()

        # Fallback for other types.
        return {"error": "Unknown metadata format"}

    @staticmethod
    def format_papers_list_for_json(papers: list) -> dict[str, Any]:
        """Format a list of papers for JSON output.

        Args:
            papers: Items accepted by ``format_metadata_for_json``.

        Returns:
            A dict with the formatted items under ``papers`` and their
            count under ``total``.
        """
        formatted_papers = [
            JSONOutputMixin.format_metadata_for_json(paper) for paper in papers
        ]
        return {
            "papers": formatted_papers,
            "total": len(formatted_papers),
        }
+290
View File
@@ -0,0 +1,290 @@
"""Tests for JSON output functionality."""
import json
import subprocess
from pathlib import Path
import pytest
from paperlib.models import PaperMetadata, SourceType
from paperlib.utils import JSONOutputMixin
class TestJSONOutputMixin:
    """Unit tests for the ``JSONOutputMixin`` formatting helpers."""

    def test_format_metadata_for_json(self):
        """A ``PaperMetadata`` instance is rendered as a plain dict."""
        paper = PaperMetadata(
            paper_id="test-paper-1",
            source_type=SourceType.ARXIV,
            source_id="2212.06340",
            title="Test Paper",
            authors=["Alice Smith", "Bob Jones"],
            categories=["cs.AI"],
        )

        rendered = JSONOutputMixin.format_metadata_for_json(paper)

        expected_fields = {
            "paper_id": "test-paper-1",
            "source_type": "arxiv",
            "source_id": "2212.06340",
            "title": "Test Paper",
            "authors": ["Alice Smith", "Bob Jones"],
            "categories": ["cs.AI"],
        }
        for field_name, expected_value in expected_fields.items():
            assert rendered[field_name] == expected_value

    def test_format_metadata_for_json_dict(self):
        """A dict passed as metadata comes back equal to the input."""
        metadata_dict = {
            "paper_id": "test-paper-1",
            "title": "Test Paper",
            "source_type": "local",
        }

        rendered = JSONOutputMixin.format_metadata_for_json(metadata_dict)

        assert rendered == metadata_dict

    def test_format_papers_list_for_json(self):
        """A list of papers yields ``papers`` plus a matching ``total``."""
        specs = [
            ("paper-1", SourceType.LOCAL, "Paper 1"),
            ("paper-2", SourceType.ARXIV, "Paper 2"),
        ]
        papers = [
            PaperMetadata(paper_id=paper_id, source_type=source_type, title=title)
            for paper_id, source_type, title in specs
        ]

        listing = JSONOutputMixin.format_papers_list_for_json(papers)

        assert "papers" in listing
        assert "total" in listing
        assert listing["total"] == 2
        assert len(listing["papers"]) == 2
        # Order of the input list must be preserved in the output.
        assert [entry["paper_id"] for entry in listing["papers"]] == [
            "paper-1",
            "paper-2",
        ]
class TestCLIJSONOutput:
    """End-to-end tests that run the ``paperlib`` CLI with ``--json``.

    Each test works in a scratch directory under ``./.tmp`` (relative to the
    current working directory) and removes it again in a ``finally`` block.
    """

    def run_paperlib_cmd(self, *args):
        """Run ``uv run paperlib <args>`` and return its result.

        Returns:
            A ``(returncode, output, stderr)`` tuple. When ``--json`` is one
            of *args*, ``output`` is stdout parsed as JSON; otherwise it is
            the raw stdout text. Unparseable JSON fails the current test.
        """
        cmd = ["uv", "run", "paperlib", *args]
        result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path.cwd())

        if "--json" not in args:
            return result.returncode, result.stdout, result.stderr

        try:
            output_data = json.loads(result.stdout)
        except json.JSONDecodeError as e:
            # Include stderr: a crash before any JSON is printed is the usual
            # reason stdout does not parse.
            pytest.fail(
                f"Invalid JSON output: {e}\nOutput: {result.stdout}"
                f"\nStderr: {result.stderr}"
            )
        return result.returncode, output_data, result.stderr

    def _scratch_path(self, name, suffix=""):
        """Return the scratch path ``./.tmp/<name>_<hash(self)><suffix>``."""
        return Path("./.tmp") / f"{name}_{hash(self)}{suffix}"

    def _make_library_dir(self, name):
        """Create an empty scratch directory for a library and return it."""
        temp_lib = self._scratch_path(name)
        temp_lib.mkdir(parents=True, exist_ok=True)
        return temp_lib

    def _make_sample_pdf(self, name):
        """Write a minimal PDF stub next to the scratch library and return it."""
        sample_pdf = self._scratch_path(name, ".pdf")
        sample_pdf.write_bytes(b"%PDF-1.4\n%%EOF\n")
        return sample_pdf

    @staticmethod
    def _cleanup(temp_lib, sample_pdf=None):
        """Remove the scratch library and, if given, the sample PDF."""
        import shutil

        if temp_lib.exists():
            shutil.rmtree(temp_lib)
        if sample_pdf is not None and sample_pdf.exists():
            sample_pdf.unlink()

    def test_status_json_output(self):
        """Test status command with JSON output."""
        temp_lib = self._make_library_dir("test_status_json")
        try:
            # Initialize library
            self.run_paperlib_cmd("init", str(temp_lib))

            returncode, output_data, stderr = self.run_paperlib_cmd(
                "status", "--library", str(temp_lib), "--json"
            )

            assert returncode == 0, stderr
            assert isinstance(output_data, dict)
            assert output_data["success"] is True
            assert "timestamp" in output_data
            assert "library_root" in output_data
            assert "config_path" in output_data
            assert "database_path" in output_data
            assert str(temp_lib.resolve()) in output_data["library_root"]
        finally:
            self._cleanup(temp_lib)

    def test_list_json_output_empty(self):
        """Test list command with JSON output for empty library."""
        temp_lib = self._make_library_dir("test_list_json")
        try:
            # Initialize library
            self.run_paperlib_cmd("init", str(temp_lib))

            returncode, output_data, stderr = self.run_paperlib_cmd(
                "list", "--library", str(temp_lib), "--json"
            )

            assert returncode == 0, stderr
            assert isinstance(output_data, dict)
            assert output_data["success"] is True
            assert output_data["papers"] == []
            assert output_data["total"] == 0
        finally:
            self._cleanup(temp_lib)

    def test_import_json_output(self):
        """Test import command with JSON output."""
        temp_lib = self._make_library_dir("test_import_json")
        sample_pdf = self._make_sample_pdf("test_import_json")
        try:
            # Initialize library
            self.run_paperlib_cmd("init", str(temp_lib))

            returncode, output_data, stderr = self.run_paperlib_cmd(
                "import",
                "--pdf",
                str(sample_pdf),
                "--title",
                "Test JSON Import",
                "--library",
                str(temp_lib),
                "--json",
            )

            assert returncode == 0, stderr
            assert isinstance(output_data, dict)
            assert output_data["success"] is True
            assert "paper_id" in output_data
            assert output_data["title"] == "Test JSON Import"
            assert output_data["source_type"] == "local"
            assert "Successfully imported local PDF" in output_data["message"]
            assert "paper" in output_data
            assert isinstance(output_data["paper"], dict)
        finally:
            self._cleanup(temp_lib, sample_pdf)

    def test_show_json_output(self):
        """Test show command with JSON output."""
        temp_lib = self._make_library_dir("test_show_json")
        sample_pdf = self._make_sample_pdf("test_show_json")
        try:
            # Initialize and import
            self.run_paperlib_cmd("init", str(temp_lib))
            import_returncode, import_data, import_stderr = self.run_paperlib_cmd(
                "import",
                "--pdf",
                str(sample_pdf),
                "--title",
                "Test JSON Show",
                "--library",
                str(temp_lib),
                "--json",
            )
            assert import_returncode == 0, import_stderr
            paper_id = import_data["paper_id"]

            returncode, output_data, stderr = self.run_paperlib_cmd(
                "show", paper_id, "--library", str(temp_lib), "--json"
            )

            assert returncode == 0, stderr
            assert isinstance(output_data, dict)
            assert output_data["success"] is True
            assert "paper" in output_data
            assert output_data["paper"]["paper_id"] == paper_id
            assert output_data["paper"]["title"] == "Test JSON Show"
            assert "files_status" in output_data["paper"]
            assert "pdf_exists" in output_data["paper"]["files_status"]
        finally:
            self._cleanup(temp_lib, sample_pdf)

    def test_show_json_not_found(self):
        """Test show command with JSON output for non-existent paper."""
        temp_lib = self._make_library_dir("test_show_json_nf")
        try:
            # Initialize library
            self.run_paperlib_cmd("init", str(temp_lib))

            returncode, output_data, stderr = self.run_paperlib_cmd(
                "show", "nonexistent", "--library", str(temp_lib), "--json"
            )

            assert returncode == 1, stderr
            assert isinstance(output_data, dict)
            assert output_data["success"] is False
            assert "error" in output_data
            assert "Paper not found" in output_data["error"]
        finally:
            self._cleanup(temp_lib)

    def test_convert_json_output(self):
        """Test convert command with JSON output."""
        temp_lib = self._make_library_dir("test_convert_json")
        try:
            # Initialize library
            self.run_paperlib_cmd("init", str(temp_lib))

            # Convert with no papers in the library (JSON)
            returncode, output_data, stderr = self.run_paperlib_cmd(
                "convert", "--library", str(temp_lib), "--json"
            )

            assert returncode == 0, stderr
            assert isinstance(output_data, dict)
            assert output_data["success"] is True
            assert output_data["action"] == "convert_pending"
            assert output_data["success_count"] == 0
            assert output_data["failure_count"] == 0
            assert output_data["total_attempted"] == 0
        finally:
            self._cleanup(temp_lib)