From 76580fc4a20931946fb823d4eda05674ee2eea8d Mon Sep 17 00:00:00 2001 From: Yingjie Wang Date: Fri, 17 Apr 2026 20:04:32 -0400 Subject: [PATCH] doc: doc the --json option --- README.md | 47 ++++++++++- docs/cli.md | 161 +++++++++++++++++++++++++++++++++++++- docs/integration-guide.md | 68 +++++++++++++--- 3 files changed, 259 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 3228e3b..33165c2 100644 --- a/README.md +++ b/README.md @@ -180,14 +180,53 @@ cat path/to/library/papers/.../logs/mineru.log ## Machine-Readable Output -Most commands support `--json` output for automation: +Most commands support `--json` output for automation and integration: ```bash -paperlib list --json -paperlib show <paper-id> --json +# Get library configuration in JSON paperlib status --json + +# List all papers with metadata +paperlib list --json + +# Get detailed paper information +paperlib show <paper-id> --json + +# Get import results +paperlib import --arxiv 2212.06340 --json + +# Get conversion status and results +paperlib convert --json +paperlib convert --paper-id <paper-id> --json + +# Get reindexing statistics +paperlib reindex --json ``` +### JSON Output Format + +All JSON responses follow a consistent envelope format: + +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + /* command-specific fields appear here at the top level (see docs/cli.md) */ +} +``` + +For errors: +```json +{ + "success": false, + "timestamp": "2024-01-15T10:30:00.000Z", + "error": "Error message here", + "error_code": 1 +} +``` + +This structured output enables reliable automation, scripting, and integration with other tools. The JSON format is stable across paperlib versions. + ## Development paperlib is designed for extensibility and integration with higher-level tools. 
@@ -238,7 +277,7 @@ paperlib follows clean architecture principles: - [x] Comprehensive test suite - [ ] Search command implementation - [ ] AI summarization with provider abstraction -- [ ] JSON output for all commands +- [x] JSON output for core commands - [ ] Configuration file support - [ ] Advanced arXiv workflows diff --git a/docs/cli.md b/docs/cli.md index d61bcfe..55fcec7 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -54,6 +54,7 @@ Import papers into the library from various sources. - `--notes TEXT`: Add notes about the paper - `--tags TAG1 TAG2`: Add tags to the paper - `--library PATH`: Specify library directory +- `--json`: Output import results in JSON format for automation **Examples:** ```bash @@ -68,6 +69,9 @@ paperlib import --arxiv https://arxiv.org/abs/2212.06340 # Import to specific library paperlib import --pdf paper.pdf --library ~/research + +# Import with JSON output for automation +paperlib import --arxiv 2212.06340 --json ``` **Behavior:** @@ -160,6 +164,7 @@ Convert papers from PDF to Markdown using MinerU. - `--retry-failed`: Retry papers with failed conversion status - `--force`: Force reconvert all papers (including successful ones) - `--no-ui`: Disable rich UI display (useful for scripting) +- `--json`: Output conversion results in JSON format (automatically disables UI) **Examples:** ```bash @@ -180,6 +185,10 @@ paperlib convert --no-ui # Convert in specific library paperlib convert --library ~/research + +# Get JSON output for automation (disables UI automatically) +paperlib convert --json +paperlib convert --paper-id arxiv-2212_06340 --json ``` **Behavior:** @@ -205,6 +214,7 @@ Rebuild the search index from stored paper metadata. 
**Options:** - `--library PATH`: Specify library directory +- `--json`: Output reindexing results and statistics in JSON format **Examples:** ```bash @@ -213,6 +223,9 @@ paperlib reindex # Rebuild index for specific library paperlib reindex --library ~/research + +# Get JSON output with statistics +paperlib reindex --json ``` **Behavior:** @@ -239,6 +252,9 @@ paperlib status # Show specific library status paperlib status --library ~/research + +# Get JSON output for automation +paperlib status --json ``` **Output:** @@ -288,21 +304,160 @@ paperlib looks for configuration in these locations (in order): ## JSON Output Format -When using `--json`, commands output structured data suitable for programmatic consumption: +When using `--json`, commands output structured data suitable for programmatic consumption. All JSON responses follow a consistent envelope format with standard fields: +### Standard Response Envelope + +**Success Response:** ```json { + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + // Command-specific data fields below +} +``` + +**Error Response:** +```json +{ + "success": false, + "timestamp": "2024-01-15T10:30:00.000Z", + "error": "Error message here", + "error_code": 1 +} +``` + +### Command-Specific JSON Formats + +#### `paperlib status --json` +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "library_root": "/home/user/papers", + "config_path": "/home/user/papers/config/config.toml", + "database_path": "/home/user/papers/db/paperlib.sqlite3", + "papers_dir": "/home/user/papers/papers", + "inbox_dir": "/home/user/papers/inbox", + "cache_dir": "/home/user/papers/cache" +} +``` + +#### `paperlib list --json` +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", "papers": [ { "paper_id": "arxiv-2212_06340", + "source_type": "arxiv", + "source_id": "2212.06340", "title": "Example Paper", "authors": ["Alice Smith", "Bob Jones"], + "published_date": "2022-12-06T00:00:00.000Z", + "categories": 
["cs.AI"], "conversion_status": "success", - "imported_at": "2024-01-15T10:30:00" + "summary_status": "pending", + "imported_at": "2024-01-15T10:30:00.000Z", + "tags": [], + "notes": "" } ], "total": 1 } ``` -This format is stable across paperlib versions for reliable automation. \ No newline at end of file +#### `paperlib show --json` +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "paper": { + "paper_id": "arxiv-2212_06340", + "source_type": "arxiv", + "source_id": "2212.06340", + "title": "Example Paper", + "authors": ["Alice Smith", "Bob Jones"], + "conversion_status": "success", + "summary_status": "pending", + "pdf_path": "papers/arxiv/2022/arxiv-2212_06340.pdf", + "paper_md_path": "papers/arxiv/2022/arxiv-2212_06340.md", + "files_status": { + "pdf_exists": true, + "markdown_exists": true, + "summary_exists": false + } + } +} +``` + +#### `paperlib import --json` +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "paper_id": "arxiv-2212_06340", + "title": "Example Paper Title", + "source_type": "arxiv", + "source_id": "2212.06340", + "authors": ["Alice Smith", "Bob Jones"], + "message": "Successfully imported arXiv paper", + "paper": { + // Full paper metadata object + } +} +``` + +#### `paperlib convert --json` +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "action": "convert_pending", + "success_count": 5, + "failure_count": 1, + "total_attempted": 6 +} +``` + +For single paper conversion (`--paper-id`): +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "paper_id": "arxiv-2212_06340", + "conversion_success": true, + "conversion_status": "success", + "message": "Successfully converted paper" +} +``` + +#### `paperlib reindex --json` +```json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "reindex_complete": true, + "papers_indexed": 42, + "errors": 1, + "statistics": { + "total_papers": 42, + "by_source_type": { + "arxiv": 38, + 
"local": 4 + } + } +} +``` + +### JSON Data Types + +- **Timestamps**: Always in ISO 8601 format (`YYYY-MM-DDTHH:mm:ss.sssZ`) +- **Paper IDs**: String identifiers (e.g., `"arxiv-2212_06340"`, `"local-a1b2c3d4"`) +- **Status Fields**: String enums (`"pending"`, `"success"`, `"failed"`) +- **Authors**: Array of strings +- **Categories/Tags**: Array of strings +- **File Paths**: Per-paper paths (e.g., `pdf_path`, `paper_md_path`) are relative to the library root; `paperlib status` reports absolute paths + +This JSON format is stable across paperlib versions for reliable automation and scripting. \ No newline at end of file diff --git a/docs/integration-guide.md b/docs/integration-guide.md index 2555997..156efe3 100644 --- a/docs/integration-guide.md +++ b/docs/integration-guide.md @@ -18,27 +18,38 @@ paperlib is designed as a **library engine** that higher-level tools can build u Most paperlib commands support `--json` output for automation: ```bash -# Get library statistics +# Get library configuration paperlib status --json { + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", "library_root": "/home/user/papers", - "total_papers": 42, - "by_status": {"converted": 38, "pending": 4}, - "last_updated": "2024-01-15T10:30:00Z" + "config_path": "/home/user/papers/config/config.toml", + "database_path": "/home/user/papers/db/paperlib.sqlite3", + "papers_dir": "/home/user/papers/papers", + "inbox_dir": "/home/user/papers/inbox", + "cache_dir": "/home/user/papers/cache" } # List papers with metadata paperlib list --json { + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", "papers": [ { "paper_id": "arxiv-2212_06340", + "source_type": "arxiv", + "source_id": "2212.06340", "title": "Example Paper", "authors": ["Alice Smith", "Bob Jones"], + "published_date": "2022-12-06T00:00:00.000Z", "categories": ["cs.AI"], "conversion_status": "success", "summary_status": "pending", - "imported_at": "2024-01-15T10:30:00Z" + "imported_at": "2024-01-15T10:30:00.000Z", + "tags": [], + 
"notes": "" } ], "total": 1 @@ -48,9 +59,46 @@ paperlib list --json paperlib import 
--arxiv 2212.06340 --json { "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", "paper_id": "arxiv-2212_06340", "title": "Example Paper Title", - "message": "Successfully imported arXiv paper" + "source_type": "arxiv", + "source_id": "2212.06340", + "authors": ["Alice Smith", "Bob Jones"], + "message": "Successfully imported arXiv paper", + "paper": { + // Full paper metadata object + } + } +} + +# Convert papers with JSON output +paperlib convert --json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "action": "convert_pending", + "success_count": 5, + "failure_count": 1, + "total_attempted": 6 +} + +# Reindex with JSON output +paperlib reindex --json +{ + "success": true, + "timestamp": "2024-01-15T10:30:00.000Z", + "reindex_complete": true, + "papers_indexed": 42, + "errors": 1, + "statistics": { + "total_papers": 42, + "by_source_type": { + "arxiv": 38, + "local": 4 + } + } +} } ``` @@ -89,8 +137,8 @@ while read arxiv_id; do paperlib import --arxiv "$arxiv_id" --library "$LIBRARY" --json done -# Convert newly imported papers -paperlib convert --library "$LIBRARY" +# Convert newly imported papers with JSON output +paperlib convert --library "$LIBRARY" --json # Generate daily report paperlib list --library "$LIBRARY" --json | \ @@ -120,8 +168,8 @@ while IFS= read -r pdf_path; do fi done < "$PAPER_LIST" -# Convert all pending papers -paperlib convert --library "$LIBRARY" +# Convert all pending papers with JSON output +paperlib convert --library "$LIBRARY" --json ``` ## Python API