From 76580fc4a20931946fb823d4eda05674ee2eea8d Mon Sep 17 00:00:00 2001
From: Yingjie Wang <phywyj@gmail.com>
Date: Fri, 17 Apr 2026 20:04:32 -0400
Subject: [PATCH] doc: document the --json option

---
 README.md                 |  47 ++++++++++-
 docs/cli.md               | 161 +++++++++++++++++++++++++++++++++++++-
 docs/integration-guide.md |  68 +++++++++++++---
 3 files changed, 259 insertions(+), 17 deletions(-)
diff --git a/README.md b/README.md
index 3228e3b..33165c2 100644
--- a/README.md
+++ b/README.md
@@ -180,14 +180,53 @@ cat path/to/library/papers/.../logs/mineru.log
 
 ## Machine-Readable Output
 
-Most commands support `--json` output for automation:
+Most commands support `--json` output for automation and integration:
 
 ```bash
-paperlib list --json
-paperlib show <paper-id> --json
+# Get library configuration in JSON
 paperlib status --json
+
+# List all papers with metadata
+paperlib list --json
+
+# Get detailed paper information
+paperlib show <paper-id> --json
+
+# Get import results
+paperlib import --arxiv 2212.06340 --json
+
+# Get conversion status and results
+paperlib convert --json
+paperlib convert --paper-id <paper-id> --json
+
+# Get reindexing statistics
+paperlib reindex --json
 ```
 
+### JSON Output Format
+
+All JSON responses follow a consistent envelope format:
+
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  // Command-specific fields appear here at the top level
+}
+```
+
+For errors:
+```json
+{
+  "success": false,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "error": "Error message here",
+  "error_code": 1
+}
+```
+
+This structured output enables reliable automation, scripting, and integration with other tools. The JSON format is stable across paperlib versions.
+
 ## Development
 
 paperlib is designed for extensibility and integration with higher-level tools.
@@ -238,7 +277,7 @@ paperlib follows clean architecture principles:
 - [x] Comprehensive test suite
 - [ ] Search command implementation
 - [ ] AI summarization with provider abstraction
-- [ ] JSON output for all commands
+- [x] JSON output for core commands
 - [ ] Configuration file support
 - [ ] Advanced arXiv workflows
 
diff --git a/docs/cli.md b/docs/cli.md
index d61bcfe..55fcec7 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -54,6 +54,7 @@ Import papers into the library from various sources.
 - `--notes TEXT`: Add notes about the paper
 - `--tags TAG1 TAG2`: Add tags to the paper
 - `--library PATH`: Specify library directory
+- `--json`: Output import results in JSON format for automation
 
 **Examples:**
 ```bash
@@ -68,6 +69,9 @@ paperlib import --arxiv https://arxiv.org/abs/2212.06340
 
 # Import to specific library
 paperlib import --pdf paper.pdf --library ~/research
+
+# Import with JSON output for automation
+paperlib import --arxiv 2212.06340 --json
 ```
 
 **Behavior:**
@@ -160,6 +164,7 @@ Convert papers from PDF to Markdown using MinerU.
 - `--retry-failed`: Retry papers with failed conversion status
 - `--force`: Force reconvert all papers (including successful ones)
 - `--no-ui`: Disable rich UI display (useful for scripting)
+- `--json`: Output conversion results in JSON format (automatically disables UI)
 
 **Examples:**
 ```bash
@@ -180,6 +185,10 @@ paperlib convert --no-ui
 
 # Convert in specific library
 paperlib convert --library ~/research
+
+# Get JSON output for automation (disables UI automatically)
+paperlib convert --json
+paperlib convert --paper-id arxiv-2212_06340 --json
 ```
 
 **Behavior:**
@@ -205,6 +214,7 @@ Rebuild the search index from stored paper metadata.
 
 **Options:**
 - `--library PATH`: Specify library directory
+- `--json`: Output reindexing results and statistics in JSON format
 
 **Examples:**
 ```bash
@@ -213,6 +223,9 @@ paperlib reindex
 
 # Rebuild index for specific library
 paperlib reindex --library ~/research
+
+# Get JSON output with statistics
+paperlib reindex --json
 ```
 
 **Behavior:**
@@ -239,6 +252,9 @@ paperlib status
 
 # Show specific library status
 paperlib status --library ~/research
+
+# Get JSON output for automation
+paperlib status --json
 ```
 
 **Output:**
@@ -288,21 +304,160 @@ paperlib looks for configuration in these locations (in order):
 
 ## JSON Output Format
 
-When using `--json`, commands output structured data suitable for programmatic consumption:
+When using `--json`, commands output structured data suitable for programmatic consumption. All JSON responses follow a consistent envelope format with standard fields:
 
+### Standard Response Envelope
+
+**Success Response:**
 ```json
 {
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  // Command-specific data fields below
+}
+```
+
+**Error Response:**
+```json
+{
+  "success": false,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "error": "Error message here",
+  "error_code": 1
+}
+```
+
+### Command-Specific JSON Formats
+
+#### `paperlib status --json`
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "library_root": "/home/user/papers",
+  "config_path": "/home/user/papers/config/config.toml",
+  "database_path": "/home/user/papers/db/paperlib.sqlite3",
+  "papers_dir": "/home/user/papers/papers",
+  "inbox_dir": "/home/user/papers/inbox",
+  "cache_dir": "/home/user/papers/cache"
+}
+```
+
+#### `paperlib list --json`
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
   "papers": [
     {
       "paper_id": "arxiv-2212_06340",
+      "source_type": "arxiv",
+      "source_id": "2212.06340",
       "title": "Example Paper",
       "authors": ["Alice Smith", "Bob Jones"],
+      "published_date": "2022-12-06T00:00:00.000Z",
+      "categories": ["cs.AI"],
       "conversion_status": "success",
-      "imported_at": "2024-01-15T10:30:00"
+      "summary_status": "pending",
+      "imported_at": "2024-01-15T10:30:00.000Z",
+      "tags": [],
+      "notes": ""
     }
   ],
   "total": 1
 }
 ```
 
-This format is stable across paperlib versions for reliable automation.
\ No newline at end of file
+#### `paperlib show <paper-id> --json`
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "paper": {
+    "paper_id": "arxiv-2212_06340",
+    "source_type": "arxiv",
+    "source_id": "2212.06340",
+    "title": "Example Paper",
+    "authors": ["Alice Smith", "Bob Jones"],
+    "conversion_status": "success",
+    "summary_status": "pending",
+    "pdf_path": "papers/arxiv/2022/arxiv-2212_06340.pdf",
+    "paper_md_path": "papers/arxiv/2022/arxiv-2212_06340.md",
+    "files_status": {
+      "pdf_exists": true,
+      "markdown_exists": true,
+      "summary_exists": false
+    }
+  }
+}
+```
+
+#### `paperlib import --json`
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "paper_id": "arxiv-2212_06340",
+  "title": "Example Paper Title",
+  "source_type": "arxiv",
+  "source_id": "2212.06340",
+  "authors": ["Alice Smith", "Bob Jones"],
+  "message": "Successfully imported arXiv paper",
+  "paper": {
+    // Full paper metadata object
+  }
+}
+```
+
+#### `paperlib convert --json`
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "action": "convert_pending",
+  "success_count": 5,
+  "failure_count": 1,
+  "total_attempted": 6
+}
+```
+
+For single paper conversion (`--paper-id`):
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "paper_id": "arxiv-2212_06340",
+  "conversion_success": true,
+  "conversion_status": "success",
+  "message": "Successfully converted paper"
+}
+```
+
+#### `paperlib reindex --json`
+```json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "reindex_complete": true,
+  "papers_indexed": 42,
+  "errors": 1,
+  "statistics": {
+    "total_papers": 42,
+    "by_source_type": {
+      "arxiv": 38,
+      "local": 4
+    }
+  }
+}
+```
+
+### JSON Data Types
+
+- **Timestamps**: Always in ISO 8601 format (`YYYY-MM-DDTHH:mm:ss.sssZ`)
+- **Paper IDs**: String identifiers (e.g., `"arxiv-2212_06340"`, `"local-a1b2c3d4"`)
+- **Status Fields**: String enums (`"pending"`, `"success"`, `"failed"`)
+- **Authors**: Array of strings
+- **Categories/Tags**: Array of strings
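+- **File Paths**: Per-paper paths (e.g., `pdf_path`, `paper_md_path`) are relative to the library root; `status` paths (e.g., `library_root`, `config_path`) are absolute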
+
+This JSON format is stable across paperlib versions for reliable automation and scripting.
\ No newline at end of file
diff --git a/docs/integration-guide.md b/docs/integration-guide.md
index 2555997..156efe3 100644
--- a/docs/integration-guide.md
+++ b/docs/integration-guide.md
@@ -18,27 +18,38 @@ paperlib is designed as a **library engine** that higher-level tools can build u
 Most paperlib commands support `--json` output for automation:
 
 ```bash
-# Get library statistics
+# Get library configuration
 paperlib status --json
 {
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
   "library_root": "/home/user/papers",
-  "total_papers": 42,
-  "by_status": {"converted": 38, "pending": 4},
-  "last_updated": "2024-01-15T10:30:00Z"
+  "config_path": "/home/user/papers/config/config.toml",
+  "database_path": "/home/user/papers/db/paperlib.sqlite3",
+  "papers_dir": "/home/user/papers/papers",
+  "inbox_dir": "/home/user/papers/inbox",
+  "cache_dir": "/home/user/papers/cache"
 }
 
 # List papers with metadata
 paperlib list --json
 {
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
   "papers": [
     {
       "paper_id": "arxiv-2212_06340",
+      "source_type": "arxiv",
+      "source_id": "2212.06340",
       "title": "Example Paper",
       "authors": ["Alice Smith", "Bob Jones"],
+      "published_date": "2022-12-06T00:00:00.000Z",
       "categories": ["cs.AI"],
       "conversion_status": "success",
       "summary_status": "pending",
-      "imported_at": "2024-01-15T10:30:00Z"
+      "imported_at": "2024-01-15T10:30:00.000Z",
+      "tags": [],
+      "notes": ""
     }
   ],
   "total": 1
@@ -48,9 +59,44 @@ paperlib list --json
 paperlib import --arxiv 2212.06340 --json
 {
   "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
   "paper_id": "arxiv-2212_06340",
   "title": "Example Paper Title",
-  "message": "Successfully imported arXiv paper"
+  "source_type": "arxiv",
+  "source_id": "2212.06340",
+  "authors": ["Alice Smith", "Bob Jones"],
+  "message": "Successfully imported arXiv paper",
+  "paper": {
+    // Full paper metadata object
+  }
+}
+
+# Convert papers with JSON output
+paperlib convert --json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "action": "convert_pending",
+  "success_count": 5,
+  "failure_count": 1,
+  "total_attempted": 6
+}
+
+# Reindex with JSON output
+paperlib reindex --json
+{
+  "success": true,
+  "timestamp": "2024-01-15T10:30:00.000Z",
+  "reindex_complete": true,
+  "papers_indexed": 42,
+  "errors": 1,
+  "statistics": {
+    "total_papers": 42,
+    "by_source_type": {
+      "arxiv": 38,
+      "local": 4
+    }
+  }
 }
 ```
 
@@ -89,8 +137,8 @@ while read arxiv_id; do
     paperlib import --arxiv "$arxiv_id" --library "$LIBRARY" --json
 done
 
-# Convert newly imported papers
-paperlib convert --library "$LIBRARY"
+# Convert newly imported papers with JSON output
+paperlib convert --library "$LIBRARY" --json
 
 # Generate daily report
 paperlib list --library "$LIBRARY" --json | \
@@ -120,8 +168,8 @@ while IFS= read -r pdf_path; do
     fi
 done < "$PAPER_LIST"
 
-# Convert all pending papers
-paperlib convert --library "$LIBRARY"
+# Convert all pending papers with JSON output
+paperlib convert --library "$LIBRARY" --json
 ```
 
 ## Python API