update: add core functionality

This commit is contained in:
2026-04-17 14:40:46 -04:00
parent 1cebf30491
commit 82e4ed6fec
16 changed files with 1379 additions and 89 deletions
+1
View File
@@ -5,6 +5,7 @@ description = "Local-first CLI toolkit for managing a paper library"
readme = "README.md" readme = "README.md"
requires-python = ">=3.13,<3.14" requires-python = ">=3.13,<3.14"
dependencies = [ dependencies = [
"arxiv>=2.0.0",
"mineru[core]>=3.0.9", "mineru[core]>=3.0.9",
"rich>=15.0.0", "rich>=15.0.0",
"typer>=0.24.1", "typer>=0.24.1",
-1
View File
@@ -2,6 +2,5 @@
from paperlib.cli import main from paperlib.cli import main
if __name__ == "__main__": if __name__ == "__main__":
main() main()
+336 -86
View File
@@ -7,120 +7,370 @@ from pathlib import Path
from paperlib import __version__ from paperlib import __version__
from paperlib.config import LibraryPaths from paperlib.config import LibraryPaths
from paperlib.converter import MinerUConverter
from paperlib.importer import ArxivImporter, LocalImporter
from paperlib.index import DatabaseManager
from paperlib.storage import PaperStorageManager
def _resolve_library_root(path: Path | None) -> Path: def _resolve_library_root(path: Path | None) -> Path:
"""Resolve the target library root, defaulting to the current directory.""" """Resolve the target library root, defaulting to the current directory."""
return (path or Path.cwd()).expanduser() return (path or Path.cwd()).expanduser()
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the ``paperlib`` parser together with all of its subcommands.

    Every subcommand stores its handler function under ``handler`` via
    ``set_defaults`` so that ``main`` can dispatch on the parsed namespace.
    """

    def add_library_option(sub: argparse.ArgumentParser) -> None:
        # The short-form ``--library/-L`` option shared by most subcommands.
        sub.add_argument("--library", "-L", default=".", help="Library root")

    root = argparse.ArgumentParser(
        prog="paperlib",
        description="Local-first paper library engine with a CLI.",
    )
    root.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    commands = root.add_subparsers(dest="command", metavar="COMMAND")

    # init
    init_cmd = commands.add_parser(
        "init",
        help="Initialize a paper library directory.",
    )
    init_cmd.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Directory where the library should be initialized.",
    )
    init_cmd.set_defaults(handler=_handle_init)

    # status (keeps its own, longer help text for --library)
    status_cmd = commands.add_parser(
        "status",
        help="Show the resolved library layout for the selected root.",
    )
    status_cmd.add_argument(
        "--library",
        "-L",
        default=".",
        help="Library root to inspect. Defaults to the current directory.",
    )
    status_cmd.set_defaults(handler=_handle_status)

    # list
    list_cmd = commands.add_parser("list", help="List imported papers.")
    add_library_option(list_cmd)
    list_cmd.set_defaults(handler=_handle_list)

    # show
    show_cmd = commands.add_parser(
        "show",
        help="Show detailed information for a paper.",
    )
    show_cmd.add_argument("paper_id", help="Paper ID to show")
    add_library_option(show_cmd)
    show_cmd.set_defaults(handler=_handle_show)

    # search
    search_cmd = commands.add_parser(
        "search",
        help="Search the paper library.",
    )
    search_cmd.set_defaults(handler=_handle_search)

    # import: exactly one of --pdf / --arxiv must be given
    import_cmd = commands.add_parser(
        "import",
        help="Import a paper into the library.",
    )
    source = import_cmd.add_mutually_exclusive_group(required=True)
    source.add_argument("--pdf", type=Path, help="Path to a local PDF file")
    source.add_argument("--arxiv", type=str, help="arXiv ID or URL")
    import_cmd.add_argument("--title", type=str, help="Title for local PDFs")
    import_cmd.add_argument("--notes", type=str, default="", help="Notes")
    import_cmd.add_argument("--tags", nargs="*", default=[], help="Tags")
    add_library_option(import_cmd)
    import_cmd.set_defaults(handler=_handle_import)

    # convert
    convert_cmd = commands.add_parser(
        "convert",
        help="Convert papers to Markdown.",
    )
    add_library_option(convert_cmd)
    convert_cmd.add_argument("--paper-id", help="Convert specific paper by ID")
    convert_cmd.set_defaults(handler=_handle_convert)

    # reindex
    reindex_cmd = commands.add_parser(
        "reindex",
        help="Rebuild the search index from stored papers.",
    )
    add_library_option(reindex_cmd)
    reindex_cmd.set_defaults(handler=_handle_reindex)

    return root
def _format_paths(paths: LibraryPaths) -> str: def _format_paths(paths: LibraryPaths) -> str:
"""Render library paths in a simple, grep-friendly format.""" """Render library paths in a simple, grep-friendly format."""
lines = [ lines = [
f"root: {paths.root}", f"root: {paths.root}",
f"config: {paths.config_path}", f"config: {paths.config_path}",
f"database: {paths.db_path}", f"database: {paths.db_path}",
f"papers: {paths.papers_dir}", f"papers: {paths.papers_dir}",
f"inbox: {paths.inbox_dir}", f"inbox: {paths.inbox_dir}",
f"cache: {paths.cache_dir}", f"cache: {paths.cache_dir}",
] ]
return "\n".join(lines) return "\n".join(lines)
def _handle_init(args: argparse.Namespace) -> int:
    """Create the library directory layout under ``args.path`` and report it.

    Always returns exit code 0.
    """
    library = LibraryPaths.from_root(Path(args.path))
    library.create_directories()
    summary = _format_paths(library)
    print(f"Initialized paper library at {library.root}")
    print(summary)
    return 0
def _handle_status(args: argparse.Namespace) -> int:
    """Print the resolved path layout for the library at ``args.library``.

    Always returns exit code 0.
    """
    root = _resolve_library_root(Path(args.library))
    layout = LibraryPaths.from_root(root)
    print(_format_paths(layout))
    return 0
def _handle_list(args: argparse.Namespace) -> int:
    """Print a short entry for every paper found in storage.

    Papers are read from storage rather than from the index. Returns 0 on
    success (including an empty library) and 1 if anything raises.
    """
    try:
        library_arg = args.library if hasattr(args, "library") else "."
        paths = LibraryPaths.from_root(_resolve_library_root(Path(library_arg)))
        storage = PaperStorageManager(paths)
        index = DatabaseManager(paths)

        # Create the index database if it is missing, even though the
        # listing itself only reads from storage.
        index.initialize_database()

        papers = list(storage.list_all_papers())
        if not papers:
            print("No papers found in library.")
            return 0

        print(f"Found {len(papers)} papers:")
        print()

        for paper in papers:
            # One badge per completed processing stage.
            badges = ""
            if paper.conversion_status.value == "success":
                badges += "📄"
            if paper.summary_status.value == "success":
                badges += "📝"

            print(f"{badges} {paper.paper_id}")
            print(f" {paper.title}")

            if paper.authors:
                shown = ", ".join(paper.authors[:3])
                hidden = len(paper.authors) - 3
                if hidden > 0:
                    shown += f" (+{hidden} more)"
                print(f" By: {shown}")

            if paper.categories:
                print(f" Categories: {', '.join(paper.categories)}")
            print()
        return 0
    except Exception as exc:
        print(f"Error listing papers: {exc}")
        return 1
def _handle_show(_: argparse.Namespace) -> int: def _handle_show(args: argparse.Namespace) -> int:
"""Placeholder for showing paper details.""" """Show detailed information for a paper."""
print("Showing paper details is not implemented yet.") if not hasattr(args, "paper_id") or not args.paper_id:
return 0 print("Please specify a paper ID with --paper-id")
return 1
try:
paths = LibraryPaths.from_root(
_resolve_library_root(
Path(args.library if hasattr(args, "library") else ".")
)
)
storage_manager = PaperStorageManager(paths)
# Find paper by ID
for metadata in storage_manager.list_all_papers():
if metadata.paper_id == args.paper_id:
print(f"Paper ID: {metadata.paper_id}")
print(f"Source: {metadata.source_type.value}")
if metadata.source_id:
print(f"Source ID: {metadata.source_id}")
print(f"Title: {metadata.title}")
if metadata.authors:
print(f"Authors: {', '.join(metadata.authors)}")
if metadata.published_date:
print(f"Published: {metadata.published_date.strftime('%Y-%m-%d')}")
if metadata.categories:
print(f"Categories: {', '.join(metadata.categories)}")
if metadata.tags:
print(f"Tags: {', '.join(metadata.tags)}")
print(f"Imported: {metadata.imported_at.strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Conversion Status: {metadata.conversion_status.value}")
print(f"Summary Status: {metadata.summary_status.value}")
if metadata.notes:
print(f"Notes: {metadata.notes}")
# Show file paths
print("\nFiles:")
if metadata.pdf_path:
pdf_path = paths.root / metadata.pdf_path
exists = "" if pdf_path.exists() else ""
print(f" PDF: {exists} {metadata.pdf_path}")
if metadata.paper_md_path:
md_path = paths.root / metadata.paper_md_path
exists = "" if md_path.exists() else ""
print(f" Markdown: {exists} {metadata.paper_md_path}")
if metadata.summary_json_path:
summary_path = paths.root / metadata.summary_json_path
exists = "" if summary_path.exists() else ""
print(f" Summary: {exists} {metadata.summary_json_path}")
return 0
print(f"Paper not found: {args.paper_id}")
return 1
except Exception as e:
print(f"Error showing paper: {e}")
return 1
def _handle_search(_: argparse.Namespace) -> int: def _handle_search(_: argparse.Namespace) -> int:
"""Placeholder for searching the paper library.""" """Placeholder for searching the paper library."""
print("Search is not implemented yet.") print("Search is not implemented yet.")
return 0 return 0
def _handle_import(args: argparse.Namespace) -> int:
    """Import one paper (local PDF or arXiv) and add it to the index.

    Returns 0 on success and 1 if the import or indexing raises.
    """
    try:
        library_paths = LibraryPaths.from_root(
            _resolve_library_root(Path(args.library))
        )
        storage = PaperStorageManager(library_paths)
        index = DatabaseManager(library_paths)

        # The index must exist before the imported paper can be recorded.
        index.initialize_database()

        if args.pdf:
            # Local file: the title is optional on the command line.
            paper = LocalImporter(storage).import_pdf(
                pdf_path=args.pdf,
                title=args.title or "",
                notes=args.notes,
                tags=args.tags,
            )
            index.index_paper(paper)
            print(f"Successfully imported local PDF: {paper.paper_id}")
            print(f"Title: {paper.title}")
        elif args.arxiv:
            # arXiv: bibliographic data comes from the importer.
            paper = ArxivImporter(storage).import_arxiv_paper(
                arxiv_input=args.arxiv,
                notes=args.notes,
                tags=args.tags,
            )
            index.index_paper(paper)
            print(f"Successfully imported arXiv paper: {paper.paper_id}")
            print(f"Title: {paper.title}")
            print(f"Authors: {', '.join(paper.authors)}")
        return 0
    except Exception as exc:
        print(f"Error importing paper: {exc}")
        return 1
def _handle_convert(args: argparse.Namespace) -> int:
    """Convert one paper, or every pending paper, to Markdown.

    With ``args.paper_id`` set, only that paper is converted; otherwise all
    papers whose conversion is pending are processed.

    Returns:
        0 when every attempted conversion succeeded; 1 when a conversion
        failed, the requested paper does not exist, or an error occurred.
    """
    try:
        paths = LibraryPaths.from_root(_resolve_library_root(Path(args.library)))
        storage_manager = PaperStorageManager(paths)
        converter = MinerUConverter(storage_manager)

        if not args.paper_id:
            success_count, failure_count = converter.convert_all_pending()
            print(f"Complete: {success_count} successful, {failure_count} failed")
            return 0 if failure_count == 0 else 1

        metadata = next(
            (
                m
                for m in storage_manager.list_all_papers()
                if m.paper_id == args.paper_id
            ),
            None,
        )
        if metadata is None:
            print(f"Paper not found: {args.paper_id}")
            return 1

        if converter.convert_paper(metadata):
            print(f"Successfully converted paper: {metadata.paper_id}")
            return 0

        # A failed conversion must be visible in the exit code, matching the
        # behaviour of the convert-all branch above.
        print(f"Failed to convert paper: {metadata.paper_id}")
        return 1
    except Exception as e:
        print(f"Error during conversion: {e}")
        return 1
def _handle_reindex(args: argparse.Namespace) -> int:
    """Rebuild the index from the papers in storage and print a summary.

    Returns 0 when every paper was indexed, 1 when any paper failed or an
    error was raised.
    """
    try:
        library_paths = LibraryPaths.from_root(
            _resolve_library_root(Path(args.library))
        )
        storage = PaperStorageManager(library_paths)
        index = DatabaseManager(library_paths)

        print("Rebuilding search index...")

        # Make sure the schema exists before repopulating it.
        index.initialize_database()
        indexed, failed = index.reindex_from_storage(storage)
        print(f"Reindex complete: {indexed} papers indexed, {failed} errors")

        # Summary statistics from the freshly built index.
        stats = index.get_statistics()
        print(f"Total papers: {stats['total_papers']}")
        if stats.get("by_source_type"):
            breakdown = [f"{k}: {v}" for k, v in stats["by_source_type"].items()]
            by_source = ", ".join(breakdown)
            print(f"By source: {by_source}")

        if failed == 0:
            return 0
        return 1
    except Exception as exc:
        print(f"Error during reindex: {exc}")
        return 1
def main() -> None:
    """Parse the command line and exit with the chosen handler's return code.

    When no subcommand was given, the help text is printed and the process
    exits with status 0.
    """
    parser = _build_parser()
    namespace = parser.parse_args()

    if hasattr(namespace, "handler"):
        raise SystemExit(namespace.handler(namespace))

    parser.print_help()
    raise SystemExit(0)
+1 -2
View File
@@ -5,7 +5,6 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
DEFAULT_CONFIG_DIRNAME = "config" DEFAULT_CONFIG_DIRNAME = "config"
DEFAULT_DB_DIRNAME = "db" DEFAULT_DB_DIRNAME = "db"
DEFAULT_CACHE_DIRNAME = "cache" DEFAULT_CACHE_DIRNAME = "cache"
@@ -29,7 +28,7 @@ class LibraryPaths:
config_path: Path config_path: Path
@classmethod @classmethod
def from_root(cls, root: Path) -> "LibraryPaths": def from_root(cls, root: Path) -> LibraryPaths:
"""Build a standard library layout from a root directory.""" """Build a standard library layout from a root directory."""
resolved_root = root.expanduser().resolve() resolved_root = root.expanduser().resolve()
config_dir = resolved_root / DEFAULT_CONFIG_DIRNAME config_dir = resolved_root / DEFAULT_CONFIG_DIRNAME
+5
View File
@@ -0,0 +1,5 @@
"""PDF conversion functionality for paperlib."""
from .mineru_converter import MinerUConverter
__all__ = ["MinerUConverter"]
+134
View File
@@ -0,0 +1,134 @@
"""PDF to Markdown conversion using MinerU."""
from __future__ import annotations
import logging
import subprocess
import sys
from paperlib.models import ConversionStatus, PaperMetadata
from paperlib.storage import PaperStorageManager
class MinerUConverter:
    """Convert stored PDFs to Markdown by running MinerU in a subprocess.

    Conversion progress is recorded on the paper's metadata
    (``conversion_status``) and persisted through the storage manager.
    """

    def __init__(self, storage_manager: PaperStorageManager) -> None:
        self.storage_manager = storage_manager
        self.logger = logging.getLogger(__name__)

    def is_mineru_available(self) -> bool:
        """Return True if ``import mineru`` succeeds in a child interpreter."""
        try:
            result = subprocess.run(
                [sys.executable, "-c", "import mineru"],
                capture_output=True,
                check=False,
            )
        except (subprocess.SubprocessError, OSError):
            return False
        return result.returncode == 0

    def _set_status(self, metadata: PaperMetadata, status: ConversionStatus) -> None:
        """Record *status* on *metadata* and persist it via the storage manager."""
        metadata.conversion_status = status
        self.storage_manager.update_paper_metadata(metadata)

    @staticmethod
    def _find_markdown_output(directory):
        """Return the Markdown file to use from *directory*, or ``None``.

        Files directly inside *directory* (a ``pathlib.Path``) are preferred;
        subdirectories are searched only when there are none. Candidates are
        sorted so the choice does not depend on filesystem listing order.
        """
        candidates = sorted(directory.glob("*.md")) or sorted(directory.rglob("*.md"))
        return candidates[0] if candidates else None

    def convert_paper(self, metadata: PaperMetadata) -> bool:
        """Convert one paper's PDF to Markdown.

        Args:
            metadata: Metadata of the paper to convert; its
                ``conversion_status`` is updated and saved along the way.

        Returns:
            True when a Markdown file was produced and moved into place,
            False otherwise (MinerU missing, no PDF, non-zero exit status,
            no Markdown output, or an exception during conversion).
        """
        if not self.is_mineru_available():
            self.logger.error("MinerU is not available")
            return False

        if not metadata.pdf_path:
            # Without this guard ``root / None`` raises TypeError below.
            self.logger.error("No PDF path recorded for paper: %s", metadata.paper_id)
            return False

        paths = self.storage_manager.get_paper_paths(
            metadata.paper_id, metadata.source_type
        )
        pdf_path = self.storage_manager.library_paths.root / metadata.pdf_path
        markdown_path = paths["markdown"]
        paper_dir = paths["directory"]
        logs_dir = paths["logs"]

        if not pdf_path.exists():
            self.logger.error(f"PDF file not found: {pdf_path}")
            return False

        self._set_status(metadata, ConversionStatus.PROCESSING)

        try:
            # The log file cannot be opened unless its directory exists.
            logs_dir.mkdir(parents=True, exist_ok=True)
            log_file = logs_dir / "mineru.log"

            # NOTE(review): availability is probed with ``import mineru`` but
            # the command runs ``magic_pdf.pipe.UNIPipe`` as a module; confirm
            # this invocation against the installed MinerU version.
            cmd = [
                sys.executable,
                "-m",
                "magic_pdf.pipe.UNIPipe",
                "--pdf",
                str(pdf_path),
                "--output-dir",
                str(paper_dir),
            ]
            self.logger.info(f"Running MinerU conversion: {' '.join(cmd)}")

            with log_file.open("w", encoding="utf-8") as log:
                result = subprocess.run(
                    cmd,
                    stdout=log,
                    stderr=subprocess.STDOUT,
                    cwd=paper_dir,
                    check=False,
                )

            if result.returncode != 0:
                self.logger.error(
                    f"MinerU conversion failed with return code {result.returncode}"
                )
                self._set_status(metadata, ConversionStatus.FAILED)
                return False

            main_md = self._find_markdown_output(paper_dir)
            if main_md is None:
                self.logger.error("No markdown output found after conversion")
                self._set_status(metadata, ConversionStatus.FAILED)
                return False

            if main_md != markdown_path:
                # ``replace`` overwrites a stale target on every platform.
                main_md.replace(markdown_path)

            self._set_status(metadata, ConversionStatus.SUCCESS)
            self.logger.info(f"Successfully converted {pdf_path} to {markdown_path}")
            return True
        except Exception as e:
            self.logger.error(f"Exception during conversion: {e}")
            self._set_status(metadata, ConversionStatus.FAILED)
            return False

    def convert_all_pending(self) -> tuple[int, int]:
        """Convert every paper whose conversion status is PENDING.

        Returns:
            A ``(success_count, failure_count)`` tuple.
        """
        success_count = 0
        failure_count = 0
        for metadata in self.storage_manager.list_all_papers():
            if metadata.conversion_status != ConversionStatus.PENDING:
                continue
            if self.convert_paper(metadata):
                success_count += 1
            else:
                failure_count += 1
        return success_count, failure_count
+6
View File
@@ -0,0 +1,6 @@
"""Import functionality for paperlib."""
from .arxiv_importer import ArxivImporter
from .local_importer import LocalImporter
__all__ = ["ArxivImporter", "LocalImporter"]
+112
View File
@@ -0,0 +1,112 @@
"""arXiv import functionality."""
from __future__ import annotations
import re
import tempfile
from pathlib import Path
import arxiv
from paperlib.models import PaperMetadata, SourceType
from paperlib.storage import PaperStorageManager
class ArxivImporter:
    """Import papers from arXiv: resolve the ID, fetch metadata, store the PDF."""

    # Tried in order. Look-arounds stop the new-style pattern from matching
    # inside a longer run of digits; the old-style pattern accepts an optional
    # two-letter subject class (``math.GT/0309136``). Both keep a trailing
    # version suffix such as ``v2``.
    _ID_PATTERNS = (
        re.compile(r"(?<!\d)(\d{4}\.\d{4,5}(?:v\d+)?)(?!\d)", re.IGNORECASE),
        re.compile(r"([a-z-]+(?:\.[a-z]{2})?/\d{7}(?:v\d+)?)(?!\d)", re.IGNORECASE),
    )

    def __init__(self, storage_manager: PaperStorageManager) -> None:
        self.storage_manager = storage_manager
        # arXiv client: small pages, a delay between requests, a few retries.
        self.client = arxiv.Client(page_size=10, delay_seconds=3.0, num_retries=3)

    def extract_arxiv_id(self, input_string: str) -> str:
        """Extract an arXiv ID from a bare ID, an ``arXiv:`` reference or a URL.

        Returns the first ID found (new ``YYMM.NNNNN[vN]`` form first, then
        the old ``archive[.SC]/YYMMNNN[vN]`` form). When nothing matches, the
        stripped input is returned unchanged on the assumption that it is
        already an ID.
        """
        cleaned = input_string.strip()
        for pattern in self._ID_PATTERNS:
            match = pattern.search(cleaned)
            if match:
                return match.group(1)
        return cleaned

    def fetch_paper_metadata(self, arxiv_id: str) -> arxiv.Result:
        """Fetch the arXiv record for *arxiv_id*.

        Raises:
            ValueError: If arXiv returns no result for the ID.
        """
        search = arxiv.Search(id_list=[arxiv_id])
        result = next(iter(self.client.results(search)), None)
        if result is None:
            msg = f"Paper not found on arXiv: {arxiv_id}"
            raise ValueError(msg)
        return result

    def download_pdf(self, result: arxiv.Result) -> Path:
        """Download the paper's PDF to a temporary file and return its path.

        The caller owns the returned file and must delete it. If the download
        fails, the temporary file is removed before the error propagates.
        """
        # Reserve a unique name; the file itself is (re)written by the download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_path = Path(tmp_file.name)
        try:
            result.download_pdf(dirpath=str(tmp_path.parent), filename=tmp_path.name)
        except BaseException:
            # Also covers Ctrl-C during a slow download; always re-raised.
            tmp_path.unlink(missing_ok=True)
            raise
        return tmp_path

    def import_arxiv_paper(
        self, arxiv_input: str, notes: str = "", tags: list[str] | None = None
    ) -> PaperMetadata:
        """Import a paper from arXiv into the library.

        Args:
            arxiv_input: arXiv ID, ``arXiv:`` reference or URL.
            notes: Free-form notes stored with the paper.
            tags: Tags stored with the paper; ``None`` means no tags.

        Returns:
            The metadata of the stored paper.

        Raises:
            ValueError: If the paper is already imported or is not found on
                arXiv.
        """
        arxiv_id = self.extract_arxiv_id(arxiv_input)

        # Refuse duplicates before doing any network work.
        paper_id = self.storage_manager.generate_paper_id(SourceType.ARXIV, arxiv_id)
        if self.storage_manager.paper_exists(paper_id, SourceType.ARXIV):
            msg = f"Paper already imported: {arxiv_id}"
            raise ValueError(msg)

        result = self.fetch_paper_metadata(arxiv_id)
        pdf_path = self.download_pdf(result)
        try:
            # Timestamps are stored without timezone information.
            published_date = (
                result.published.replace(tzinfo=None) if result.published else None
            )
            updated_date = (
                result.updated.replace(tzinfo=None) if result.updated else None
            )
            return self.storage_manager.store_paper(
                pdf_path=pdf_path,
                source_type=SourceType.ARXIV,
                source_id=arxiv_id,
                title=result.title,
                authors=[author.name for author in result.authors],
                published_date=published_date,
                updated_date=updated_date,
                categories=list(result.categories),
                notes=notes,
                tags=tags or [],
            )
        finally:
            # The temporary download is no longer needed once stored (or failed).
            pdf_path.unlink(missing_ok=True)
+56
View File
@@ -0,0 +1,56 @@
"""Local PDF import functionality."""
from __future__ import annotations
from pathlib import Path
from paperlib.models import PaperMetadata, SourceType
from paperlib.storage import PaperStorageManager
class LocalImporter:
    """Import PDF files from the local filesystem into the library."""

    def __init__(self, storage_manager: PaperStorageManager) -> None:
        self.storage_manager = storage_manager

    def import_pdf(
        self,
        pdf_path: Path,
        title: str = "",
        notes: str = "",
        tags: list[str] | None = None,
    ) -> PaperMetadata:
        """Import a local PDF file.

        Args:
            pdf_path: Path of the PDF to import.
            title: Paper title; derived from the file name when empty.
            notes: Free-form notes stored with the paper.
            tags: Tags stored with the paper; ``None`` means no tags.

        Returns:
            The metadata of the stored paper.

        Raises:
            FileNotFoundError: If *pdf_path* does not exist.
            ValueError: If *pdf_path* is not a regular ``.pdf`` file, or the
                paper has already been imported.
        """
        if not pdf_path.exists():
            msg = f"PDF file not found: {pdf_path}"
            raise FileNotFoundError(msg)

        # ``is_file`` rejects directories that merely carry a .pdf suffix.
        if pdf_path.suffix.lower() != ".pdf" or not pdf_path.is_file():
            msg = f"File is not a PDF: {pdf_path}"
            raise ValueError(msg)

        # Refuse duplicates before copying anything.
        paper_id = self.storage_manager.generate_paper_id(
            SourceType.LOCAL, pdf_path=pdf_path
        )
        if self.storage_manager.paper_exists(paper_id, SourceType.LOCAL):
            msg = f"Paper already imported: {paper_id}"
            raise ValueError(msg)

        if not title:
            # "my_paper-v2.pdf" -> "My Paper V2"
            title = pdf_path.stem.replace("_", " ").replace("-", " ").title()

        return self.storage_manager.store_paper(
            pdf_path=pdf_path,
            source_type=SourceType.LOCAL,
            source_id=None,
            title=title,
            notes=notes,
            tags=tags or [],
        )
+5
View File
@@ -0,0 +1,5 @@
"""SQLite index layer for paperlib."""
from .database import DatabaseManager
__all__ = ["DatabaseManager"]
+321
View File
@@ -0,0 +1,321 @@
"""SQLite database manager for indexing papers."""
from __future__ import annotations
import json
import sqlite3
from collections.abc import Iterator
from contextlib import closing

from paperlib.config import LibraryPaths
from paperlib.models import ConversionStatus, PaperMetadata, SourceType, SummaryStatus
class DatabaseManager:
    """SQLite-backed index of the papers in a library.

    Two tables are maintained: ``papers`` (one row per paper) and
    ``papers_fts``, an FTS5 table holding the searchable text of each paper.
    ``papers_fts`` stores its own content, so it can be written, queried and
    cleared independently of the column layout of ``papers``.

    Every operation opens its own connection and closes it when done.
    """

    _FTS_INSERT = """
        INSERT INTO papers_fts (
            paper_id, title, authors, search_text, categories, tags, notes
        ) VALUES (?, ?, ?, ?, ?, ?, ?)
    """

    def __init__(self, library_paths: LibraryPaths) -> None:
        self.library_paths = library_paths
        self.db_path = library_paths.db_path

    def _get_connection(self) -> sqlite3.Connection:
        """Open a new connection; the caller is responsible for closing it."""
        # The database directory may not exist on first use.
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row  # rows support access by column name
        conn.execute("PRAGMA foreign_keys = ON")
        return conn

    def _fetch_dicts(self, query: str, params=()) -> list[dict]:
        """Run a read-only query and return its rows as plain dicts."""
        with closing(self._get_connection()) as conn:
            return [dict(row) for row in conn.execute(query, params)]

    @staticmethod
    def _drop_legacy_fts(conn: sqlite3.Connection) -> bool:
        """Drop a ``papers_fts`` table created with ``content='papers'``.

        That older definition named columns that do not exist in ``papers``,
        so it could not be searched or cleared. Returns True when a table was
        dropped and the FTS data must be rebuilt.
        """
        row = conn.execute(
            "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'papers_fts'"
        ).fetchone()
        if row is None or "content=" not in (row["sql"] or ""):
            return False
        conn.execute("DROP TABLE papers_fts")
        return True

    def _populate_fts_from_papers(self, conn: sqlite3.Connection) -> None:
        """Fill ``papers_fts`` from the rows currently stored in ``papers``."""
        rows = conn.execute(
            "SELECT paper_id, title, author_list, search_text, category_list, "
            "tags_json, notes FROM papers"
        ).fetchall()
        conn.executemany(
            self._FTS_INSERT,
            [
                (
                    row["paper_id"],
                    row["title"],
                    row["author_list"] or "",
                    row["search_text"] or "",
                    row["category_list"] or "",
                    " ".join(json.loads(row["tags_json"])),
                    row["notes"],
                )
                for row in rows
            ],
        )

    def initialize_database(self) -> None:
        """Create the tables and indexes if they do not exist yet."""
        with closing(self._get_connection()) as conn, conn:
            # Main papers table
            conn.execute("""
                CREATE TABLE IF NOT EXISTS papers (
                    paper_id TEXT PRIMARY KEY,
                    source_type TEXT NOT NULL,
                    source_id TEXT,
                    title TEXT NOT NULL,
                    authors_json TEXT NOT NULL, -- JSON array of authors
                    published_date TEXT, -- ISO format
                    updated_date TEXT, -- ISO format
                    categories_json TEXT NOT NULL, -- JSON array of categories
                    pdf_path TEXT,
                    paper_md_path TEXT,
                    summary_json_path TEXT,
                    summary_md_path TEXT,
                    imported_at TEXT NOT NULL, -- ISO format
                    conversion_status TEXT NOT NULL,
                    summary_status TEXT NOT NULL,
                    tags_json TEXT NOT NULL, -- JSON array of tags
                    notes TEXT NOT NULL,
                    -- Computed fields for search
                    search_text TEXT, -- Full-text search content
                    author_list TEXT, -- Space-separated authors for search
                    category_list TEXT -- Space-separated categories for search
                )
            """)

            # Indexes for the columns used in filters and ordering.
            for index_name, column in (
                ("idx_papers_source_type", "source_type"),
                ("idx_papers_source_id", "source_id"),
                ("idx_papers_conversion_status", "conversion_status"),
                ("idx_papers_summary_status", "summary_status"),
                ("idx_papers_imported_at", "imported_at"),
            ):
                conn.execute(
                    f"CREATE INDEX IF NOT EXISTS {index_name} ON papers({column})"
                )

            # Full-text search table. It deliberately keeps its own content:
            # an external-content table would require ``papers`` to have
            # columns named exactly like the FTS columns.
            rebuild_fts = self._drop_legacy_fts(conn)
            conn.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS papers_fts USING fts5(
                    paper_id UNINDEXED,
                    title,
                    authors,
                    search_text,
                    categories,
                    tags,
                    notes
                )
            """)
            if rebuild_fts:
                self._populate_fts_from_papers(conn)

    def index_paper(self, metadata: PaperMetadata) -> None:
        """Insert or update the index entry for one paper."""
        authors = " ".join(metadata.authors)
        categories = " ".join(metadata.categories)
        tags = " ".join(metadata.tags)
        search_text = " ".join(
            [metadata.title, authors, categories, tags, metadata.notes]
        )

        with closing(self._get_connection()) as conn, conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO papers (
                    paper_id, source_type, source_id, title, authors_json,
                    published_date, updated_date, categories_json, pdf_path,
                    paper_md_path, summary_json_path, summary_md_path,
                    imported_at, conversion_status, summary_status,
                    tags_json, notes, search_text, author_list, category_list
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    metadata.paper_id,
                    metadata.source_type.value,
                    metadata.source_id,
                    metadata.title,
                    json.dumps(metadata.authors),
                    metadata.published_date.isoformat()
                    if metadata.published_date
                    else None,
                    metadata.updated_date.isoformat()
                    if metadata.updated_date
                    else None,
                    json.dumps(metadata.categories),
                    metadata.pdf_path,
                    metadata.paper_md_path,
                    metadata.summary_json_path,
                    metadata.summary_md_path,
                    metadata.imported_at.isoformat(),
                    metadata.conversion_status.value,
                    metadata.summary_status.value,
                    json.dumps(metadata.tags),
                    metadata.notes,
                    search_text,
                    authors,
                    categories,
                ),
            )

            # FTS rows have no unique key on paper_id, so replace explicitly;
            # otherwise re-indexing a paper would add a duplicate search row.
            conn.execute(
                "DELETE FROM papers_fts WHERE paper_id = ?", (metadata.paper_id,)
            )
            conn.execute(
                self._FTS_INSERT,
                (
                    metadata.paper_id,
                    metadata.title,
                    authors,
                    search_text,
                    categories,
                    tags,
                    metadata.notes,
                ),
            )

    def remove_paper(self, paper_id: str) -> bool:
        """Remove a paper from the index; return True if it was present."""
        with closing(self._get_connection()) as conn, conn:
            cursor = conn.execute("DELETE FROM papers WHERE paper_id = ?", (paper_id,))
            conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (paper_id,))
            return cursor.rowcount > 0

    def get_paper(self, paper_id: str) -> dict | None:
        """Return the index row for *paper_id* as a dict, or ``None``."""
        rows = self._fetch_dicts(
            "SELECT * FROM papers WHERE paper_id = ?", (paper_id,)
        )
        return rows[0] if rows else None

    def list_papers(
        self,
        source_type: SourceType | None = None,
        conversion_status: ConversionStatus | None = None,
        summary_status: SummaryStatus | None = None,
        limit: int | None = None,
        offset: int = 0,
    ) -> Iterator[dict]:
        """Yield index rows, newest import first, with optional filters.

        ``limit=None`` means no limit; *offset* is honoured with or without a
        limit.
        """
        conditions = []
        params: list = []
        for column, wanted in (
            ("source_type", source_type),
            ("conversion_status", conversion_status),
            ("summary_status", summary_status),
        ):
            if wanted is not None:
                conditions.append(f"{column} = ?")
                params.append(wanted.value)

        query = "SELECT * FROM papers"
        if conditions:
            query += " WHERE " + " AND ".join(conditions)
        query += " ORDER BY imported_at DESC"

        if limit is not None or offset:
            # SQLite requires a LIMIT for OFFSET; -1 means "no limit".
            query += " LIMIT ? OFFSET ?"
            params.extend([-1 if limit is None else limit, offset])

        yield from self._fetch_dicts(query, params)

    def search_papers(self, query: str, limit: int = 50) -> Iterator[dict]:
        """Yield index rows matching an FTS5 *query*, best match first."""
        yield from self._fetch_dicts(
            """
            SELECT papers.* FROM papers_fts
            JOIN papers ON papers.paper_id = papers_fts.paper_id
            WHERE papers_fts MATCH ?
            ORDER BY rank
            LIMIT ?
            """,
            (query, limit),
        )

    def search_by_field(
        self,
        field: str,
        value: str,
        exact_match: bool = False,
        limit: int = 50,
    ) -> Iterator[dict]:
        """Yield rows whose *field* equals or contains *value*.

        Raises:
            ValueError: If *field* is not one of the searchable columns. The
                error is raised when iteration starts.
        """
        # The column name is interpolated into SQL, so it must be allow-listed.
        if field not in ["title", "author_list", "category_list", "notes"]:
            msg = f"Invalid field for search: {field}"
            raise ValueError(msg)

        if exact_match:
            where_clause = f"{field} = ?"
            params = [value]
        else:
            where_clause = f"{field} LIKE ?"
            params = [f"%{value}%"]
        params.append(limit)

        yield from self._fetch_dicts(
            f"SELECT * FROM papers WHERE {where_clause} "
            "ORDER BY imported_at DESC LIMIT ?",
            params,
        )

    def get_statistics(self) -> dict:
        """Return the total paper count and counts grouped by status columns."""
        with closing(self._get_connection()) as conn:
            stats: dict = {}
            cursor = conn.execute("SELECT COUNT(*) as count FROM papers")
            stats["total_papers"] = cursor.fetchone()["count"]

            for column in ("source_type", "conversion_status", "summary_status"):
                cursor = conn.execute(
                    f"SELECT {column}, COUNT(*) as count FROM papers GROUP BY {column}"
                )
                stats[f"by_{column}"] = {row[column]: row["count"] for row in cursor}
            return stats

    def reindex_from_storage(self, storage_manager) -> tuple[int, int]:
        """Clear the index and rebuild it from the papers in storage.

        Args:
            storage_manager: Object whose ``list_all_papers()`` yields the
                metadata of every stored paper.

        Returns:
            A ``(success_count, error_count)`` tuple.
        """
        with closing(self._get_connection()) as conn, conn:
            conn.execute("DELETE FROM papers")
            conn.execute("DELETE FROM papers_fts")

        success_count = 0
        error_count = 0
        for metadata in storage_manager.list_all_papers():
            try:
                self.index_paper(metadata)
            except Exception:
                # Best effort: one bad record must not abort the rebuild; the
                # failure is reported to the caller through the error count.
                error_count += 1
            else:
                success_count += 1
        return success_count, error_count
+17
View File
@@ -0,0 +1,17 @@
"""Data models for paperlib."""
from .paper import (
ConversionStatus,
PaperMetadata,
PaperSummary,
SourceType,
SummaryStatus,
)
__all__ = [
"ConversionStatus",
"PaperMetadata",
"PaperSummary",
"SourceType",
"SummaryStatus",
]
+164
View File
@@ -0,0 +1,164 @@
"""Data models for paper metadata and summaries."""
from __future__ import annotations
import json
from dataclasses import asdict, dataclass, field
from datetime import datetime
from enum import StrEnum
from pathlib import Path
from typing import Any
class ConversionStatus(StrEnum):
"""Status of PDF to Markdown conversion."""
PENDING = "pending"
PROCESSING = "processing"
SUCCESS = "success"
FAILED = "failed"
class SummaryStatus(StrEnum):
"""Status of AI summarization."""
PENDING = "pending"
PROCESSING = "processing"
SUCCESS = "success"
FAILED = "failed"
NOT_REQUESTED = "not_requested"
class SourceType(StrEnum):
"""Type of paper source."""
LOCAL = "local"
ARXIV = "arxiv"
@dataclass
class PaperMetadata:
    """Metadata for a paper (stored in meta.json).

    Instances round-trip through JSON via :meth:`to_dict` /
    :meth:`from_dict`: datetimes are written as ISO-8601 strings and the
    status/source enums as their string values.
    """

    # Core identifiers
    paper_id: str
    source_type: SourceType
    source_id: str | None = None  # arXiv ID or local file hash

    # Bibliographic information
    title: str = ""
    authors: list[str] = field(default_factory=list)
    published_date: datetime | None = None
    updated_date: datetime | None = None
    categories: list[str] = field(default_factory=list)

    # File paths (relative to library root)
    pdf_path: str | None = None
    paper_md_path: str | None = None
    summary_json_path: str | None = None
    summary_md_path: str | None = None

    # Processing status
    imported_at: datetime = field(default_factory=datetime.now)
    conversion_status: ConversionStatus = ConversionStatus.PENDING
    summary_status: SummaryStatus = SummaryStatus.NOT_REQUESTED

    # Additional metadata
    tags: list[str] = field(default_factory=list)
    notes: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Convert to a JSON-serializable dictionary.

        Returns:
            A new dict in which datetime fields are ISO-format strings (or
            ``None``) and enum fields are plain strings.
        """
        data = asdict(self)

        # Convert datetime objects to ISO format strings
        for field_name in ("published_date", "updated_date", "imported_at"):
            if data[field_name] is not None:
                data[field_name] = data[field_name].isoformat()

        # Convert enums to strings
        data["source_type"] = self.source_type.value
        data["conversion_status"] = self.conversion_status.value
        data["summary_status"] = self.summary_status.value
        return data

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PaperMetadata:
        """Create an instance from a dictionary (JSON deserialization).

        The input mapping is not modified; conversions are applied to a
        shallow copy.

        Raises:
            ValueError: If a date string or enum value is invalid.
            TypeError: If required fields are missing or unknown keys are
                present.
        """
        # Work on a copy so the caller's dict keeps its original values.
        payload = dict(data)

        # Convert ISO format strings back to datetime objects
        for field_name in ("published_date", "updated_date", "imported_at"):
            if payload.get(field_name):
                payload[field_name] = datetime.fromisoformat(payload[field_name])

        # Convert strings back to enums
        if "source_type" in payload:
            payload["source_type"] = SourceType(payload["source_type"])
        if "conversion_status" in payload:
            payload["conversion_status"] = ConversionStatus(
                payload["conversion_status"]
            )
        if "summary_status" in payload:
            payload["summary_status"] = SummaryStatus(payload["summary_status"])

        return cls(**payload)

    def save_to_file(self, file_path: Path) -> None:
        """Save metadata to a JSON file atomically.

        The JSON is written to ``<name>.tmp`` next to the target and then
        moved over it, so readers never see a half-written file.
        """
        # Append ".tmp" to the full name (not swap the suffix) so files that
        # share a stem in the same directory cannot clobber each other.
        temp_path = file_path.with_name(f"{file_path.name}.tmp")
        try:
            with temp_path.open("w", encoding="utf-8") as f:
                json.dump(self.to_dict(), f, indent=2)
            # replace() overwrites an existing target on every platform;
            # rename() raises on Windows when the target already exists.
            temp_path.replace(file_path)
        finally:
            # No-op after a successful replace; removes the leftover on failure.
            temp_path.unlink(missing_ok=True)

    @classmethod
    def load_from_file(cls, file_path: Path) -> PaperMetadata:
        """Load metadata from a JSON file written by :meth:`save_to_file`."""
        with file_path.open(encoding="utf-8") as f:
            data = json.load(f)
        return cls.from_dict(data)
@dataclass
class PaperSummary:
    """Structured summary for a paper (stored in summary.json).

    Every field has a default, so an empty summary can be created and
    filled in incrementally. All fields are JSON-native types.
    """

    # Schema version for migration
    schema_version: str = "1.0"

    # Core summary fields
    one_sentence_summary: str = ""
    problem_statement: str = ""
    method_overview: str = ""
    main_results: str = ""
    claimed_contributions: list[str] = field(default_factory=list)
    assumptions: list[str] = field(default_factory=list)
    limitations: list[str] = field(default_factory=list)

    # Categorization
    problem_tags: list[str] = field(default_factory=list)
    technique_tags: list[str] = field(default_factory=list)

    # Entities mentioned
    entities: list[str] = field(default_factory=list)

    # Relevance scoring (optional)
    relevance_to_user: float | None = None
    recommended_sections: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Convert to a JSON-serializable dictionary."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PaperSummary:
        """Create an instance from a dictionary (JSON deserialization).

        Raises:
            TypeError: If ``data`` contains keys that are not fields.
        """
        return cls(**data)

    def save_to_file(self, file_path: Path) -> None:
        """Save the summary to a JSON file atomically.

        The JSON is written to ``<name>.tmp`` next to the target and then
        moved over it, so readers never see a half-written file.
        """
        # Append ".tmp" to the full name (not swap the suffix) so files that
        # share a stem in the same directory cannot clobber each other.
        temp_path = file_path.with_name(f"{file_path.name}.tmp")
        try:
            with temp_path.open("w", encoding="utf-8") as f:
                json.dump(self.to_dict(), f, indent=2)
            # replace() overwrites an existing target on every platform;
            # rename() raises on Windows when the target already exists.
            temp_path.replace(file_path)
        finally:
            # No-op after a successful replace; removes the leftover on failure.
            temp_path.unlink(missing_ok=True)

    @classmethod
    def load_from_file(cls, file_path: Path) -> PaperSummary:
        """Load a summary from a JSON file written by :meth:`save_to_file`."""
        with file_path.open(encoding="utf-8") as f:
            data = json.load(f)
        return cls.from_dict(data)
+5
View File
@@ -0,0 +1,5 @@
"""Storage layer for paperlib."""
from .manager import PaperStorageManager
__all__ = ["PaperStorageManager"]
+183
View File
@@ -0,0 +1,183 @@
"""Paper storage manager for CRUD operations on metadata files."""
from __future__ import annotations
import hashlib
import shutil
from collections.abc import Iterator
from datetime import datetime
from pathlib import Path
from paperlib.config import LibraryPaths
from paperlib.models import PaperMetadata, PaperSummary, SourceType
class PaperStorageManager:
    """Manages storage and retrieval of papers and their metadata.

    Each paper lives in its own directory under ``library_paths.papers_dir``
    and holds ``meta.json``, ``source.pdf``, ``paper.md``, ``summary.json``,
    ``summary.md`` plus ``assets/`` and ``logs/`` sub-directories.
    """

    # Errors that mean "this JSON file cannot be turned into a model":
    # missing file, invalid JSON / date / enum value (ValueError), or a
    # payload of the wrong shape (TypeError from the dataclass constructor).
    _UNREADABLE_ERRORS = (FileNotFoundError, ValueError, TypeError)

    def __init__(self, library_paths: LibraryPaths) -> None:
        self.library_paths = library_paths

    def generate_paper_id(
        self,
        source_type: SourceType,
        source_id: str | None = None,
        pdf_path: Path | None = None,
    ) -> str:
        """Generate a stable paper ID based on source type and content.

        Args:
            source_type: Origin of the paper.
            source_id: arXiv identifier; required for arXiv papers.
            pdf_path: PDF to hash; required for local papers.

        Returns:
            ``arxiv-<normalized id>`` or ``local-<first 16 hex chars of the
            PDF's SHA-256>``.

        Raises:
            ValueError: If the information needed for ``source_type`` is
                missing.
        """
        if source_type == SourceType.ARXIV and source_id:
            # Use arXiv ID directly (normalized)
            # NOTE(review): every "v" is rewritten, not only the version
            # suffix; kept as-is because IDs are persisted — confirm whether
            # old-style IDs (e.g. "solv-int/...") need special handling.
            return f"arxiv-{source_id.replace('.', '_').replace('v', '_v')}"
        if source_type == SourceType.LOCAL and pdf_path:
            # Hash the PDF in chunks instead of loading it into memory.
            with pdf_path.open("rb") as f:
                hash_hex = hashlib.file_digest(f, "sha256").hexdigest()
            return f"local-{hash_hex[:16]}"  # Use first 16 chars of hash
        msg = "Cannot generate paper ID without proper source information"
        raise ValueError(msg)

    @staticmethod
    def _arxiv_year(paper_id: str) -> str:
        """Return the four-digit year encoded in an arXiv paper ID.

        New-style IDs start with ``YYMM``; old-style IDs look like
        ``archive/YYMMNNN``. Two-digit years from 91 upward are read as
        19YY (arXiv started in 1991), everything else as 20YY. IDs that
        match neither form map to the fixed bucket ``"unknown"`` so the
        result never depends on the current date.
        """
        arxiv_id = paper_id.removeprefix("arxiv-")
        _, slash, tail = arxiv_id.partition("/")
        yymm = (tail if slash else arxiv_id)[:4]
        if len(yymm) != 4 or not (yymm.isascii() and yymm.isdigit()):
            return "unknown"
        two_digit_year = int(yymm[:2])
        century = 1900 if two_digit_year >= 91 else 2000
        return str(century + two_digit_year)

    def get_paper_directory(self, paper_id: str, source_type: SourceType) -> Path:
        """Get the directory path for storing a paper's files.

        arXiv papers go under ``papers/arxiv/<year>/<paper_id>`` where the
        year is derived from the ID (e.g. "2212.06340" -> "2022"); all
        other papers go under ``papers/local/<hash>``. The mapping depends
        only on the arguments, so a stored paper is always found again.
        """
        if source_type == SourceType.ARXIV:
            year = self._arxiv_year(paper_id)
            return self.library_paths.papers_dir / "arxiv" / year / paper_id
        # Local papers go under papers/local/{hash-prefix}/
        hash_part = paper_id.removeprefix("local-")
        return self.library_paths.papers_dir / "local" / hash_part

    def get_paper_paths(
        self, paper_id: str, source_type: SourceType
    ) -> dict[str, Path]:
        """Get all expected file paths for a paper.

        Returns:
            A dict with the keys ``directory``, ``meta``, ``pdf``,
            ``markdown``, ``summary_json``, ``summary_md``, ``assets`` and
            ``logs``. The paths are computed, not checked for existence.
        """
        paper_dir = self.get_paper_directory(paper_id, source_type)
        return {
            "directory": paper_dir,
            "meta": paper_dir / "meta.json",
            "pdf": paper_dir / "source.pdf",
            "markdown": paper_dir / "paper.md",
            "summary_json": paper_dir / "summary.json",
            "summary_md": paper_dir / "summary.md",
            "assets": paper_dir / "assets",
            "logs": paper_dir / "logs",
        }

    def store_paper(
        self,
        pdf_path: Path,
        source_type: SourceType,
        source_id: str | None = None,
        **metadata_kwargs,
    ) -> PaperMetadata:
        """Store a paper and create its metadata.

        Creates the paper directory (with ``assets/`` and ``logs/``),
        copies the PDF into it and writes ``meta.json``.

        Args:
            pdf_path: PDF file to copy into the library.
            source_type: Origin of the paper.
            source_id: arXiv identifier, if any.
            **metadata_kwargs: Extra ``PaperMetadata`` fields (title,
                authors, ...).

        Returns:
            The metadata that was written.
        """
        # Generate paper ID
        paper_id = self.generate_paper_id(source_type, source_id, pdf_path)

        # Get storage paths
        paths = self.get_paper_paths(paper_id, source_type)

        # Create directory structure
        paths["directory"].mkdir(parents=True, exist_ok=True)
        paths["assets"].mkdir(exist_ok=True)
        paths["logs"].mkdir(exist_ok=True)

        # Copy PDF to storage
        shutil.copy2(pdf_path, paths["pdf"])

        root = self.library_paths.root

        def relative(key: str) -> str:
            # Forward slashes keep meta.json portable across operating systems.
            return paths[key].relative_to(root).as_posix()

        # Create metadata
        metadata = PaperMetadata(
            paper_id=paper_id,
            source_type=source_type,
            source_id=source_id,
            pdf_path=relative("pdf"),
            paper_md_path=relative("markdown"),
            summary_json_path=relative("summary_json"),
            summary_md_path=relative("summary_md"),
            **metadata_kwargs,
        )

        # Save metadata
        metadata.save_to_file(paths["meta"])
        return metadata

    def load_paper_metadata(
        self, paper_id: str, source_type: SourceType
    ) -> PaperMetadata | None:
        """Load paper metadata from storage.

        Returns:
            The metadata, or ``None`` if ``meta.json`` is missing or cannot
            be parsed into a ``PaperMetadata``.
        """
        paths = self.get_paper_paths(paper_id, source_type)
        if not paths["meta"].exists():
            return None
        try:
            return PaperMetadata.load_from_file(paths["meta"])
        except self._UNREADABLE_ERRORS:
            return None

    def update_paper_metadata(self, metadata: PaperMetadata) -> None:
        """Write ``metadata`` to the paper's ``meta.json``."""
        paths = self.get_paper_paths(metadata.paper_id, metadata.source_type)
        metadata.save_to_file(paths["meta"])

    def load_paper_summary(
        self, paper_id: str, source_type: SourceType
    ) -> PaperSummary | None:
        """Load paper summary from storage.

        Returns:
            The summary, or ``None`` if ``summary.json`` is missing or
            cannot be parsed into a ``PaperSummary``.
        """
        paths = self.get_paper_paths(paper_id, source_type)
        if not paths["summary_json"].exists():
            return None
        try:
            return PaperSummary.load_from_file(paths["summary_json"])
        except self._UNREADABLE_ERRORS:
            return None

    def save_paper_summary(
        self, paper_id: str, source_type: SourceType, summary: PaperSummary
    ) -> None:
        """Write ``summary`` to the paper's ``summary.json``."""
        paths = self.get_paper_paths(paper_id, source_type)
        summary.save_to_file(paths["summary_json"])

    def list_all_papers(self) -> Iterator[PaperMetadata]:
        """Iterate over all papers in the library.

        Every ``meta.json`` below the papers directory is loaded; files
        that cannot be parsed are skipped so one bad file does not end the
        iteration.
        """
        papers_dir = self.library_paths.papers_dir
        if not papers_dir.exists():
            return

        # Look for meta.json files in the papers directory structure
        for meta_file in papers_dir.rglob("meta.json"):
            try:
                yield PaperMetadata.load_from_file(meta_file)
            except self._UNREADABLE_ERRORS:
                # Skip corrupted metadata files
                continue

    def paper_exists(self, paper_id: str, source_type: SourceType) -> bool:
        """Check if a paper already exists in storage (has a ``meta.json``)."""
        paths = self.get_paper_paths(paper_id, source_type)
        return paths["meta"].exists()

    def delete_paper(self, paper_id: str, source_type: SourceType) -> bool:
        """Delete a paper and all its files.

        Returns:
            ``True`` if the paper directory existed and was removed,
            ``False`` if there was nothing to delete.
        """
        paths = self.get_paper_paths(paper_id, source_type)
        if not paths["directory"].exists():
            return False

        # Remove entire paper directory
        shutil.rmtree(paths["directory"])
        return True
Generated
+33
View File
@@ -97,6 +97,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" },
] ]
[[package]]
name = "arxiv"
version = "3.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "feedparser" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ff/78/1e93a001ed51b5114e1978247078fa3130cbb2794a520603949cbe9a7028/arxiv-3.0.0.tar.gz", hash = "sha256:c8cb0d31208afbc1ceb17bd3f9816c8d4c5ca1e0abf199d211e216715440498d", size = 67344, upload-time = "2026-04-12T22:48:59.623Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9d/0d/bb2ef604e5548ba73ba6326576908d8285ebf3468b02b86af83381c7c973/arxiv-3.0.0-py3-none-any.whl", hash = "sha256:8b4d4e2e336bfeb71ea653623d7dadb260f682f0475cee2aecad0560a23b34db", size = 11928, upload-time = "2026-04-12T22:48:58.44Z" },
]
[[package]] [[package]]
name = "audioop-lts" name = "audioop-lts"
version = "0.2.2" version = "0.2.2"
@@ -502,6 +515,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/31/fb/6d251f3fdfe3346ee60d091f55106513e509659ee005ad39c914182c96f4/fasttext_predict-0.9.2.4-cp313-cp313t-win_amd64.whl", hash = "sha256:be0933fa4af7abae09c703d28f9e17c80e7069eb6f92100b21985b777f4ea275", size = 110325, upload-time = "2024-11-23T17:24:16.984Z" }, { url = "https://files.pythonhosted.org/packages/31/fb/6d251f3fdfe3346ee60d091f55106513e509659ee005ad39c914182c96f4/fasttext_predict-0.9.2.4-cp313-cp313t-win_amd64.whl", hash = "sha256:be0933fa4af7abae09c703d28f9e17c80e7069eb6f92100b21985b777f4ea275", size = 110325, upload-time = "2024-11-23T17:24:16.984Z" },
] ]
[[package]]
name = "feedparser"
version = "6.0.12"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "sgmllib3k" },
]
sdist = { url = "https://files.pythonhosted.org/packages/dc/79/db7edb5e77d6dfbc54d7d9df72828be4318275b2e580549ff45a962f6461/feedparser-6.0.12.tar.gz", hash = "sha256:64f76ce90ae3e8ef5d1ede0f8d3b50ce26bcce71dd8ae5e82b1cd2d4a5f94228", size = 286579, upload-time = "2025-09-10T13:33:59.486Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/eb/c96d64137e29ae17d83ad2552470bafe3a7a915e85434d9942077d7fd011/feedparser-6.0.12-py3-none-any.whl", hash = "sha256:6bbff10f5a52662c00a2e3f86a38928c37c48f77b3c511aedcd51de933549324", size = 81480, upload-time = "2025-09-10T13:33:58.022Z" },
]
[[package]] [[package]]
name = "ffmpy" name = "ffmpy"
version = "1.0.0" version = "1.0.0"
@@ -1382,6 +1407,7 @@ name = "paperlib"
version = "0.1.0" version = "0.1.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "arxiv" },
{ name = "mineru", extra = ["core"] }, { name = "mineru", extra = ["core"] },
{ name = "rich" }, { name = "rich" },
{ name = "typer" }, { name = "typer" },
@@ -1389,6 +1415,7 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "arxiv", specifier = ">=2.0.0" },
{ name = "mineru", extras = ["core"], specifier = ">=3.0.9" }, { name = "mineru", extras = ["core"], specifier = ">=3.0.9" },
{ name = "rich", specifier = ">=15.0.0" }, { name = "rich", specifier = ">=15.0.0" },
{ name = "typer", specifier = ">=0.24.1" }, { name = "typer", specifier = ">=0.24.1" },
@@ -1947,6 +1974,12 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" }, { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" },
] ]
[[package]]
name = "sgmllib3k"
version = "1.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/9e/bd/3704a8c3e0942d711c1299ebf7b9091930adae6675d7c8f476a7ce48653c/sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9", size = 5750, upload-time = "2010-08-24T14:33:52.445Z" }
[[package]] [[package]]
name = "shapely" name = "shapely"
version = "2.1.2" version = "2.1.2"