update: a framework to call main() according to crontab
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
[general]
|
||||
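# Cron schedule on which main() runs: "second minute hour day month weekday" (the seconds field may be omitted)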
|
||||
crontab = "1,5 * * * * *"
|
||||
|
||||
[arxiv]
|
||||
# The maximum number of days to search on arXiv
|
||||
# Note: if this is set to 7, we search up to 7 days, but nothing is downloaded twice
|
||||
fetch_period = 7
|
||||
@@ -1,6 +1,21 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
|
||||
from src.utils import load_config, next_cron_match
|
||||
|
||||
|
||||
def main():
    """Run one scheduled job; for now this only prints a greeting.

    NOTE(review): the greeting is emitted twice, which may be unintended
    residue from an earlier revision - confirm before relying on it.
    """
    greeting = "Hello from arxiv-bot!"
    print(greeting)
    # a place holder
    print(greeting)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The schedule is read once at start-up and reused for every iteration.
    config = load_config()
    crontab = config["general"]["crontab"]

    while True:
        # Find the next instant the cron expression fires, wait for it,
        # then run the job. The schedule is re-evaluated after main()
        # returns, so the next run is computed from the time main() finishes.
        next_run = next_cron_match(crontab, datetime.now())
        delay = (next_run - datetime.now()).total_seconds()
        # Never pass a negative duration to sleep().
        sleep(delay if delay > 0.0 else 0.0)
        main()
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import tomllib
|
||||
|
||||
|
||||
def load_config(config_path: str | Path = "config.toml") -> dict[str, Any]:
|
||||
"""Load a TOML config file and return its contents as a dictionary."""
|
||||
path = Path(config_path)
|
||||
|
||||
if not path.is_absolute():
|
||||
path = Path(__file__).resolve().parent.parent / path
|
||||
|
||||
if not path.is_file():
|
||||
raise FileNotFoundError(f"Config file not found: {path}")
|
||||
|
||||
with path.open("rb") as f:
|
||||
return tomllib.load(f)
|
||||
|
||||
|
||||
def parse_cron_field(field: str, minimum: int, maximum: int) -> set[int]:
    """Expand a single crontab field into the set of integers it selects.

    Supported syntax, combinable with commas:
      * ``*``        - every value from ``minimum`` to ``maximum``
      * ``N``        - the single value ``N``
      * ``A-B``      - the inclusive range ``A`` to ``B``
      * ``*/S``      - every ``S``-th value starting at ``minimum``
      * ``A-B/S``    - every ``S``-th value of the range, starting at ``A``
      * ``N/S``      - just ``N`` (a step on a single value has no effect)

    Args:
        field: The raw text of one crontab field.
        minimum: Smallest value the field may take.
        maximum: Largest value the field may take.

    Returns:
        The selected values. Values outside ``minimum..maximum`` are dropped
        rather than rejected.

    Raises:
        ValueError: If a number cannot be parsed or a step is not a
            positive integer.
    """
    if field == "*":
        return set(range(minimum, maximum + 1))

    values: set[int] = set()
    for part in field.split(","):
        base, slash, step_text = part.partition("/")
        step = int(step_text) if slash else 1
        if step < 1:
            # A zero step used to surface as an opaque range() error and a
            # negative step silently selected nothing; reject both clearly.
            raise ValueError(f"cron step must be a positive integer: {part!r}")

        if slash and base == "*":
            # A bare "*" is only meaningful inside a list when it has a step.
            start, end = minimum, maximum
        elif "-" in base:
            start_text, end_text = base.split("-", 1)
            start, end = int(start_text), int(end_text)
        else:
            start = end = int(base)

        # Cap the upper bound so an oversized range such as "0-999999999"
        # cannot materialise millions of values that are discarded below.
        values.update(range(start, min(end, maximum) + 1, step))

    return {value for value in values if minimum <= value <= maximum}
|
||||
|
||||
|
||||
def cron_matches(expr: str, dt: datetime) -> bool:
    """Report whether ``dt`` satisfies the crontab expression ``expr``.

    Args:
        expr: A whitespace-separated crontab expression with either five
            fields (minute hour day month weekday) or six fields (second
            minute hour day month weekday). With five fields the second is
            treated as ``0``.
        dt: The moment to test. Only its second, minute, hour, day, month
            and weekday are examined.

    Returns:
        ``True`` when every field matches ``dt``. Day-of-month and weekday
        are both required to match (they are combined with AND).

    Raises:
        ValueError: If ``expr`` does not contain 5 or 6 fields, or a field
            that gets evaluated cannot be parsed.
    """
    fields = expr.split()
    if len(fields) == 5:
        fields = ["0", *fields]
    elif len(fields) != 6:
        raise ValueError("crontab must contain 5 or 6 fields")
    second, minute, hour, day, month, weekday = fields

    # datetime.weekday() numbers Monday as 0; cron numbers Sunday as 0.
    cron_weekday = (dt.weekday() + 1) % 7

    # Fields are checked lazily, left to right, so later fields are only
    # parsed when all earlier ones already match.
    return (
        dt.second in parse_cron_field(second, 0, 59)
        and dt.minute in parse_cron_field(minute, 0, 59)
        and dt.hour in parse_cron_field(hour, 0, 23)
        and dt.day in parse_cron_field(day, 1, 31)
        and dt.month in parse_cron_field(month, 1, 12)
        # cron accepts both 0 and 7 for Sunday: parse up to 7 and fold 7
        # onto 0 so that e.g. "7" or "5-7" match Sunday instead of being
        # silently dropped.
        and cron_weekday in {value % 7 for value in parse_cron_field(weekday, 0, 7)}
    )
|
||||
|
||||
|
||||
def next_cron_match(expr: str, after: datetime) -> datetime:
    """Return the first moment strictly after ``after`` that matches ``expr``.

    Five-field expressions are resolved to whole minutes, six-field
    expressions to whole seconds. The search covers 366 days from the first
    candidate, so a schedule that fires less often than that (for example
    one tied to 29 February) may not be found.

    Args:
        expr: Crontab expression with 5 or 6 whitespace-separated fields.
        after: Reference time; the result is always later than this.

    Returns:
        The earliest matching datetime within the search window.

    Raises:
        ValueError: If ``expr`` does not contain 5 or 6 fields, or no match
            exists within the search window.
    """
    fields = expr.split()
    if len(fields) == 5:
        step = timedelta(minutes=1)
        candidate = after.replace(second=0, microsecond=0) + step
    elif len(fields) == 6:
        step = timedelta(seconds=1)
        candidate = after.replace(microsecond=0) + step
    else:
        raise ValueError("crontab must contain 5 or 6 fields")

    # Coarser versions of the expression with the finer fields wildcarded.
    # They let the search discard a whole day, hour or minute in one step
    # instead of testing every tick inside it, while still delegating all
    # matching rules to cron_matches().
    minute, hour, day, month, weekday = fields[-5:]
    date_only = f"* * * {day} {month} {weekday}"
    hour_only = f"* * {hour} {day} {month} {weekday}"
    minute_only = f"* {minute} {hour} {day} {month} {weekday}"

    deadline = candidate + timedelta(days=366)
    while candidate <= deadline:
        if not cron_matches(date_only, candidate):
            # Nothing on this date can match: jump to the next midnight.
            candidate = candidate.replace(hour=0, minute=0, second=0) + timedelta(days=1)
        elif not cron_matches(hour_only, candidate):
            # Nothing in this hour can match: jump to the top of the next one.
            candidate = candidate.replace(minute=0, second=0) + timedelta(hours=1)
        elif not cron_matches(minute_only, candidate):
            # Nothing in this minute can match: jump to the next minute.
            candidate = candidate.replace(second=0) + timedelta(minutes=1)
        elif cron_matches(expr, candidate):
            return candidate
        else:
            candidate += step

    raise ValueError(f"Unable to find next run time for crontab: {expr}")
|
||||
Reference in New Issue
Block a user