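# Hugging Face file-viewer header captured along with the source (not program code):
# cmcmaster's picture · last commit "Update scheduler" · 908d5c9 (verified)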
import json
import os
from datetime import datetime, timezone
from fasthtml.common import *
from huggingface_hub import HfApi, hf_hub_download
from starlette.responses import FileResponse
from generate_newsletter import process_new_papers
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from fasthtml_hf import setup_hf_backup
# Hugging Face access: the dataset repository the routes below read from, and
# an API client authenticated with the token taken from the environment
# (None when HF_TOKEN is not set).
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
HF_TOKEN = os.getenv("HF_TOKEN")
api = HfApi(token=HF_TOKEN)
# Initialize scheduler
scheduler = BackgroundScheduler()


def _run_weekly_newsletter():
    """Run newsletter generation with an end date of yesterday (UTC).

    The date is computed each time the job fires. APScheduler passes job
    kwargs through verbatim and does not render Airflow/Jinja-style
    ``{{ ... }}`` templates, so a templated string would reach
    ``process_new_papers`` as literal text.
    """
    # Local import: the file-level import only brings in datetime and timezone.
    from datetime import timedelta

    yesterday = datetime.now(timezone.utc) - timedelta(days=1)
    process_new_papers(end_date=yesterday.strftime("%Y-%m-%d"), test=False)


# Schedule newsletter generation to run every Monday at 1 AM UTC.
# The trigger timezone is set explicitly; without it CronTrigger falls back to
# the host's local timezone, which is not necessarily UTC.
scheduler.add_job(_run_weekly_newsletter,
                  CronTrigger(day_of_week='mon', hour=1, timezone='UTC'),
                  id='generate_newsletter',
                  name='Weekly newsletter generation',
                  replace_existing=True)
# Page stylesheet, wrapped in a Style element so it can be passed to the app
# as a header. It sets a serif, 800px-max centred body, unstyled lists, and
# the `.download-link` button appearance.
css = Style("""
body {
font-family: Georgia, Times, serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 0 auto;
padding: 20px;
background: #fff;
}
h1, h2 {
color: #2c3e50;
font-family: Georgia, Times, serif;
}
a {
color: #2c3e50;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
ul {
list-style-type: none;
padding: 0;
}
li {
margin-bottom: 10px;
}
.newsletter-content {
margin-top: 20px;
}
.download-link {
display: inline-block;
padding: 10px 20px;
background-color: #2c3e50;
color: white;
border-radius: 3px;
margin: 10px 0;
font-family: Georgia, Times, serif;
}
.download-link:hover {
background-color: #34495e;
text-decoration: none;
}
""")
# Application object. Its headers are the stylesheet, the Markdown renderer
# script, and syntax highlighting for a fixed set of languages.
_markdown_js = MarkdownJS()
_highlight_js = HighlightJS(langs=['python', 'javascript', 'html', 'css'])
app = FastHTML(hdrs=(css, _markdown_js, _highlight_js))
# Start the scheduler when the app starts
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler on application startup.

    The ``running`` check makes the hook safe to fire more than once:
    APScheduler raises ``SchedulerAlreadyRunningError`` if ``start()`` is
    called on a scheduler that is already running.
    """
    if not scheduler.running:
        scheduler.start()
# Shut down the scheduler when the app stops
@app.on_event("shutdown")
async def shutdown_scheduler():
    """Stop the background scheduler on application shutdown.

    Only a running scheduler is shut down. If startup never got as far as
    starting it, ``shutdown()`` would raise ``SchedulerNotRunningError`` and
    turn a clean exit into an error.
    """
    if scheduler.running:
        scheduler.shutdown()
def get_newsletter_list():
    """Return the repo paths ending in ``newsletter.json``, sorted descending.

    Paths come from listing every file in the ``DATASET_NAME`` dataset
    repository. Reverse lexicographic order puts the highest-sorting path
    first.
    """
    repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    json_paths = filter(lambda name: name.endswith('newsletter.json'), repo_files)
    return sorted(json_paths, reverse=True)
def get_newsletter_content(path):
    """Download one newsletter JSON file from the dataset and parse it.

    Args:
        path: Path of the file inside the ``DATASET_NAME`` dataset repository.

    Returns:
        The decoded JSON document. Callers in this file index it with
        ``'date'`` and ``'content'``.

    Raises:
        Whatever ``hf_hub_download`` raises when the file cannot be fetched,
        plus ``OSError`` / ``json.JSONDecodeError`` for an unreadable or
        malformed local copy.
    """
    local_file = api.hf_hub_download(repo_id=DATASET_NAME,
                                     filename=path,
                                     repo_type="dataset")
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding, which can mis-decode non-ASCII newsletter text.
    with open(local_file, 'r', encoding='utf-8') as f:
        return json.load(f)
@app.get("/")
def index():
    """Render the landing page: one link per available newsletter.

    Each newsletter path is expected to start with a ``YYYYMMDD`` folder.
    That folder becomes the link target (``/newsletter/<folder>``) and is
    shown as a long-form date such as ``January 05, 2025``.
    """
    links = []
    for repo_path in get_newsletter_list():
        folder = repo_path.split('/')[0]
        try:
            label = datetime.strptime(folder, '%Y%m%d').strftime('%B %d, %Y')
        except ValueError:
            # A stray file whose first path segment is not a date (for
            # example one at the repo root) must not break the whole page.
            continue
        links.append(Li(A(label, href=f"/newsletter/{folder}")))
    return Titled("This Week in Rheumatology", H2("Available Newsletters"),
                  Ul(*links))
@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render the newsletter stored at ``<date>/newsletter.json``.

    Args:
        date: Folder name taken from the URL, expected in ``YYYYMMDD`` form
            (the same form the index page links to).

    Returns:
        The newsletter page, with a back link, a PDF download link and the
        newsletter body in a ``marked`` container. On any failure an error
        page is returned instead.
    """
    try:
        # `date` comes straight from the URL and is interpolated into a repo
        # path, so reject anything that does not parse as a YYYYMMDD date.
        # The resulting ValueError is handled by the except clause below.
        datetime.strptime(date, '%Y%m%d')
        content = get_newsletter_content(f"{date}/newsletter.json")
        return Titled(
            f"This Week in Rheumatology - {content['date']}",
            A("Back to Index", href="/"),
            Div(
                A("Download PDF", href=f"/download/{date}", cls="download-link")
            ),
            Div(content['content'], cls="marked"))
    except Exception:
        # Deliberate best-effort: a bad date, a missing file or a malformed
        # document all produce the same "not found" page.
        return Titled("Error", H2("Newsletter not found"),
                      P(f"Unable to load newsletter for date: {date}"),
                      A("Back to Index", href="/"))
@app.get("/download/{date}")
def download_pdf(date: str):
    """Serve the PDF stored at ``<date>/newsletter.pdf`` as a download.

    Args:
        date: Folder name taken from the URL, expected in ``YYYYMMDD`` form.

    Returns:
        A ``FileResponse`` for the downloaded PDF, offered to the client as
        ``newsletter_<date>.pdf``. On any failure an error page is returned
        instead.
    """
    try:
        # `date` comes straight from the URL and is interpolated into a repo
        # path, so reject anything that does not parse as a YYYYMMDD date.
        # The resulting ValueError is handled by the except clause below.
        datetime.strptime(date, '%Y%m%d')
        local_pdf = api.hf_hub_download(repo_id=DATASET_NAME,
                                        filename=f"{date}/newsletter.pdf",
                                        repo_type="dataset")
        return FileResponse(local_pdf,
                            media_type="application/pdf",
                            filename=f"newsletter_{date}.pdf")
    except Exception:
        # Deliberate best-effort: a bad date or a failed download both
        # produce the same "not found" page.
        return Titled("Error", H2("PDF not found"),
                      P(f"Unable to load PDF for date: {date}"),
                      A("Back to Index", href="/"))
# Pass the app to fasthtml_hf's backup helper.
# NOTE(review): presumably this persists app data to a Hugging Face dataset —
# confirm against the fasthtml_hf documentation.
setup_hf_backup(app)
# Launch the app server (serve is presumably provided by the
# fasthtml.common star import — confirm).
serve()