"""Web front-end for the "This Week in Rheumatology" newsletter archive.

The module:

* registers a weekly APScheduler job that calls ``process_new_papers``;
* lists the newsletters stored in the Hugging Face dataset ``DATASET_NAME``;
* renders a single newsletter (``/newsletter/{date}``) and serves its PDF
  (``/download/{date}``).

Issues are addressed by the name of their folder in the dataset, which is a
``YYYYMMDD`` date (see ``ISSUE_DATE_FORMAT``).
"""
# NOTE(review): the import order of the original file is kept on purpose.
# ``from fasthtml.common import *`` is a wildcard import, so moving it could
# change which same-named binding wins.
import json
import logging
import os
from datetime import datetime, timedelta, timezone
from fasthtml.common import *
from huggingface_hub import HfApi, hf_hub_download
from starlette.responses import FileResponse
from generate_newsletter import process_new_papers
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from fasthtml_hf import setup_hf_backup

logger = logging.getLogger(__name__)

# Initialize Hugging Face API
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
api = HfApi(token=HF_TOKEN)

# Format of the per-issue folder name in the dataset and of the ``{date}``
# URL segment, e.g. ``20240131``.
ISSUE_DATE_FORMAT = "%Y%m%d"


def _parse_issue_date(value):
    """Return the ``datetime`` for a ``YYYYMMDD`` string, or ``None``.

    ``strptime`` alone is too lenient (it accepts non-padded fields), so the
    value must also be exactly eight ASCII digits.
    """
    if len(value) != 8 or not (value.isascii() and value.isdigit()):
        return None
    try:
        return datetime.strptime(value, ISSUE_DATE_FORMAT)
    except ValueError:
        # Eight digits that do not form a real calendar date, e.g. 20241340.
        return None


def _error_page(heading, detail):
    """Build the shared error page with a link back to the index."""
    return Titled("Error", H2(heading), P(detail), A("Back to Index", href="/"))


# Initialize scheduler
scheduler = BackgroundScheduler()


def run_weekly_newsletter():
    """Scheduled job: generate the newsletter ending on the previous UTC day.

    The end date is computed when the job fires. APScheduler hands job
    kwargs to the callable unchanged, so a ``{{ ... }}`` template string
    would reach ``process_new_papers`` as literal text.
    """
    yesterday = datetime.now(timezone.utc) - timedelta(days=1)
    process_new_papers(end_date=yesterday.strftime("%Y-%m-%d"), test=False)


# Schedule newsletter generation to run every Monday at 1 AM UTC.
# The timezone is given explicitly; without it the trigger would use the
# scheduler's local timezone instead of UTC.
scheduler.add_job(
    run_weekly_newsletter,
    CronTrigger(day_of_week='mon', hour=1, timezone="UTC"),
    id='generate_newsletter',
    name='Weekly newsletter generation',
    replace_existing=True,
)

# The stylesheet is split into adjacent literals for readability only; the
# concatenated text is identical to the original single string.
css = Style(
    " body { font-family: Georgia, Times, serif; line-height: 1.6; color: #333;"
    " max-width: 800px; margin: 0 auto; padding: 20px; background: #fff; }"
    " h1, h2 { color: #2c3e50; font-family: Georgia, Times, serif; }"
    " a { color: #2c3e50; text-decoration: none; }"
    " a:hover { text-decoration: underline; }"
    " ul { list-style-type: none; padding: 0; }"
    " li { margin-bottom: 10px; }"
    " .newsletter-content { margin-top: 20px; }"
    " .download-link { display: inline-block; padding: 10px 20px;"
    " background-color: #2c3e50; color: white; border-radius: 3px;"
    " margin: 10px 0; font-family: Georgia, Times, serif; }"
    " .download-link:hover { background-color: #34495e; text-decoration: none; }"
    " "
)

app = FastHTML(hdrs=(css, MarkdownJS(),
                     HighlightJS(langs=['python', 'javascript', 'html', 'css'])))


# Start the scheduler when the app starts
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler unless it is already running."""
    if not scheduler.running:
        scheduler.start()


# Shut down the scheduler when the app stops
@app.on_event("shutdown")
async def shutdown_scheduler():
    """Stop the background scheduler if it was started."""
    if scheduler.running:
        scheduler.shutdown()


def get_newsletter_list():
    """Return the dataset paths ending in ``newsletter.json``, sorted descending.

    Paths are sorted as strings in reverse order, which puts the newest issue
    first as long as folder names are ``YYYYMMDD`` dates.
    """
    # Fetch the list of newsletters from the Hugging Face repository
    files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    newsletters = [f for f in files if f.endswith('newsletter.json')]
    return sorted(newsletters, reverse=True)


def get_newsletter_content(path):
    """Download ``path`` from the dataset and return the parsed JSON.

    Args:
        path: File path inside the dataset, e.g. ``20240131/newsletter.json``.

    Raises:
        Whatever ``hf_hub_download``, ``open`` or ``json.load`` raise; callers
        are expected to handle failures.
    """
    # Download and parse the newsletter content
    local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                     filename=path,
                                     repo_type="dataset")
    # JSON is read as UTF-8 explicitly instead of the platform default.
    with open(local_path, 'r', encoding='utf-8') as f:
        return json.load(f)


@app.get("/")
def index():
    """Render the index page with one link per available newsletter."""
    links = []
    for repo_path in get_newsletter_list():
        folder = repo_path.split('/')[0]
        issue_date = _parse_issue_date(folder)
        if issue_date is None:
            # One stray file must not take the whole index page down.
            logger.warning("Skipping %r: folder is not a YYYYMMDD date", repo_path)
            continue
        links.append(
            Li(A(issue_date.strftime('%B %d, %Y'), href=f"/newsletter/{folder}")))
    return Titled("This Week in Rheumatology", H2("Available Newsletters"),
                  Ul(*links))


@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render the newsletter stored under ``{date}/newsletter.json``.

    Args:
        date: Issue folder name taken from the URL (``YYYYMMDD``).

    Returns:
        The newsletter page, or the error page when ``date`` is malformed or
        the newsletter cannot be loaded.
    """
    error_detail = f"Unable to load newsletter for date: {date}"
    # ``date`` comes from the URL; validate it before building a repo path.
    if _parse_issue_date(date) is None:
        return _error_page("Newsletter not found", error_detail)

    try:
        content = get_newsletter_content(f"{date}/newsletter.json")
        heading = f"This Week in Rheumatology - {content['date']}"
        markdown_body = content['content']
    except Exception:
        # Route boundary: show the error page, but keep the cause in the log.
        logger.exception("Failed to load newsletter for %s", date)
        return _error_page("Newsletter not found", error_detail)

    return Titled(
        heading,
        A("Back to Index", href="/"),
        Div(A("Download PDF", href=f"/download/{date}", cls="download-link")),
        Div(markdown_body, cls="marked"))


@app.get("/download/{date}")
def download_pdf(date: str):
    """Serve ``{date}/newsletter.pdf`` from the dataset as a download.

    Args:
        date: Issue folder name taken from the URL (``YYYYMMDD``).

    Returns:
        A ``FileResponse`` for the PDF, or the error page when ``date`` is
        malformed or the PDF cannot be fetched.
    """
    error_detail = f"Unable to load PDF for date: {date}"
    # ``date`` ends up in a repo path and in the download filename.
    if _parse_issue_date(date) is None:
        return _error_page("PDF not found", error_detail)

    try:
        local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                         filename=f"{date}/newsletter.pdf",
                                         repo_type="dataset")
    except Exception:
        # Route boundary: show the error page, but keep the cause in the log.
        logger.exception("Failed to fetch PDF for %s", date)
        return _error_page("PDF not found", error_detail)

    return FileResponse(local_path,
                        media_type="application/pdf",
                        filename=f"newsletter_{date}.pdf")


setup_hf_backup(app)
serve()