Spaces:

cmcmaster
/

this_week_in_rheumatology

Sleeping

File size: 4,636 Bytes

import json
import os
from datetime import datetime, timezone

from fasthtml.common import *
from huggingface_hub import HfApi, hf_hub_download
from starlette.responses import FileResponse
from generate_newsletter import process_new_papers
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

from fasthtml_hf import setup_hf_backup

# Hugging Face Hub client configuration.
# The access token is read from the environment; it is None when HF_TOKEN is
# not set.
HF_TOKEN = os.getenv("HF_TOKEN")
# Dataset repository that the newsletter files are listed and downloaded from.
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
api = HfApi(token=HF_TOKEN)

# Initialize scheduler
scheduler = BackgroundScheduler()


def _run_weekly_newsletter():
  """Generate the newsletter for the period ending the day before this run.

  APScheduler hands a job's ``kwargs`` to the callable verbatim; it does not
  render Airflow/Jinja-style ``{{ ... }}`` templates. The end date is
  therefore computed here, at execution time, as yesterday's UTC date in
  ``YYYY-MM-DD`` form (the format the former template string spelled out).
  """
  from datetime import timedelta  # local: not among the module-level imports

  yesterday = datetime.now(timezone.utc) - timedelta(days=1)
  process_new_papers(end_date=yesterday.strftime("%Y-%m-%d"), test=False)


# Schedule newsletter generation to run every Monday at 1 AM UTC.
# The timezone is given explicitly: without it the trigger falls back to the
# scheduler's timezone, which is not guaranteed to be UTC.
scheduler.add_job(_run_weekly_newsletter,
                  CronTrigger(day_of_week='mon', hour=1, timezone='UTC'),
                  id='generate_newsletter',
                  name='Weekly newsletter generation',
                  replace_existing=True)

# Site-wide stylesheet, passed to FastHTML below as one of the page headers.
# It sets a centered serif layout (max 800px wide), unstyled lists, and the
# ".newsletter-content" / ".download-link" classes.
css = Style("""
    body { 
        font-family: Georgia, Times, serif;
        line-height: 1.6;
        color: #333;
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        background: #fff;
    }

    h1, h2 { 
        color: #2c3e50;
        font-family: Georgia, Times, serif;
    }

    a { 
        color: #2c3e50;
        text-decoration: none;
    }

    a:hover { 
        text-decoration: underline; 
    }

    ul { 
        list-style-type: none;
        padding: 0;
    }

    li { 
        margin-bottom: 10px;
    }

    .newsletter-content {
        margin-top: 20px;
    }

    .download-link {
        display: inline-block;
        padding: 10px 20px;
        background-color: #2c3e50;
        color: white;
        border-radius: 3px;
        margin: 10px 0;
        font-family: Georgia, Times, serif;
    }

    .download-link:hover {
        background-color: #34495e;
        text-decoration: none;
    }
""")

# Application instance. Every page receives these headers: the stylesheet
# above, the Markdown renderer script, and syntax highlighting for the listed
# languages.
app = FastHTML(
    hdrs=(
        css,
        MarkdownJS(),
        HighlightJS(langs=['python', 'javascript', 'html', 'css']),
    ))


# Start the scheduler when the app starts
@app.on_event("startup")
async def start_scheduler():
  """Start the module-level background scheduler.

  Registered as the application's "startup" event handler.
  """
  scheduler.start()


# Shut down the scheduler when the app stops
@app.on_event("shutdown")
async def shutdown_scheduler():
  """Shut down the module-level background scheduler.

  Registered as the application's "shutdown" event handler.
  """
  # NOTE(review): shutdown() is called with its defaults; presumably this
  # waits for a running job to finish - confirm against the APScheduler docs.
  scheduler.shutdown()


def get_newsletter_list():
  """Return the repo paths ending in 'newsletter.json', in descending order.

  The paths come from listing every file in the DATASET_NAME dataset
  repository; the result is sorted as plain strings, reversed.
  """
  repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
  matching = filter(lambda name: name.endswith('newsletter.json'), repo_files)
  return sorted(matching, reverse=True)


def get_newsletter_content(path):
  """Download one newsletter JSON file from the dataset repo and parse it.

  Args:
    path: File path inside the DATASET_NAME dataset repository,
      e.g. "<date>/newsletter.json".

  Returns:
    The decoded JSON document.

  Raises:
    Whatever the Hub download raises when the file cannot be fetched, and
    json.JSONDecodeError when the file is not valid JSON.
  """
  local_file = api.hf_hub_download(repo_id=DATASET_NAME,
                                   filename=path,
                                   repo_type="dataset")
  # Read as UTF-8 explicitly: the default encoding of open() depends on the
  # platform locale, which can mangle or reject non-ASCII newsletter text.
  with open(local_file, 'r', encoding='utf-8') as f:
    return json.load(f)


@app.get("/")
def index():
  """Render the landing page: one link per available newsletter.

  Each newsletter path is expected to start with a YYYYMMDD folder name. That
  folder becomes the link target (/newsletter/<folder>) and is shown as a
  long-form date such as "January 05, 2024".
  """
  links = []
  for n in get_newsletter_list():
    folder = n.split('/')[0]
    try:
      label = datetime.strptime(folder, '%Y%m%d').strftime('%B %d, %Y')
    except ValueError:
      # Not a dated folder (for example a newsletter.json at the repo root).
      # Skip it instead of failing the whole index page.
      continue
    links.append(Li(A(label, href=f"/newsletter/{folder}")))
  return Titled("This Week in Rheumatology", H2("Available Newsletters"),
                Ul(*links))


@app.get("/newsletter/{date}")
def newsletter(date: str):
  """Render a single newsletter page.

  Args:
    date: Folder name of the newsletter taken from the URL, expected in the
      same YYYYMMDD form that the index page links to.

  Returns:
    The newsletter page (title, back link, PDF download link and the content
    in a "marked" div), or the "Newsletter not found" error page when the
    date is malformed or the newsletter cannot be loaded.
  """
  import logging  # local: not among the module-level imports

  def not_found():
    return Titled("Error", H2("Newsletter not found"),
                  P(f"Unable to load newsletter for date: {date}"),
                  A("Back to Index", href="/"))

  # `date` comes straight from the URL and is interpolated into a repository
  # path, so accept only the 8-digit folder names the index page produces.
  if not (len(date) == 8 and date.isascii() and date.isdigit()):
    return not_found()

  try:
    content = get_newsletter_content(f"{date}/newsletter.json")
    issue_date = content['date']
    body = content['content']
  except Exception:
    # Deliberately broad: any download/parse/schema failure shows the same
    # error page, but the cause is logged rather than silently discarded.
    logging.getLogger(__name__).exception(
        "Failed to load newsletter for date %s", date)
    return not_found()

  return Titled(
      f"This Week in Rheumatology - {issue_date}",
      A("Back to Index", href="/"),
      Div(A("Download PDF", href=f"/download/{date}", cls="download-link")),
      Div(body, cls="marked"))


@app.get("/download/{date}")
def download_pdf(date: str):
  """Serve the PDF version of a newsletter as a file download.

  Args:
    date: Folder name of the newsletter taken from the URL, expected in the
      same YYYYMMDD form that the index page links to.

  Returns:
    A FileResponse for "<date>/newsletter.pdf" from the dataset repository,
    offered as "newsletter_<date>.pdf", or the "PDF not found" error page
    when the date is malformed or the file cannot be downloaded.
  """
  import logging  # local: not among the module-level imports

  def not_found():
    return Titled("Error", H2("PDF not found"),
                  P(f"Unable to load PDF for date: {date}"),
                  A("Back to Index", href="/"))

  # `date` comes straight from the URL and is interpolated into a repository
  # path, so accept only the 8-digit folder names the index page produces.
  if not (len(date) == 8 and date.isascii() and date.isdigit()):
    return not_found()

  try:
    local_pdf = api.hf_hub_download(repo_id=DATASET_NAME,
                                    filename=f"{date}/newsletter.pdf",
                                    repo_type="dataset")
  except Exception:
    # Deliberately broad: any Hub failure shows the same error page, but the
    # cause is logged rather than silently discarded.
    logging.getLogger(__name__).exception(
        "Failed to download PDF for date %s", date)
    return not_found()

  return FileResponse(local_pdf,
                      media_type="application/pdf",
                      filename=f"newsletter_{date}.pdf")

# NOTE(review): setup_hf_backup comes from fasthtml_hf; presumably it wires
# persistence/backup of app data to the Hugging Face Hub - confirm against
# that package's documentation.
setup_hf_backup(app)
# Launch the web server for `app` (serve is provided by fasthtml.common).
serve()