Spaces:

cmcmaster
/

this_week_in_rheumatology

Sleeping

File size: 5,905 Bytes

import json
import os
from datetime import datetime, timezone

from fasthtml.common import *
from huggingface_hub import HfApi, hf_hub_download
from starlette.responses import FileResponse
from generate_newsletter import process_new_papers
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

# Hugging Face Hub client. The token is read from the environment and is
# None when HF_TOKEN is unset; it is passed to the client as-is.
DATASET_NAME = "cmcmaster/this_week_in_rheumatology"
HF_TOKEN = os.getenv("HF_TOKEN")
api = HfApi(token=HF_TOKEN)

# Initialize scheduler
scheduler = BackgroundScheduler()


def _run_newsletter_job():
    """Run ``process_new_papers`` for the week ending on the most recent Sunday.

    APScheduler passes job kwargs through verbatim, so the end date has to be
    computed here at run time; a ``{{ ... }}`` template string would reach
    ``process_new_papers`` as literal text.
    """
    # Local import: the file's top-level imports do not include timedelta.
    from datetime import timedelta

    now = datetime.now(timezone.utc)
    # weekday() is 0 on Monday, so subtracting weekday() + 1 days lands on the
    # last Sunday strictly before today (a Sunday maps to one week earlier).
    end_date = (now - timedelta(days=now.weekday() + 1)).strftime("%Y-%m-%d")
    process_new_papers(end_date=end_date, test=False)


# Run the newsletter generation job every 6 hours.
# NOTE(review): an earlier comment described this as "every Monday at 1 AM
# UTC"; the registered trigger is a 6-hour interval and is kept unchanged.
scheduler.add_job(_run_newsletter_job,
                  trigger="interval",
                  hours=6,
                  id='generate_newsletter',
                  name='Weekly newsletter generation',
                  replace_existing=True)

# Inline stylesheet for every page: serif typography, a centered 800px
# column, unstyled lists, and the button look used by the download links.
css = Style("""
    body { 
        font-family: Georgia, Times, serif;
        line-height: 1.6;
        color: #333;
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        background: #fff;
    }

    h1, h2 { 
        color: #2c3e50;
        font-family: Georgia, Times, serif;
    }

    a { 
        color: #2c3e50;
        text-decoration: none;
    }

    a:hover { 
        text-decoration: underline; 
    }

    ul { 
        list-style-type: none;
        padding: 0;
    }

    li { 
        margin-bottom: 10px;
    }

    .newsletter-content {
        margin-top: 20px;
    }

    .download-links {
        margin: 20px 0;
    }

    .download-link {
        display: inline-block;
        padding: 10px 20px;
        background-color: #2c3e50;
        color: white;
        border-radius: 3px;
        margin: 0 10px 10px 0;
        font-family: Georgia, Times, serif;
    }

    .download-link:hover {
        background-color: #34495e;
        text-decoration: none;
    }
""")

# Headers attached to the app: the stylesheet, the Markdown script header,
# and the syntax-highlighting header for the listed languages.
_page_headers = (
    css,
    MarkdownJS(),
    HighlightJS(langs=['python', 'javascript', 'html', 'css']),
)
app = FastHTML(hdrs=_page_headers)


# Start the scheduler when the app starts
@app.on_event("startup")
async def start_scheduler():
    """Start the background scheduler on application startup.

    The ``running`` check makes the hook safe to fire more than once in the
    same process: calling ``start()`` on an already running scheduler raises
    ``SchedulerAlreadyRunningError``.
    """
    if not scheduler.running:
        scheduler.start()


# Shut down the scheduler when the app stops
@app.on_event("shutdown")
async def shutdown_scheduler():
    """Stop the background scheduler on application shutdown.

    The ``running`` check avoids ``SchedulerNotRunningError`` when the
    scheduler never started (for example, startup failed part-way), which
    would otherwise mask the original error during teardown.
    """
    if scheduler.running:
        scheduler.shutdown()


def get_newsletter_list():
    """Return the repo paths of all newsletter JSON files.

    Lists every file in the dataset repository, keeps the paths ending in
    ``newsletter.json``, and returns them sorted in descending string order.
    """
    repo_files = api.list_repo_files(repo_id=DATASET_NAME, repo_type="dataset")
    return sorted(
        (name for name in repo_files if name.endswith('newsletter.json')),
        reverse=True,
    )


def get_newsletter_content(path):
    """Download a newsletter JSON file from the dataset repo and parse it.

    Args:
        path: Path of the file inside the dataset repository,
            e.g. ``"<date>/newsletter.json"``.

    Returns:
        The decoded JSON document.

    Raises:
        Any error raised by the Hub download, ``OSError`` if the local file
        cannot be read, or ``json.JSONDecodeError`` for malformed JSON.
    """
    local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                     filename=path,
                                     repo_type="dataset")
    # Read explicitly as UTF-8 so non-ASCII newsletter text does not depend
    # on the platform's locale default.
    with open(local_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def check_format_exists(date: str, format: str) -> bool:
    """Report whether ``<date>/newsletter.<format>`` can be fetched.

    The check is done by attempting the download; any exception at all is
    treated as "not available" and yields ``False``.
    """
    try:
        target = f"{date}/newsletter.{format}"
        api.hf_hub_download(repo_id=DATASET_NAME,
                            filename=target,
                            repo_type="dataset")
    except Exception:
        return False
    else:
        return True


@app.get("/")
def index():
    """Render the landing page with one link per available newsletter.

    The first path segment of each newsletter file is parsed as a
    ``YYYYMMDD`` date, shown as e.g. ``January 05, 2025``, and linked to
    ``/newsletter/<YYYYMMDD>``. Entries whose first segment is not a valid
    date are skipped so that one stray file cannot break the whole page.
    """
    links = []
    for path in get_newsletter_list():
        folder = path.split('/')[0]
        try:
            label = datetime.strptime(folder, '%Y%m%d').strftime('%B %d, %Y')
        except ValueError:
            # Not a YYYYMMDD folder (e.g. a file at the repo root): skip it.
            continue
        links.append(Li(A(label, href=f"/newsletter/{folder}")))
    return Titled("This Week in Rheumatology", H2("Available Newsletters"),
                  Ul(*links))


@app.get("/newsletter/{date}")
def newsletter(date: str):
    """Render one newsletter page, or an error page if it cannot be loaded.

    Loads ``<date>/newsletter.json``, adds a download button for each of the
    PDF and EPUB formats that ``check_format_exists`` reports as available,
    and places the newsletter's ``content`` field in a ``marked`` div. Any
    exception along the way produces the "Newsletter not found" page.
    """
    path = f"{date}/newsletter.json"
    # (format, button label) pairs, checked in this order
    formats = (("pdf", "Download PDF"), ("epub", "Download EPUB"))
    try:
        issue = get_newsletter_content(path)

        # Only offer the formats that are actually available for this date
        buttons = [
            A(label, href=f"/download/{date}/{fmt}", cls="download-link")
            for fmt, label in formats
            if check_format_exists(date, fmt)
        ]

        return Titled(
            f"This Week in Rheumatology - {issue['date']}",
            A("Back to Index", href="/"),
            Div(*buttons, cls="download-links"),
            Div(issue['content'], cls="marked"))
    except Exception:
        return Titled("Error", H2("Newsletter not found"),
                      P(f"Unable to load newsletter for date: {date}"),
                      A("Back to Index", href="/"))


@app.get("/download/{date}/{format}")
def download_file(date: str, format: str):
    """Serve the PDF or EPUB rendition of a newsletter as a file download.

    Args:
        date: Folder name of the newsletter in the dataset repo.
        format: Requested rendition; only ``"pdf"`` and ``"epub"`` are
            supported.

    Returns:
        A ``FileResponse`` for the downloaded file, or an error page when
        the format is unsupported or the file cannot be fetched.
    """
    media_types = {
        "pdf": "application/pdf",
        "epub": "application/epub+zip",
    }
    try:
        # Validate the format before touching the network, so unsupported
        # formats are rejected without downloading anything from the Hub.
        media_type = media_types.get(format)
        if media_type is None:
            raise ValueError(f"Unsupported format: {format}")

        local_path = api.hf_hub_download(repo_id=DATASET_NAME,
                                         filename=f"{date}/newsletter.{format}",
                                         repo_type="dataset")

        return FileResponse(local_path,
                            media_type=media_type,
                            filename=f"newsletter_{date}.{format}")
    except Exception:
        return Titled("Error", H2(f"{format.upper()} not found"),
                      P(f"Unable to load {format.upper()} for date: {date}"),
                      A("Back to Index", href="/"))

# Launch the app; `serve` comes from the `fasthtml.common` star import.
serve()