# Spaces: Building
import html
import json
import os
import re

import google.generativeai as genai
import requests
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.platypus import (
    ListFlowable,
    ListItem,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
)
# ----------------------------
# Helper functions (provided code)
# ----------------------------
# Compiled once at import time. The group is non-greedy so that several bold
# spans on the same line are converted independently of each other.
_BOLD_PATTERN = re.compile(r"\*\*(.*?)\*\*")


def convert_markdown(text: str) -> str:
    """Convert markdown bold markers (``**text**``) to HTML ``<b>`` tags.

    Only paired ``**`` markers are rewritten; an unpaired ``**`` is left in
    place. Because ``.`` does not match a newline, a bold span is never
    matched across line breaks.

    Args:
        text: The text to convert.

    Returns:
        ``text`` with every ``**...**`` span replaced by ``<b>...</b>``.
    """
    return _BOLD_PATTERN.sub(r"<b>\1</b>", text)
# A markdown list item: one bullet marker followed by whitespace. Requiring the
# whitespace keeps "**bold** text" and "*emphasis*" from being read as bullets.
_BULLET_RE = re.compile(r"^[*+-]\s+(.+)$")
# A markdown ATX heading: one to six '#' characters followed by whitespace.
_ATX_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$")


def parse_report_text(report_text):
    """Parse the generated report text into a list of ReportLab flowables.

    Every line is stripped and then classified on its own:

    - ``# Title`` becomes a heading; ``## Title`` (or deeper) a subheading.
    - A line wrapped in a single pair of ``**`` markers, e.g. ``**Title**``,
      becomes a heading.
    - ``* item``, ``- item`` and ``+ item`` become bullet items; consecutive
      items are grouped into one bulleted list.
    - An empty line ends the current bullet list and adds a 12-unit spacer.
    - Any other nonempty line becomes a normal paragraph.

    The text of each line is XML-escaped before ``**bold**`` spans are turned
    into ``<b>`` tags, so that characters such as ``<`` and ``&`` in the
    report are shown literally instead of being handed to ``Paragraph`` as
    markup.

    Args:
        report_text: The report as plain text / simple markdown. ``None`` or
            an empty string yields an empty list.

    Returns:
        A list of flowables (``Paragraph``, ``ListFlowable``, ``Spacer``) in
        document order.
    """
    styles = getSampleStyleSheet()
    heading_style = styles["Heading1"]
    subheading_style = styles["Heading2"]
    normal_style = styles["BodyText"]
    bullet_style = styles["Bullet"]

    flowables = []
    pending_bullets = []  # bullet paragraphs waiting to be grouped into a list

    def to_markup(raw_text):
        # Escape first, convert bold second: the <b> tags produced by
        # convert_markdown must stay unescaped.
        return convert_markdown(html.escape(raw_text, quote=False))

    def flush_bullets():
        # Emit the collected bullet items as one list and start a new group.
        if pending_bullets:
            flowables.append(
                ListFlowable(list(pending_bullets), bulletType="bullet", leftIndent=20)
            )
            pending_bullets.clear()

    for raw_line in (report_text or "").splitlines():
        line = raw_line.strip()

        if not line:
            flush_bullets()
            flowables.append(Spacer(1, 12))
            continue

        bullet_match = _BULLET_RE.match(line)
        if bullet_match:
            bullet_text = to_markup(bullet_match.group(1).strip())
            pending_bullets.append(Paragraph(bullet_text, bullet_style))
            continue

        # Anything that is not a bullet closes the running bullet list.
        flush_bullets()

        atx_match = _ATX_HEADING_RE.match(line)
        # A bold-only line has exactly one pair of '**' markers; a line such as
        # "**Key:** value **x**" is an ordinary paragraph with bold spans.
        is_bold_line = (
            line.startswith("**")
            and line.endswith("**")
            and "**" not in line[2:-2]
        )
        bold_heading_text = line.strip("*").strip() if is_bold_line else ""

        if atx_match:
            hashes, heading_text = atx_match.groups()
            style = heading_style if len(hashes) == 1 else subheading_style
            flowables.append(Paragraph(to_markup(heading_text), style))
        elif bold_heading_text:
            flowables.append(Paragraph(to_markup(bold_heading_text), heading_style))
        else:
            flowables.append(Paragraph(to_markup(line), normal_style))

    # Flush a bullet list that runs to the end of the text.
    flush_bullets()
    return flowables
def get_pagespeed_data(target_url, pagespeed_api_key, timeout=120):
    """Fetch data from the PageSpeed Insights API for the given URL.

    Args:
        target_url: The page to analyse; sent as the ``url`` query parameter.
        pagespeed_api_key: API key; sent as the ``key`` query parameter.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` waits indefinitely, which would block the
            calling request handler forever on a stalled connection.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        Exception: If the API answers with a status code other than 200; the
            message carries the status code and the response body.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout).
    """
    endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
    params = {
        "url": target_url,
        "key": pagespeed_api_key,
    }
    response = requests.get(endpoint, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error fetching PageSpeed data: {response.status_code} - {response.text}")
    return response.json()
def generate_report_with_gemini(pagespeed_data, gemini_api_key):
    """Generate a report from PageSpeed Insights data with the Gemini model.

    Args:
        pagespeed_data: JSON-serialisable PageSpeed Insights result; it is
            embedded in the prompt as pretty-printed JSON.
        gemini_api_key: API key used to configure the Gemini client.

    Returns:
        The generated report text, or ``"No report could be generated."`` when
        the response carries no usable text.
    """
    fallback = "No report could be generated."

    # Configure the Gemini API with the provided key.
    genai.configure(api_key=gemini_api_key)
    # Select a Gemini model. For this example, we use 'gemini-2.0-flash'.
    model = genai.GenerativeModel("gemini-2.0-flash")

    # Prepare the prompt including the pretty-printed JSON.
    prompt = (
        "Please generate a detailed and well-structured report on the website performance based "
        "on the following PageSpeed Insights data. Organize the report with clear headings, "
        "subheadings, and bullet points for key metrics and recommendations, "
        "and do not generate tables in the report:\n\n"
        + json.dumps(pagespeed_data, indent=2)
    )

    # Generate the content using Gemini.
    response = model.generate_content(prompt)

    # `text` is read inside try/except rather than probed with hasattr():
    # hasattr() only swallows AttributeError, so an accessor that raises
    # ValueError (reported by the client library when the response holds no
    # usable text part) would escape instead of producing the fallback.
    try:
        report_text = response.text
    except (AttributeError, ValueError):
        return fallback
    return report_text or fallback
def save_report_to_pdf(report_text, pdf_filename="pagespeed_report.pdf"):
    """Render the report text into a letter-sized PDF file.

    The text is converted to flowables by ``parse_report_text`` and laid out
    with a ``SimpleDocTemplate``; a confirmation line is printed once the
    document has been built.

    Args:
        report_text: The report to render.
        pdf_filename: Path of the PDF file to write.
    """
    # The same 72-unit margin is used on all four sides.
    margin = 72
    document = SimpleDocTemplate(
        pdf_filename,
        pagesize=letter,
        rightMargin=margin,
        leftMargin=margin,
        topMargin=margin,
        bottomMargin=margin,
    )

    # Turn the text into platypus flowables and build the document from them.
    story = parse_report_text(report_text)
    document.build(story)

    print(f"Report saved as {pdf_filename}")
# ----------------------------
# FastAPI App and Endpoints
# ----------------------------
# FastAPI application object (served as `main:app`, see the run note at the
# end of the file).
app = FastAPI()

# Name of the PDF file that the report is written to and downloaded from.
PDF_FILENAME = "pagespeed_report.pdf"

# API keys are read from the environment; the module refuses to load when
# either of them is missing or empty.
PAGESPEED_API_KEY = os.getenv("PAGESPEED_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not (PAGESPEED_API_KEY and GEMINI_API_KEY):
    raise Exception("Please set the PAGESPEED_API_KEY and GEMINI_API_KEY environment variables.")
@app.get("/")
def read_root():
    """Root endpoint providing basic information.

    Registered for GET ``/``; without the route decorator the function is
    never attached to ``app`` and the path is not served.

    Returns:
        A dict with a single ``"message"`` key describing how to use the API.
    """
    return {
        "message": "Welcome to the PageSpeed Insights Report Generator API. "
        "Use /generate_report?url=<target_url> (POST) to generate a report and /download_pdf (GET) to download the PDF."
    }
@app.post("/generate_report")
def generate_report(url: str = Query(..., description="The target URL for which to generate the report")):
    """Generate the report for ``url`` and store it as a PDF.

    Steps:
      1. Fetch PageSpeed Insights data for the given URL.
      2. Generate a report via the Gemini API.
      3. Save the report as a PDF under ``PDF_FILENAME`` (every call writes
         to that same file name).

    Args:
        url: The target URL, taken from the ``url`` query parameter.

    Returns:
        A dict with the generated report text under the ``"report"`` key.

    Raises:
        HTTPException: With status 500 when any step fails. The detail is the
            error message with the configured API keys masked.
    """
    try:
        # Step 1: Fetch PageSpeed data for the provided URL.
        pagespeed_data = get_pagespeed_data(url, PAGESPEED_API_KEY)
        # Step 2: Generate report using the Gemini API.
        report_text = generate_report_with_gemini(pagespeed_data, GEMINI_API_KEY)
        # Step 3: Save the report as a PDF.
        save_report_to_pdf(report_text, pdf_filename=PDF_FILENAME)
    except Exception as e:
        # Error text from the HTTP layer can include the request URL, whose
        # query string carries the API key; mask the keys before the message
        # is returned to the client.
        detail = str(e)
        for secret in (PAGESPEED_API_KEY, GEMINI_API_KEY):
            if secret:
                detail = detail.replace(secret, "***")
        raise HTTPException(status_code=500, detail=detail) from e
    return {"report": report_text}
@app.get("/download_pdf")
def download_pdf():
    """Return the generated PDF file for download.

    Registered for GET ``/download_pdf``. The report must have been generated
    beforehand via ``/generate_report``.

    Returns:
        A ``FileResponse`` serving ``PDF_FILENAME`` as ``application/pdf``.

    Raises:
        HTTPException: With status 404 when no PDF file exists yet.
    """
    # isfile() rather than exists(): a directory with the same name must not
    # be handed to FileResponse.
    if not os.path.isfile(PDF_FILENAME):
        raise HTTPException(status_code=404, detail="PDF report not found. Please generate the report first.")
    return FileResponse(PDF_FILENAME, media_type="application/pdf", filename=PDF_FILENAME)
# To run the app, use the command:
# uvicorn main:app --reload