Spaces:
Starting
Starting
File size: 7,521 Bytes
2c69db6 809d658 2c69db6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import re
import os
import json
import requests
import google.generativeai as genai
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse
# ----------------------------
# Helper functions (provided code)
# ----------------------------
def convert_markdown(text):
    """
    Convert simple markdown bold markers **text** to HTML <b> tags.

    The result is meant to be used as paragraph markup, so the characters
    '&', '<' and '>' in the input are escaped first. Without that, report
    text that mentions HTML (for example "<img>" or "<head>") would be read
    as markup instead of being shown literally.

    Args:
        text: One line (or fragment) of plain/markdown text.

    Returns:
        The escaped text with every **...** pair replaced by <b>...</b>.
    """
    # Local import: the module does not import `html` at file level.
    from html import escape

    # quote=False leaves quotation marks alone; only &, <, > need escaping.
    safe_text = escape(text, quote=False)
    return re.sub(r"\*\*(.*?)\*\*", r"<b>\1</b>", safe_text)
def parse_report_text(report_text):
    """
    Parse the generated report text and return a list of ReportLab flowables
    with headings, bullet lists, and normal paragraphs.

    Conventions used (checked in this order for every non-empty line):
    - A line wrapped in a single pair of '**' markers ("**Title**") is a heading.
    - A line starting with a single '*' is a bullet item; consecutive bullet
      items are grouped into one bullet list.
    - Every other line is a normal paragraph. This includes lines that merely
      begin with bold text, such as "**Key:** value".
    - An empty line ends the current bullet list and adds vertical space.

    The heading test must run before the bullet test: a heading also starts
    with '*', so testing for bullets first would swallow every heading.

    Args:
        report_text: The report as plain text with light markdown.

    Returns:
        A list of flowables ready to be passed to a document's build().
    """
    styles = getSampleStyleSheet()
    heading_style = styles["Heading1"]
    normal_style = styles["BodyText"]
    bullet_style = styles["Bullet"]

    flowables = []
    bullet_items = []  # bullet paragraphs collected for the list being built

    def flush_bullets():
        """Emit the pending bullet items as one list and start a new batch."""
        nonlocal bullet_items
        if bullet_items:
            flowables.append(ListFlowable(bullet_items, bulletType='bullet', leftIndent=20))
            # Rebind instead of clearing: the list flowable keeps the old list.
            bullet_items = []

    for raw_line in report_text.splitlines():
        line = raw_line.strip()

        # Blank line: close any open bullet list and add a spacer.
        if not line:
            flush_bullets()
            flowables.append(Spacer(1, 12))
            continue

        # Heading: exactly one bold span covering the whole line.
        is_heading = (
            len(line) > 4
            and line.startswith("**")
            and line.endswith("**")
            and "**" not in line[2:-2]
        )
        if is_heading:
            flush_bullets()
            heading_text = convert_markdown(line.strip("*").strip())
            flowables.append(Paragraph(heading_text, heading_style))
            continue

        # Bullet: a single leading '*'. A leading '**' is bold text, not a bullet.
        if line.startswith("*") and not line.startswith("**"):
            bullet_text = convert_markdown(line[1:].strip())
            bullet_items.append(Paragraph(bullet_text, bullet_style))
            continue

        # Anything else is a normal paragraph.
        flush_bullets()
        flowables.append(Paragraph(convert_markdown(line), normal_style))

    # Flush any bullet items left over at the end of the text.
    flush_bullets()
    return flowables
def get_pagespeed_data(target_url, pagespeed_api_key, timeout=120):
    """
    Fetch data from the PageSpeed Insights API for the given URL.

    Args:
        target_url: The page to analyse.
        pagespeed_api_key: API key sent as the "key" query parameter.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout the HTTP call can block indefinitely on a stalled
            connection. The default is generous because an analysis run
            may take a while to complete.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        Exception: If the API answers with a status code other than 200.
            Network failures and timeouts are raised by the HTTP client
            itself and are not wrapped here.
    """
    endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
    params = {
        "url": target_url,
        "key": pagespeed_api_key,
    }
    response = requests.get(endpoint, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error fetching PageSpeed data: {response.status_code} - {response.text}")
    return response.json()
def generate_report_with_gemini(pagespeed_data, gemini_api_key):
    """
    Uses the Gemini model to generate a detailed report based on the PageSpeed Insights data.

    Args:
        pagespeed_data: JSON-serialisable PageSpeed Insights result.
        gemini_api_key: API key used to configure the Gemini client.

    Returns:
        The generated report text, or the fixed message
        "No report could be generated." when the model returns no usable text.
    """
    fallback = "No report could be generated."

    # Configure the Gemini API with the provided key.
    genai.configure(api_key=gemini_api_key)
    # Select a Gemini model. For this example, we use 'gemini-2.0-flash'.
    model = genai.GenerativeModel("gemini-2.0-flash")

    # Prepare the prompt including the pretty-printed JSON.
    prompt = (
        "Please generate a detailed and well-structured report on the website performance based "
        "on the PageSpeed Insights data that follows. Organize the report with clear headings, "
        "subheadings, and bullet points for key metrics and recommendations. "
        "Do not generate tables in the report.\n\n"
        "PageSpeed Insights data:\n"
        + json.dumps(pagespeed_data, indent=2)
    )

    # Generate the content using Gemini.
    response = model.generate_content(prompt)

    # Reading `.text` can itself fail: the accessor raises ValueError when the
    # response holds no usable text part (e.g. a blocked answer), which a
    # hasattr() check does not catch. AttributeError covers a missing response.
    try:
        report_text = response.text
    except (AttributeError, ValueError):
        return fallback
    return report_text or fallback
def save_report_to_pdf(report_text, pdf_filename="pagespeed_report.pdf"):
    """
    Render the report text as a letter-sized PDF and write it to disk.

    The text is converted to flowables by parse_report_text() and laid out
    with a 72-point margin on every side. A confirmation line is printed
    once the file has been built.

    Args:
        report_text: The report content to render.
        pdf_filename: Destination path of the PDF file.
    """
    margin = 72  # points, applied to all four sides
    document = SimpleDocTemplate(
        pdf_filename,
        pagesize=letter,
        rightMargin=margin,
        leftMargin=margin,
        topMargin=margin,
        bottomMargin=margin,
    )
    # Turn the text into flowables and lay them out in a single step.
    document.build(parse_report_text(report_text))
    print(f"Report saved as {pdf_filename}")
# ----------------------------
# FastAPI App and Endpoints
# ----------------------------
# Application object that the route decorators below attach to.
app = FastAPI()

# Single, fixed location where the most recently generated report is stored.
PDF_FILENAME = "pagespeed_report.pdf"

# Both API keys come from the environment; refuse to start without them.
PAGESPEED_API_KEY = os.getenv("PAGESPEED_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not (PAGESPEED_API_KEY and GEMINI_API_KEY):
    raise Exception("Please set the PAGESPEED_API_KEY and GEMINI_API_KEY environment variables.")
@app.get("/")
def read_root():
    """
    Landing endpoint: returns a short welcome message that names the
    report-generation and PDF-download endpoints.
    """
    greeting = "Welcome to the PageSpeed Insights Report Generator API. "
    usage = "Use /generate_report?url=<target_url> (POST) to generate a report and /download_pdf (GET) to download the PDF."
    return {"message": greeting + usage}
@app.post("/generate_report")
def generate_report(url: str = Query(..., description="The target URL for which to generate the report")):
    """
    Generates the report by:
    1. Fetching PageSpeed Insights data for the given URL.
    2. Generating a report via the Gemini API.
    3. Saving the report as a PDF.

    Returns:
        A dict with the generated report text under the "report" key.

    Raises:
        HTTPException: Status 500 if any of the three steps fails. The detail
            carries the underlying error message with the configured API
            keys redacted. Error messages from the HTTP client can include
            the full request URL, whose query string contains the key, so
            the raw message must not be returned to the caller as-is.
    """
    try:
        # Step 1: Fetch PageSpeed data for the provided URL.
        pagespeed_data = get_pagespeed_data(url, PAGESPEED_API_KEY)
        # Step 2: Generate report using the Gemini API.
        report_text = generate_report_with_gemini(pagespeed_data, GEMINI_API_KEY)
        # Step 3: Save the report as a PDF.
        save_report_to_pdf(report_text, pdf_filename=PDF_FILENAME)
    except Exception as e:
        detail = str(e)
        # Strip secrets before the message leaves the server.
        for secret in (PAGESPEED_API_KEY, GEMINI_API_KEY):
            if secret:
                detail = detail.replace(secret, "[REDACTED]")
        raise HTTPException(status_code=500, detail=detail) from e
    return {"report": report_text}
@app.get("/download_pdf")
def download_pdf():
    """
    Serve the PDF written by /generate_report as a file download.

    Responds with 404 when no report file exists yet, i.e. when
    /generate_report has not been called successfully beforehand.
    """
    if os.path.exists(PDF_FILENAME):
        return FileResponse(PDF_FILENAME, media_type="application/pdf", filename=PDF_FILENAME)
    raise HTTPException(status_code=404, detail="PDF report not found. Please generate the report first.")
# To run the app, use the command:
# uvicorn main:app --reload
|