website-audit / main.py
Hammad712's picture
Update main.py
809d658 verified
import html
import json
import os
import re

import google.generativeai as genai
import requests
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, ListFlowable, ListItem
# ----------------------------
# Helper functions (provided code)
# ----------------------------
# Matches a non-greedy **bold** span on a single line.
_BOLD_PATTERN = re.compile(r"\*\*(.*?)\*\*")


def convert_markdown(text):
    """Convert markdown bold markers (``**text**``) to ``<b>`` tags.

    The result is meant to be used as paragraph markup, so the characters
    ``&``, ``<`` and ``>`` in the input are escaped first. Without that, a
    report line such as ``LCP < 2.5s`` or ``add a <link> tag`` would be read
    as (malformed) markup instead of literal text.

    Args:
        text: A single line of markdown-ish text.

    Returns:
        The escaped text with every ``**...**`` span wrapped in ``<b>...</b>``.
    """
    # Escape before substituting so the <b> tags we add are the only markup.
    # Quotes are left alone: they are harmless outside attribute values.
    escaped = html.escape(text, quote=False)
    return _BOLD_PATTERN.sub(r"<b>\1</b>", escaped)
def _classify_report_line(line):
    """Classify one stripped, non-empty report line.

    Returns a ``(kind, text)`` tuple where ``kind`` is ``"heading"``,
    ``"bullet"`` or ``"paragraph"`` and ``text`` is the line content with the
    structural markers removed (inline ``**bold**`` markers are kept).
    """
    if line.startswith("**"):
        # Lines that open with bold markers must be examined before the bullet
        # test: they also start with '*', and treating them as bullets would
        # leave stray '**' in the output.
        heading_text = line.strip("*").strip()
        wraps_whole_line = (
            line.endswith("**")
            and len(line) > 4
            and "**" not in line[2:-2]
        )
        if wraps_whole_line and heading_text:
            return "heading", heading_text
        # e.g. "**Score:** 85" - bold-led text, not a heading and not a bullet.
        return "paragraph", line
    if line.startswith("*"):
        return "bullet", line.lstrip("*").strip()
    return "paragraph", line


def parse_report_text(report_text):
    """Turn generated report text into a list of ReportLab flowables.

    Conventions used:
    - A line wrapped entirely in '**' (e.g. ``**Summary**``) is a heading.
    - A line starting with a single '*' is a bullet item; consecutive bullet
      items are grouped into one list.
    - An empty line ends the current bullet list and adds vertical space.
    - Every other non-empty line is a normal paragraph; inline ``**bold**``
      spans are converted by ``convert_markdown``.

    Args:
        report_text: The full report as a single string.

    Returns:
        A list of flowables in document order.
    """
    styles = getSampleStyleSheet()
    heading_style = styles["Heading1"]
    normal_style = styles["BodyText"]
    bullet_style = styles["Bullet"]

    flowables = []
    pending_bullets = []  # bullet paragraphs waiting to be emitted as one list

    def flush_bullets():
        # Emit the accumulated bullets as a single list, then start afresh.
        if pending_bullets:
            flowables.append(
                ListFlowable(list(pending_bullets), bulletType='bullet', leftIndent=20)
            )
            pending_bullets.clear()

    for raw_line in report_text.splitlines():
        line = raw_line.strip()

        if not line:
            flush_bullets()
            flowables.append(Spacer(1, 12))
            continue

        kind, text = _classify_report_line(line)
        markup = convert_markdown(text)

        if kind == "bullet":
            pending_bullets.append(Paragraph(markup, bullet_style))
            continue

        # Anything that is not a bullet terminates the running bullet list.
        flush_bullets()
        style = heading_style if kind == "heading" else normal_style
        flowables.append(Paragraph(markup, style))

    # The text may end while a bullet list is still open.
    flush_bullets()
    return flowables
def get_pagespeed_data(target_url, pagespeed_api_key, timeout=120):
    """Fetch PageSpeed Insights data for the given URL.

    Args:
        target_url: The page to analyse.
        pagespeed_api_key: API key sent as the ``key`` query parameter.
        timeout: Seconds to wait for the API before giving up. The default is
            generous because the request may take a while to complete; pass a
            smaller value (or a ``(connect, read)`` tuple) to tighten it.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        Exception: If the API answers with a status code other than 200.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
    params = {
        "url": target_url,
        "key": pagespeed_api_key,
    }
    # requests has no default timeout; without one a stalled connection would
    # block the calling worker indefinitely.
    response = requests.get(endpoint, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error fetching PageSpeed data: {response.status_code} - {response.text}")
    return response.json()
def generate_report_with_gemini(pagespeed_data, gemini_api_key):
    """Ask the Gemini model for a written report on the PageSpeed data.

    Args:
        pagespeed_data: JSON-serialisable PageSpeed Insights result.
        gemini_api_key: API key used to configure the Gemini client.

    Returns:
        The generated report text, or the string
        ``"No report could be generated."`` when the model returns no usable
        text.
    """
    fallback = "No report could be generated."

    # Configure the Gemini API with the provided key.
    genai.configure(api_key=gemini_api_key)
    # Select a Gemini model. For this example, we use 'gemini-2.0-flash'.
    model = genai.GenerativeModel("gemini-2.0-flash")

    # The instructions come first, followed by the pretty-printed JSON.
    prompt = (
        "Please generate a detailed and well-structured report on the website performance based "
        "on the following PageSpeed Insights data. Organize the report with clear headings, "
        "subheadings, and bullet points for key metrics and recommendations, "
        "and do not generate tables in the report:\n\n"
        + json.dumps(pagespeed_data, indent=2)
    )

    response = model.generate_content(prompt)
    if not response:
        return fallback

    try:
        # `.text` is a convenience accessor; per the google-generativeai docs
        # it raises ValueError when the response carries no usable text (for
        # example a blocked prompt). hasattr() would not catch that, so the
        # access itself is guarded.
        return response.text
    except (AttributeError, ValueError):
        return fallback
def save_report_to_pdf(report_text, pdf_filename="pagespeed_report.pdf"):
    """Render the report text as a letter-size PDF written to ``pdf_filename``.

    The text is converted to flowables by ``parse_report_text`` and laid out
    with a margin of 72 on every side. A confirmation line is printed once the
    document has been built.
    """
    # Same margin on all four sides.
    margins = dict.fromkeys(
        ("rightMargin", "leftMargin", "topMargin", "bottomMargin"), 72
    )
    document = SimpleDocTemplate(pdf_filename, pagesize=letter, **margins)

    # Headings, bullet lists and paragraphs derived from the report text.
    story = parse_report_text(report_text)

    document.build(story)
    print(f"Report saved as {pdf_filename}")
# ----------------------------
# FastAPI App and Endpoints
# ----------------------------
# Application instance that the route decorators below register against.
app = FastAPI()

# Path the generated report is written to and later served from.
PDF_FILENAME = "pagespeed_report.pdf"

# Both API keys are read from the environment; the module refuses to load
# when either one is missing or empty.
PAGESPEED_API_KEY = os.getenv("PAGESPEED_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not (PAGESPEED_API_KEY and GEMINI_API_KEY):
    raise Exception("Please set the PAGESPEED_API_KEY and GEMINI_API_KEY environment variables.")
@app.get("/")
def read_root():
    """Return a short welcome message that points at the other endpoints."""
    welcome = (
        "Welcome to the PageSpeed Insights Report Generator API. "
        "Use /generate_report?url=<target_url> (POST) to generate a report and /download_pdf (GET) to download the PDF."
    )
    return {"message": welcome}
@app.post("/generate_report")
def generate_report(url: str = Query(..., description="The target URL for which to generate the report")):
    """Generate a performance report for ``url``.

    Steps:
    1. Fetch PageSpeed Insights data for the given URL.
    2. Generate a report via the Gemini API.
    3. Save the report as a PDF at ``PDF_FILENAME``.

    Returns:
        ``{"report": <report text>}``.

    Raises:
        HTTPException: With status 500 if any step fails. The detail carries
            the underlying error message with the configured API keys
            redacted.
    """
    try:
        # Step 1: Fetch PageSpeed data for the provided URL.
        pagespeed_data = get_pagespeed_data(url, PAGESPEED_API_KEY)
        # Step 2: Generate report using the Gemini API.
        report_text = generate_report_with_gemini(pagespeed_data, GEMINI_API_KEY)
        # Step 3: Save the report as a PDF.
        save_report_to_pdf(report_text, pdf_filename=PDF_FILENAME)
    except Exception as e:
        # The PageSpeed key is sent in the request's query string, and error
        # messages from the HTTP layer can include the request URL. Scrub the
        # secrets before the message is sent back to the client.
        detail = str(e)
        for secret in (PAGESPEED_API_KEY, GEMINI_API_KEY):
            if secret:
                detail = detail.replace(secret, "[REDACTED]")
        raise HTTPException(status_code=500, detail=detail) from e
    return {"report": report_text}
@app.get("/download_pdf")
def download_pdf():
    """Serve the most recently generated PDF report as a download.

    Responds with 404 when no report file exists yet, i.e. before
    /generate_report has been called successfully.
    """
    if os.path.exists(PDF_FILENAME):
        return FileResponse(
            PDF_FILENAME,
            media_type="application/pdf",
            filename=PDF_FILENAME,
        )
    raise HTTPException(
        status_code=404,
        detail="PDF report not found. Please generate the report first.",
    )
# To run the app, use the command:
# uvicorn main:app --reload