import streamlit as st
from swarm import Swarm, Agent
from bs4 import BeautifulSoup
import requests
import os
from io import BytesIO
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import json


def fetch_openai_api_key():
    """Fetch the OpenAI API key from Hugging Face secrets."""
    try:
        secret_key = st.secrets.get("OPENAI_API_KEY", "")
        if secret_key:
            # Swarm's underlying OpenAI client reads the key from this variable.
            os.environ['OPENAI_API_KEY'] = secret_key
        else:
            st.warning("⚠️ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
    except Exception as e:
        st.error(f"Error retrieving OpenAI API Key: {str(e)}")


def initialize_swarm_client():
    """Create the Swarm client that runs the agents."""
    return Swarm()

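# Note: Streamlit re-runs the whole script on every interaction, so the Swarm
# client is rebuilt each run. Decorating initialize_swarm_client with
# st.cache_resource would reuse a single client across reruns; that is a
# suggested refinement, not part of the original app.
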
def scrape_website(url):
    """Scrape the text content of a website."""
    try:
        # A timeout keeps a slow or unresponsive site from hanging the app.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except requests.exceptions.RequestException as e:
        return f"Error during scraping: {str(e)}"

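# Note: soup.get_text() also returns the contents of <script> and <style> tags.
# A minimal refinement (a suggestion, not in the original app) is to drop those
# tags before extracting text:
#
#     for tag in soup(["script", "style"]):
#         tag.decompose()
#     text = soup.get_text(separator=" ", strip=True)
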
def generate_pdf(report):
    """Generate a PDF file from the report."""
    buffer = BytesIO()
    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
    pdf_canvas.drawString(100, 750, "Generated Summary Report")
    pdf_canvas.drawString(100, 735, "-" * 50)

    lines = report.split('\n')
    y = 700
    for line in lines:
        # Start a new page once the cursor nears the bottom margin.
        if y < 50:
            pdf_canvas.showPage()
            y = 750
        pdf_canvas.drawString(100, y, line)
        y -= 15

    pdf_canvas.save()
    buffer.seek(0)
    return buffer

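# Note: drawString() does not wrap long lines, so text wider than the page is
# clipped at the right margin. A possible refinement (an assumption, not part
# of the original app) is to pre-wrap each line before drawing it:
#
#     import textwrap
#     for chunk in textwrap.wrap(line, width=90) or [""]:
#         pdf_canvas.drawString(100, y, chunk)
#         y -= 15
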
def orchestrate_workflow(client, url):
    """Run the scraper and research agents in sequence and return the summary."""
    scrape_result = client.run(
        agent=Agent(
            name="Scraper Agent",
            instructions="Scrape content from websites.",
            functions=[scrape_website]
        ),
        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
    )
    scraped_content = scrape_result.messages[-1]["content"]

    # Bail out early if the scraper tool reported a request error.
    if "Error during scraping" in scraped_content:
        return scraped_content

    # A named function (rather than a lambda) gives the tool schema a valid name.
    def summarize_content(content):
        return f"Summary: {content[:700]}..."

    analyze_result = client.run(
        agent=Agent(
            name="Research Agent",
            instructions="Analyze content and extract insights.",
            functions=[summarize_content]
        ),
        messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    )
    analysis_summary = analyze_result.messages[-1]["content"]

    return analysis_summary

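# How the two-step workflow fits together: each Agent's `functions` list is
# exposed to the model as callable tools, so the Scraper Agent can invoke
# scrape_website() itself, and the Research Agent's summarizer tool caps its
# echo of the scraped content at roughly 700 characters.
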
st.markdown(
    """
    <style>
    .title { text-align: center; font-size: 2.5rem; font-weight: bold; }
    .description { text-align: center; font-size: 1.1rem; color: #555; }
    .ack { font-size: 0.95rem; color: #888; text-align: center; }
    </style>
    """,
    unsafe_allow_html=True,
)

st.markdown('<div class="title">🌐 Swarm-based Web Content Analyzer</div>', unsafe_allow_html=True)
st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content.</div>', unsafe_allow_html=True)

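# Load the key before building the client: Swarm() is only created below once
# OPENAI_API_KEY is present in the environment.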
fetch_openai_api_key()

if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
    client = initialize_swarm_client()

    st.subheader("🔗 Enter the Website URL")
    url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")

    if st.button("Run Workflow"):
        if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
                final_summary = orchestrate_workflow(client, url)

            st.success("✅ Workflow complete!")
            st.write("### 📝 Final Report:")
            st.write(final_summary)

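            # Offer the report in three formats; st.download_button accepts a
            # str, bytes, or file-like object, so the BytesIO returned by
            # generate_pdf() can be passed directly.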
            json_data = json.dumps({"summary": final_summary}, indent=4)
            txt_data = final_summary
            pdf_data = generate_pdf(final_summary)

            st.download_button(
                label="Download Report as TXT",
                data=txt_data,
                file_name="report.txt",
                mime="text/plain"
            )
            st.download_button(
                label="Download Report as PDF",
                data=pdf_data,
                file_name="report.pdf",
                mime="application/pdf"
            )
            st.download_button(
                label="Download Report as JSON",
                data=json_data,
                file_name="report.json",
                mime="application/json"
            )
        else:
            st.error("❌ Please enter a valid URL.")
else:
    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")

st.divider()
st.markdown(
    """
    <div class="ack">
    Acknowledgment: This work is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
    </div>
    """,
    unsafe_allow_html=True
)