import streamlit as st
from swarm import Swarm, Agent
from bs4 import BeautifulSoup
import requests
import os
from io import BytesIO
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import json

# Function to fetch the OpenAI API key
def fetch_openai_api_key():
    """Fetch the OpenAI API key from Hugging Face secrets."""
    try:
        secret_key = st.secrets.get("OPENAI_API_KEY", "")
        if secret_key:
            os.environ['OPENAI_API_KEY'] = secret_key
        else:
            st.warning("⚠️ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
    except Exception as e:
        st.error(f"Error retrieving OpenAI API Key: {str(e)}")

# Initialize the Swarm client
def initialize_swarm_client():
    return Swarm()
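
# A cached variant (sketch): Streamlit's st.cache_resource keeps one object
# across reruns instead of rebuilding it on every user interaction. This
# assumes a Swarm client is safe to reuse between reruns; the name
# initialize_swarm_client_cached is illustrative and not used below.
@st.cache_resource
def initialize_swarm_client_cached():
    return Swarm()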

# Define the scraping function
def scrape_website(url):
    """Scrape the text content of a website."""
    try:
        # A timeout keeps a slow or unresponsive site from hanging the app indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()  # Return the text content from the HTML
    except requests.exceptions.RequestException as e:
        return f"Error during scraping: {str(e)}"

# Generate PDF report
def generate_pdf(report):
    """Generate a PDF file from the report."""
    buffer = BytesIO()
    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
    pdf_canvas.drawString(100, 750, "Generated Summary Report")
    pdf_canvas.drawString(100, 735, "-" * 50)

    lines = report.split('\n')
    y = 700  # Start position for the text
    for line in lines:
        if y < 50:  # Create a new page if content exceeds one page
            pdf_canvas.showPage()
            y = 750
        pdf_canvas.drawString(100, y, line)
        y -= 15

    pdf_canvas.save()
    buffer.seek(0)
    return buffer
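
# Note: canvas.drawString does not wrap text, so report lines wider than the
# page are clipped at the right margin. A minimal wrapping variant is sketched
# below using reportlab's simpleSplit; it assumes the canvas default font
# (Helvetica 12) and is illustrative rather than wired into the UI.
from reportlab.lib.utils import simpleSplit

def generate_pdf_wrapped(report, max_width=450):
    """Like generate_pdf, but wraps long lines to max_width points."""
    buffer = BytesIO()
    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
    pdf_canvas.drawString(100, 750, "Generated Summary Report")
    pdf_canvas.drawString(100, 735, "-" * 50)
    y = 700
    for raw_line in report.split('\n'):
        # simpleSplit breaks a string into chunks that fit within max_width;
        # `or [""]` preserves blank lines in the report.
        for line in simpleSplit(raw_line, "Helvetica", 12, max_width) or [""]:
            if y < 50:  # start a new page when the current one is full
                pdf_canvas.showPage()
                y = 750
            pdf_canvas.drawString(100, y, line)
            y -= 15
    pdf_canvas.save()
    buffer.seek(0)
    return buffer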

# Orchestrate the workflow
def orchestrate_workflow(client, url):
    # Scrape, analyze, and summarize content
    scrape_result = client.run(
        agent=Agent(
            name="Scraper Agent",
            instructions="Scrape content from websites.",
            functions=[scrape_website]
        ),
        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
    )
    scraped_content = scrape_result.messages[-1]["content"]

    # Handle errors during scraping
    if "Error during scraping" in scraped_content:
        return scraped_content

    # Analyze and summarize the content
    analyze_result = client.run(
        agent=Agent(
            name="Research Agent",
            instructions="Analyze content and extract insights.",
            functions=[lambda content: f"Summary: {content[:700]}..."]
        ),
        messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    )
    analysis_summary = analyze_result.messages[-1]["content"]
    return analysis_summary

# Streamlit App UI
st.markdown(
    """
    <style>
    .title { text-align: center; font-size: 2.5rem; font-weight: bold; }
    .description { text-align: center; font-size: 1.1rem; color: #555; }
    .ack { font-size: 0.95rem; color: #888; text-align: center; }
    </style>
    """,
    unsafe_allow_html=True,
)
st.markdown('<div class="title">🔍 Swarm-based Web Content Analyzer</div>', unsafe_allow_html=True)
st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content.</div>', unsafe_allow_html=True)
fetch_openai_api_key()

# Initialize the Swarm client only after the API key is set
if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
    client = initialize_swarm_client()

    # Input field for the website URL
    st.subheader("🌐 Enter the Website URL")
    url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")

    if st.button("Run Workflow"):
        if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
                final_summary = orchestrate_workflow(client, url)
            st.success("✅ Workflow complete!")
            st.write("### 📄 Final Report:")
            st.write(final_summary)

            # Download options
            json_data = json.dumps({"summary": final_summary}, indent=4)
            txt_data = final_summary
            pdf_data = generate_pdf(final_summary)

            st.download_button(
                label="Download Report as TXT",
                data=txt_data,
                file_name="report.txt",
                mime="text/plain"
            )
            st.download_button(
                label="Download Report as PDF",
                data=pdf_data,
                file_name="report.pdf",
                mime="application/pdf"
            )
            st.download_button(
                label="Download Report as JSON",
                data=json_data,
                file_name="report.json",
                mime="application/json"
            )
        else:
            st.error("❌ Please enter a valid URL.")
else:
    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")

# Footer with credits
st.divider()
st.markdown(
    """
    <div class="ack">
    Acknowledgment: This work is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
    </div>
    """,
    unsafe_allow_html=True
)