File size: 7,497 Bytes
0184f32 e0196db 466344f 5f94e5a bb14580 5f94e5a d2c82ee d268952 8f212a3 d268952 c64ddc6 d268952 8f212a3 d268952 5f94e5a 8f212a3 5f94e5a 1a66764 0c604ed 1a66764 1c7c6a9 1a66764 8f212a3 1a66764 8f212a3 1a66764 8f212a3 5f94e5a 8f212a3 1a66764 8f212a3 1a66764 8f212a3 1a66764 8f212a3 1a66764 5f94e5a bb14580 bf0654b bb14580 bf0654b bb14580 5f94e5a 4f6325e 8a4396c 466344f dc60aaa 4f6325e dc60aaa 1d2a7b4 8a4396c dc60aaa 6d8d359 686975f dc60aaa db033fb 8a4396c dc60aaa 9b62756 c174e69 01c5b1d 0423cfe d268952 5f94e5a bb14580 6ad4699 5f89cb0 5f94e5a 466344f 1a66764 8a4396c 466344f 5f94e5a d268952 1a66764 8f212a3 1a66764 bb14580 5f94e5a d268952 5f94e5a 8f212a3 bf0654b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
# image source: https://www.globesign.com/blog/a-beginners-guide-to-google-website-analyzer/
#ref: i) https://blog.dailydoseofds.com/p/building-a-multi-agent-internet-research , ii) https://blog.dailydoseofds.com/p/build-a-multi-agent-research-assistant
import io
import os
import textwrap

import requests
import streamlit as st
from bs4 import BeautifulSoup
from reportlab.pdfgen import canvas
from swarm import Swarm, Agent
# Function to fetch OpenAI API key
def fetch_openai_api_key():
    """Fetch the OpenAI API key from Hugging Face secrets and export it.

    On success the key is written to the OPENAI_API_KEY environment
    variable; otherwise a Streamlit warning or error is shown in the UI.
    """
    try:
        api_key = st.secrets.get("OPENAI_API_KEY", "")
        if not api_key:
            st.warning("β οΈ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
            return
        os.environ['OPENAI_API_KEY'] = api_key
    except Exception as err:
        st.error(f"Error retrieving OpenAI API Key: {str(err)}")
# Initialize the Swarm client
def initialize_swarm_client():
    """Build and return the Swarm client that drives the agents."""
    client = Swarm()
    return client
# Define the scraping function
def scrape_website(url):
    """Scrape a web page and return its visible text content.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The plain-text content extracted from the page HTML, or a string
        prefixed with "Error during scraping" when the request fails
        (callers check for that prefix).
    """
    try:
        # A timeout prevents the whole agent workflow from hanging
        # forever on an unresponsive host (requests has no default).
        response = requests.get(url, timeout=15)
        response.raise_for_status()  # surface HTTP 4xx/5xx as exceptions
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()  # Return the text content from the HTML
    except requests.exceptions.RequestException as e:
        return f"Error during scraping: {str(e)}"
# Scraper Agent: exposes scrape_website as a callable tool for the Swarm runtime.
scraper_agent = Agent(
    name="Scraper Agent",
    instructions="You are an agent that scrapes content from websites.",
    functions=[scrape_website]
)
# Define the analysis function
def analyze_content(content):
    """Return the analysis-report template used by the research agent.

    Note: the returned template is fixed and does not vary with
    *content*; the LLM agent fills in the specifics when it invokes
    this tool.
    """
    template_lines = [
        "π Final Report:",
        "",
        "Based on the website content, here are the key takeaways:",
        "",
        "Offerings and Highlights:",
        "- Summarize key offerings, products, or services.",
        "",
        "Prominent Features:",
        "- Identify any standout features or unique aspects.",
        "",
        "Additional Notes:",
        "- Provide other insights that might be useful for the user.",
    ]
    return "\n".join(template_lines)
# Research Agent: exposes analyze_content as a tool for extracting key insights.
research_agent = Agent(
    name="Research Agent",
    instructions="You are an agent that highlights key insights by dynamically analyzing content and adapting to the available information.",
    functions=[analyze_content]
)
# Define the writing function
def write_summary(context_variables):
    """Compose the final report text from the analysis in context.

    Args:
        context_variables: Mapping expected to carry an 'analysis'
            entry; a missing entry falls back to an empty string.

    Returns:
        The report string embedding the analysis text.
    """
    analysis_text = context_variables.get('analysis', '')
    return f"Here's a detailed report based on the research: {analysis_text}"
# Writer Agent: exposes write_summary as a tool for producing the final report.
writer_agent = Agent(
    name="Writer Agent",
    instructions="You are an agent that writes summaries of research.",
    functions=[write_summary]
)
# Orchestrate the workflow
def orchestrate_workflow(client, url):
    """Run the scrape -> analyze -> write pipeline and return the report.

    Args:
        client: Swarm client used to drive each agent run.
        url: Website URL to process.

    Returns:
        The final written summary, or the scraper's error string when
        the initial fetch fails.
    """
    # Step 1: Scrape the website
    scrape_msgs = [{"role": "user", "content": f"Scrape the following website: {url}"}]
    scraped_content = client.run(agent=scraper_agent, messages=scrape_msgs).messages[-1]["content"]

    # Abort early when the scraper reported a failure.
    if "Error during scraping" in scraped_content:
        return scraped_content

    # Step 2: Analyze the scraped content
    analyze_msgs = [{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    analysis_summary = client.run(agent=research_agent, messages=analyze_msgs).messages[-1]["content"]

    # Step 3: Write the summary based on the analysis
    write_msgs = [{"role": "user", "content": f"Write a summary based on this analysis: {analysis_summary}"}]
    writer_result = client.run(
        agent=writer_agent,
        messages=write_msgs,
        context_variables={"analysis": analysis_summary},
    )
    return writer_result.messages[-1]["content"]
# Helper functions to create text and PDF files
def create_text_file(content):
    """Return *content* unchanged as the downloadable plain-text payload."""
    return content
def create_pdf_file(content):
    """Render *content* into a simple single-column PDF.

    Args:
        content: Report text; newlines delimit paragraphs.

    Returns:
        The PDF document as raw bytes, ready for st.download_button.
    """
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer)
    c.drawString(100, 750, "Generated Report")
    c.drawString(100, 730, "--------------------")
    y = 700
    for raw_line in content.split("\n"):
        # Wrap long lines so text does not run off the right page edge;
        # the original drew each line verbatim regardless of width.
        for line in textwrap.wrap(raw_line, width=80) or [""]:
            if y < 50:  # Create a new page if the content overflows
                c.showPage()
                y = 750
            c.drawString(100, y, line)
            y -= 20
    c.save()
    buffer.seek(0)
    return buffer.getvalue()  # Return binary content
# Streamlit App UI: inject the page-level CSS once at startup.
_PAGE_CSS = """
    <style>
    .title { text-align: center; font-size: 2.4rem; font-weight: bold; margin-bottom: 20px; }
    .description { text-align: center; font-size: 1.0rem; color: #555; margin-bottom: 30px; }
    .section { margin-top: 30px; margin-bottom: 30px; }
    .ack { font-size: 0.95rem; color: #888; text-align: center; margin-top: 10px; }
    </style>
    """
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# 1. Add the title at the top
st.markdown('<div class="title">Swarm-based Web Content Analyzer π§</div>', unsafe_allow_html=True)
# 2. Add the description below the title
st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content using multi-agent.</div>', unsafe_allow_html=True)
# 3. Add the image below the description
st.image("./image-4.png", use_container_width=True)
# 4. Add Acknowledgement
st.markdown(
"""
<div class="ack">
Acknowledgment: This app is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
</div>
""",
unsafe_allow_html=True
)
# 5. Add one line-spacing after the acknowledgment
st.markdown('<div style="margin-bottom: 20px;"></div>', unsafe_allow_html=True)
fetch_openai_api_key()

# Only build the interactive UI when an API key is actually available.
if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
    client = initialize_swarm_client()

    # Add interface for URL input
    st.subheader("Enter the Website URL π")
    url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://huggingface.co/models")

    # Add some spacing
    st.markdown('<div class="section"></div>', unsafe_allow_html=True)

    # Add the "Run Workflow" button
    if st.button("π Run Workflow", key="run"):
        if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
                final_report = orchestrate_workflow(client, url)
                # NOTE(review): this success message was a corrupted literal
                # split across lines in the source; restored to one valid call.
                st.success("✅ Workflow complete!")
                st.write("### π Final Report:")
                st.write(final_report)

                # Offer the report as both plain-text and PDF downloads.
                text_file = create_text_file(final_report)
                pdf_file = create_pdf_file(final_report)
                st.download_button(
                    label="Download Report as Text",
                    data=text_file,
                    file_name="report.txt",
                    mime="text/plain",
                )
                st.download_button(
                    label="Download Report as PDF",
                    data=pdf_file,
                    file_name="report.pdf",
                    mime="application/pdf",
                )
        else:
            st.error("β Please enter a valid URL.")
else:
    st.sidebar.warning("β οΈ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")
|