Update app.py
app.py
CHANGED
@@ -3,6 +3,10 @@ from swarm import Swarm, Agent
 from bs4 import BeautifulSoup
 import requests
 import os
+from io import BytesIO
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+import json
 
 # Function to fetch OpenAI API key
 def fetch_openai_api_key():
@@ -31,69 +35,56 @@ def scrape_website(url):
     except requests.exceptions.RequestException as e:
         return f"Error during scraping: {str(e)}"
 
-#
-
-
-
-
-)
-
-
-
-
-
-
-
-
-
-
-
-)
-
-# Define the writing function
-def write_summary(context_variables):
-    """Writes a summary based on the analysis."""
-    analysis = context_variables.get('analysis', '')
-    summary = f"Here's a detailed report based on the research: {analysis}"
-    return summary
-
-# Writer Agent
-writer_agent = Agent(
-    name="Writer Agent",
-    instructions="You are an agent that writes summaries of research.",
-    functions=[write_summary]
-)
+# Generate PDF report
+def generate_pdf(report):
+    """Generate a PDF file from the report."""
+    buffer = BytesIO()
+    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
+    pdf_canvas.drawString(100, 750, "Generated Summary Report")
+    pdf_canvas.drawString(100, 735, "-" * 50)
+
+    lines = report.split('\n')
+    y = 700  # Start position for the text
+    for line in lines:
+        if y < 50:  # Create a new page if content exceeds one page
+            pdf_canvas.showPage()
+            y = 750
+        pdf_canvas.drawString(100, y, line)
+        y -= 15
+
+    pdf_canvas.save()
+    buffer.seek(0)
+    return buffer
 
 # Orchestrate the workflow
 def orchestrate_workflow(client, url):
-    #
+    # Scrape, analyze, and summarize content
     scrape_result = client.run(
-        agent=
+        agent=Agent(
+            name="Scraper Agent",
+            instructions="Scrape content from websites.",
+            functions=[scrape_website]
+        ),
         messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
     )
     scraped_content = scrape_result.messages[-1]["content"]
 
-    #
+    # Handle errors during scraping
     if "Error during scraping" in scraped_content:
         return scraped_content
 
-    #
-
-        agent=
+    # Analyze and summarize the content
+    analyze_result = client.run(
+        agent=Agent(
+            name="Research Agent",
+            instructions="Analyze content and extract insights.",
+            functions=[lambda content: f"Summary: {content[:700]}..."]
+        ),
         messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    )
-    analysis_summary =
-
-    # Step 3: Write the summary based on the analysis
-    writer_result = client.run(
-        agent=writer_agent,
-        messages=[{"role": "user", "content": f"Write a summary based on this analysis: {analysis_summary}"}],
-        context_variables={"analysis": analysis_summary}
-    )
+    analysis_summary = analyze_result.messages[-1]["content"]
 
-
-    return final_summary
+    return analysis_summary
 
 # Streamlit App UI
 st.markdown(
@@ -101,7 +92,6 @@ st.markdown(
     <style>
     .title { text-align: center; font-size: 2.5rem; font-weight: bold; }
     .description { text-align: center; font-size: 1.1rem; color: #555; }
-    .button-container { text-align: center; }
     .ack { font-size: 0.95rem; color: #888; text-align: center; }
     </style>
     """,
@@ -111,9 +101,6 @@ st.markdown(
 st.markdown('<div class="title">π Swarm-based Web Content Analyzer</div>', unsafe_allow_html=True)
 st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content.</div>', unsafe_allow_html=True)
 
-st.write("")
-st.write("")
-
 fetch_openai_api_key()
 
 # Initialize Swarm client only after API key is set
@@ -124,16 +111,39 @@ if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
     st.subheader("π Enter the Website URL")
     url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")
 
-    # Run Workflow button
-    st.write("")
-
     if st.button("Run Workflow"):
         if url:
             with st.spinner("Running the multi-agent workflow... This may take a moment."):
-
+                final_summary = orchestrate_workflow(client, url)
+
                 st.success("✅ Workflow complete!")
                 st.write("### π Final Report:")
-                st.write(
+                st.write(final_summary)
+
+                # Download options
+                json_data = json.dumps({"summary": final_summary}, indent=4)
+                txt_data = final_summary
+                pdf_data = generate_pdf(final_summary)
+
+
+                st.download_button(
+                    label="Download Report as TXT",
+                    data=txt_data,
+                    file_name="report.txt",
+                    mime="text/plain"
+                )
+                st.download_button(
+                    label="Download Report as PDF",
+                    data=pdf_data,
+                    file_name="report.pdf",
+                    mime="application/pdf"
+                )
+                st.download_button(
+                    label="Download Report as JSON",
+                    data=json_data,
+                    file_name="report.json",
+                    mime="application/json"
+                )
         else:
             st.error("❌ Please enter a valid URL.")
 else:
@@ -141,4 +151,11 @@ else:
 
 # Footer with credits
 st.divider()
-st.markdown(
+st.markdown(
+    """
+    <div class="ack">
+        Acknowledgment: This work is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
+    </div>
+    """,
+    unsafe_allow_html=True
+)