Update app.py
app.py CHANGED
@@ -3,16 +3,20 @@ from swarm import Swarm, Agent
 from bs4 import BeautifulSoup
 import requests
 import os
+import json
+from io import BytesIO
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
 
 # Function to fetch OpenAI API key
 def fetch_openai_api_key():
-    """Fetch the OpenAI API key from
+    """Fetch the OpenAI API key from Streamlit secrets."""
     try:
         secret_key = st.secrets.get("OPENAI_API_KEY", "")
         if secret_key:
             os.environ['OPENAI_API_KEY'] = secret_key
         else:
-            st.warning("⚠️ OpenAI API Key is missing! Please check your
+            st.warning("⚠️ OpenAI API Key is missing! Please check your secrets configuration.")
     except Exception as e:
         st.error(f"Error retrieving OpenAI API Key: {str(e)}")
@@ -27,73 +31,60 @@ def scrape_website(url):
         response = requests.get(url)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Extract metadata
+        metadata = {
+            "title": soup.title.string if soup.title else "N/A",
+            "description": soup.find("meta", {"name": "description"})["content"] if soup.find("meta", {"name": "description"}) else "N/A",
+            "keywords": soup.find("meta", {"name": "keywords"})["content"] if soup.find("meta", {"name": "keywords"}) else "N/A",
+        }
+
+        text_content = soup.get_text()  # Extract text content
+        return {"text": text_content, "metadata": metadata}
     except requests.exceptions.RequestException as e:
         return f"Error during scraping: {str(e)}"
 
-# Scraper Agent
-scraper_agent = Agent(
-    name="Scraper Agent",
-    instructions="You are an agent that scrapes content from websites.",
-    functions=[scrape_website]
-)
-
-# Define the analysis function
+# Enhanced summarization function
 def analyze_content(content):
     """Analyzes the scraped content for key points."""
-    summary = f"Summary of content: {content[:
+    summary = f"Summary of content: {content[:500]}..."
     return summary
 
-# Research Agent
-research_agent = Agent(
-    name="Research Agent",
-    instructions="You are an agent that analyzes content and extracts key insights.",
-    functions=[analyze_content]
-)
-
 # Define the writing function
 def write_summary(context_variables):
     """Writes a summary based on the analysis."""
     analysis = context_variables.get('analysis', '')
-
-# Orchestrate the workflow
-def orchestrate_workflow(client, url):
-    # Step 1: Scrape the website
-    scrape_result = client.run(
-        agent=scraper_agent,
-        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
-    )
-    return
+    metadata = context_variables.get('metadata', {})
+
+    detailed_report = (
+        f"### Metadata:\n"
+        f"**Title:** {metadata.get('title')}\n"
+        f"**Description:** {metadata.get('description')}\n"
+        f"**Keywords:** {metadata.get('keywords')}\n\n"
+        f"### Content Summary:\n{analysis}"
+    )
+    return detailed_report
+
+# Generate PDF report
+def generate_pdf(report):
+    """Generate a PDF file from the report."""
+    buffer = BytesIO()
+    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
+    pdf_canvas.drawString(100, 750, "Web Content Analyzer Report")
+    pdf_canvas.drawString(100, 735, "-" * 50)
+
+    lines = report.split('\n')
+    y = 700  # Start position for the text
+    for line in lines:
+        if y < 50:  # Create a new page if content exceeds one page
+            pdf_canvas.showPage()
+            y = 750
+        pdf_canvas.drawString(100, y, line)
+        y -= 15
+
+    pdf_canvas.save()
+    buffer.seek(0)
+    return buffer
 
 # Streamlit App UI
 st.markdown(
@@ -108,15 +99,11 @@ st.markdown(
     unsafe_allow_html=True,
 )
 
-st.markdown('<div class="title">🌐
-st.markdown('<div class="description">
-
-st.write("")
-st.write("")
+st.markdown('<div class="title">🌐 Multi-Agent Web Content Analyzer</div>', unsafe_allow_html=True)
+st.markdown('<div class="description">Extract, analyze, and summarize web content with advanced capabilities.</div>', unsafe_allow_html=True)
 
 fetch_openai_api_key()
 
-# Initialize Swarm client only after API key is set
 if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
     client = initialize_swarm_client()
@@ -125,20 +112,55 @@ if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
     url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")
 
     # Run Workflow button
-    st.write("")
-
     if st.button("Run Workflow"):
         if url:
             with st.spinner("Running the multi-agent workflow... This may take a moment."):
+                scrape_result = scrape_website(url)
+                if isinstance(scrape_result, str):  # Error handling
+                    st.error(scrape_result)
+                else:
+                    content = scrape_result["text"]
+                    metadata = scrape_result["metadata"]
+
+                    # Analysis and writing
+                    analysis_summary = analyze_content(content)
+                    final_summary = write_summary({
+                        "analysis": analysis_summary,
+                        "metadata": metadata,
+                    })
+
+                    st.success("✅ Workflow complete!")
+                    st.write("### 📊 Final Report:")
+                    st.markdown(final_summary, unsafe_allow_html=True)
+
+                    # Prepare downloadable content
+                    report_file_json = json.dumps({
+                        "metadata": metadata,
+                        "summary": analysis_summary
+                    }, indent=4)
+                    report_file_txt = final_summary
+                    report_file_pdf = generate_pdf(final_summary)
+
+                    # Download options
+                    st.download_button(
+                        label="Download Report as JSON",
+                        data=report_file_json,
+                        file_name="report.json",
+                        mime="application/json"
+                    )
+                    st.download_button(
+                        label="Download Report as TXT",
+                        data=report_file_txt,
+                        file_name="report.txt",
+                        mime="text/plain"
+                    )
+                    st.download_button(
+                        label="Download Report as PDF",
+                        data=report_file_pdf,
+                        file_name="report.pdf",
+                        mime="application/pdf"
+                    )
         else:
             st.error("❌ Please enter a valid URL.")
 else:
-    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your
-
-# Footer with credits
-st.divider()
-st.markdown('<div class="ack">Acknowledgement: </div>', unsafe_allow_html=True)
+    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your secrets configuration.")
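
A quick way to sanity-check the new report pipeline is to call the added helpers directly, outside the Streamlit UI. The sketch below is illustrative only: the import path and URL are assumptions, and importing app.py also executes its module-level Streamlit calls.

```python
# Illustrative smoke test for the helpers added in this commit (not part of app.py).
from app import scrape_website, analyze_content, write_summary, generate_pdf

result = scrape_website("https://example.com")  # hypothetical URL
if isinstance(result, str):
    # scrape_website signals failure by returning an error string
    print(result)
else:
    report = write_summary({
        "analysis": analyze_content(result["text"]),
        "metadata": result["metadata"],
    })
    # generate_pdf returns a BytesIO buffer ready for writing or download
    with open("report.pdf", "wb") as fh:
        fh.write(generate_pdf(report).getvalue())
```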