Update app.py
app.py CHANGED
@@ -3,20 +3,16 @@ from swarm import Swarm, Agent
 from bs4 import BeautifulSoup
 import requests
 import os
-import json
-from io import BytesIO
-from reportlab.lib.pagesizes import letter
-from reportlab.pdfgen import canvas

 # Function to fetch OpenAI API key
 def fetch_openai_api_key():
-    """Fetch the OpenAI API key from secrets."""
+    """Fetch the OpenAI API key from Hugging Face secrets."""
     try:
         secret_key = st.secrets.get("OPENAI_API_KEY", "")
         if secret_key:
             os.environ['OPENAI_API_KEY'] = secret_key
         else:
-            st.warning("⚠️ OpenAI API Key is missing! Please check your secrets configuration.")
+            st.warning("⚠️ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
     except Exception as e:
         st.error(f"Error retrieving OpenAI API Key: {str(e)}")

@@ -31,60 +27,73 @@ def scrape_website(url):
         response = requests.get(url)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
-
-        # Extract metadata
-        metadata = {
-            "title": soup.title.string if soup.title else "N/A",
-            "description": soup.find("meta", {"name": "description"})["content"] if soup.find("meta", {"name": "description"}) else "N/A",
-            "keywords": soup.find("meta", {"name": "keywords"})["content"] if soup.find("meta", {"name": "keywords"}) else "N/A",
-        }
-
-        text_content = soup.get_text()  # Extract text content
-        return {"text": text_content, "metadata": metadata}
+        return soup.get_text()  # Return the text content from the HTML
     except requests.exceptions.RequestException as e:
         return f"Error during scraping: {str(e)}"

-# …
+# Scraper Agent
+scraper_agent = Agent(
+    name="Scraper Agent",
+    instructions="You are an agent that scrapes content from websites.",
+    functions=[scrape_website]
+)
+
+# Define the analysis function
 def analyze_content(content):
     """Analyzes the scraped content for key points."""
-    summary = f"Summary of content: {content[:…
+    summary = f"Summary of content: {content[:200]}..."  # A simple placeholder summarization
     return summary

+# Research Agent
+research_agent = Agent(
+    name="Research Agent",
+    instructions="You are an agent that analyzes content and extracts key insights.",
+    functions=[analyze_content]
+)
+
 # Define the writing function
 def write_summary(context_variables):
     """Writes a summary based on the analysis."""
     analysis = context_variables.get('analysis', '')
-    …
+    summary = f"Here's a detailed report based on the research: {analysis}"
+    return summary
+
+# Writer Agent
+writer_agent = Agent(
+    name="Writer Agent",
+    instructions="You are an agent that writes summaries of research.",
+    functions=[write_summary]
+)
+
+# Orchestrate the workflow
+def orchestrate_workflow(client, url):
+    # Step 1: Scrape the website
+    scrape_result = client.run(
+        agent=scraper_agent,
+        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
     )
-    …
+    scraped_content = scrape_result.messages[-1]["content"]
+
+    # Check for any error during scraping
+    if "Error during scraping" in scraped_content:
+        return scraped_content
+
+    # Step 2: Analyze the scraped content
+    research_result = client.run(
+        agent=research_agent,
+        messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
+    )
+    analysis_summary = research_result.messages[-1]["content"]
+
+    # Step 3: Write the summary based on the analysis
+    writer_result = client.run(
+        agent=writer_agent,
+        messages=[{"role": "user", "content": f"Write a summary based on this analysis: {analysis_summary}"}],
+        context_variables={"analysis": analysis_summary}
+    )
+
+    final_summary = writer_result.messages[-1]["content"]
+    return final_summary

 # Streamlit App UI
 st.markdown(
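Note: orchestrate_workflow chains the three agents by feeding the last message of each client.run call into the next step. As a quick sanity check, the new function could be exercised without the Streamlit UI by something like the sketch below; the standalone guard, the example URL, and placing it in the same module are assumptions, not part of the commit.

# Hypothetical local test, assuming it lives in the same module as the
# agent and orchestrate_workflow definitions above and that
# OPENAI_API_KEY is already exported in the environment.
if __name__ == "__main__":
    from swarm import Swarm

    test_client = Swarm()  # default Swarm client built on the OpenAI SDK
    print(orchestrate_workflow(test_client, "https://example.com"))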
@@ -99,11 +108,15 @@ st.markdown(
     unsafe_allow_html=True,
 )

-st.markdown('<div class="title">…
-st.markdown('<div class="description">…
+st.markdown('<div class="title">🌐 Swarm-based Web Content Analyzer</div>', unsafe_allow_html=True)
+st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content.</div>', unsafe_allow_html=True)
+
+st.write("")
+st.write("")

 fetch_openai_api_key()

+# Initialize Swarm client only after API key is set
 if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
     client = initialize_swarm_client()

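Note: initialize_swarm_client() is called here but defined in an unchanged part of app.py that this diff does not show. A minimal sketch of what it presumably wraps, assuming the default Swarm constructor (only the function name appears in the diff; the body below is an assumption):

from swarm import Swarm

def initialize_swarm_client():
    # Assumed body: Swarm() builds on the OpenAI client, which picks up
    # OPENAI_API_KEY from the environment set by fetch_openai_api_key().
    return Swarm()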
@@ -112,55 +125,20 @@ if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
     url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")

     # Run Workflow button
+    st.write("")
+
     if st.button("Run Workflow"):
         if url:
             with st.spinner("Running the multi-agent workflow... This may take a moment."):
-                …
-                content = scrape_result["text"]
-                metadata = scrape_result["metadata"]
-
-                # Analysis and writing
-                analysis_summary = analyze_content(content)
-                final_summary = write_summary({
-                    "analysis": analysis_summary,
-                    "metadata": metadata,
-                })
-
-                st.success("✅ Workflow complete!")
-                st.write("### 📊 Final Report:")
-                st.markdown(final_summary, unsafe_allow_html=True)
-
-                # Prepare downloadable content
-                report_file_json = json.dumps({
-                    "metadata": metadata,
-                    "summary": analysis_summary
-                }, indent=4)
-                report_file_txt = final_summary
-                report_file_pdf = generate_pdf(final_summary)
-
-                # Download options
-                st.download_button(
-                    label="Download Report as JSON",
-                    data=report_file_json,
-                    file_name="report.json",
-                    mime="application/json"
-                )
-                st.download_button(
-                    label="Download Report as TXT",
-                    data=report_file_txt,
-                    file_name="report.txt",
-                    mime="text/plain"
-                )
-                st.download_button(
-                    label="Download Report as PDF",
-                    data=report_file_pdf,
-                    file_name="report.pdf",
-                    mime="application/pdf"
-                )
+                final_report = orchestrate_workflow(client, url)
+                st.success("✅ Workflow complete!")
+                st.write("### 📊 Final Report:")
+                st.write(final_report)
         else:
             st.error("❌ Please enter a valid URL.")
 else:
-    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your secrets configuration.")
+    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")
+
+# Footer with credits
+st.divider()
+st.markdown('<div class="ack">Acknowledgement: </div>', unsafe_allow_html=True)
|