DrishtiSharma committed
Commit f9e3235 · verified · 1 Parent(s): b21e86f

Update app.py

Files changed (1)
  1. app.py +96 -74
app.py CHANGED
@@ -3,16 +3,20 @@ from swarm import Swarm, Agent
 from bs4 import BeautifulSoup
 import requests
 import os
+import json
+from io import BytesIO
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
 
 # Function to fetch OpenAI API key
 def fetch_openai_api_key():
-    """Fetch the OpenAI API key from Hugging Face secrets."""
+    """Fetch the OpenAI API key from Streamlit secrets."""
     try:
         secret_key = st.secrets.get("OPENAI_API_KEY", "")
         if secret_key:
             os.environ['OPENAI_API_KEY'] = secret_key
         else:
-            st.warning("⚠️ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
+            st.warning("⚠️ OpenAI API Key is missing! Please check your secrets configuration.")
     except Exception as e:
         st.error(f"Error retrieving OpenAI API Key: {str(e)}")
 
@@ -27,73 +31,60 @@ def scrape_website(url):
         response = requests.get(url)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
-        return soup.get_text()  # Return the text content from the HTML
+
+        # Extract metadata
+        metadata = {
+            "title": soup.title.string if soup.title else "N/A",
+            "description": soup.find("meta", {"name": "description"})["content"] if soup.find("meta", {"name": "description"}) else "N/A",
+            "keywords": soup.find("meta", {"name": "keywords"})["content"] if soup.find("meta", {"name": "keywords"}) else "N/A",
+        }
+
+        text_content = soup.get_text()  # Extract text content
+        return {"text": text_content, "metadata": metadata}
     except requests.exceptions.RequestException as e:
         return f"Error during scraping: {str(e)}"
 
-# Scraper Agent
-scraper_agent = Agent(
-    name="Scraper Agent",
-    instructions="You are an agent that scrapes content from websites.",
-    functions=[scrape_website]
-)
-
-# Define the analysis function
+# Enhanced summarization function
 def analyze_content(content):
     """Analyzes the scraped content for key points."""
-    summary = f"Summary of content: {content[:200]}..."  # A simple placeholder summarization
+    summary = f"Summary of content: {content[:500]}..."
     return summary
 
-# Research Agent
-research_agent = Agent(
-    name="Research Agent",
-    instructions="You are an agent that analyzes content and extracts key insights.",
-    functions=[analyze_content]
-)
-
 # Define the writing function
 def write_summary(context_variables):
     """Writes a summary based on the analysis."""
     analysis = context_variables.get('analysis', '')
-    summary = f"Here's a detailed report based on the research: {analysis}"
-    return summary
-
-# Writer Agent
-writer_agent = Agent(
-    name="Writer Agent",
-    instructions="You are an agent that writes summaries of research.",
-    functions=[write_summary]
-)
-
-# Orchestrate the workflow
-def orchestrate_workflow(client, url):
-    # Step 1: Scrape the website
-    scrape_result = client.run(
-        agent=scraper_agent,
-        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
+    metadata = context_variables.get('metadata', {})
+
+    detailed_report = (
+        f"### Metadata:\n"
+        f"**Title:** {metadata.get('title')}\n"
+        f"**Description:** {metadata.get('description')}\n"
+        f"**Keywords:** {metadata.get('keywords')}\n\n"
+        f"### Content Summary:\n{analysis}"
     )
-    scraped_content = scrape_result.messages[-1]["content"]
-
-    # Check for any error during scraping
-    if "Error during scraping" in scraped_content:
-        return scraped_content
-
-    # Step 2: Analyze the scraped content
-    research_result = client.run(
-        agent=research_agent,
-        messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
-    )
-    analysis_summary = research_result.messages[-1]["content"]
-
-    # Step 3: Write the summary based on the analysis
-    writer_result = client.run(
-        agent=writer_agent,
-        messages=[{"role": "user", "content": f"Write a summary based on this analysis: {analysis_summary}"}],
-        context_variables={"analysis": analysis_summary}
-    )
-
-    final_summary = writer_result.messages[-1]["content"]
-    return final_summary
+    return detailed_report
+
+# Generate PDF report
+def generate_pdf(report):
+    """Generate a PDF file from the report."""
+    buffer = BytesIO()
+    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
+    pdf_canvas.drawString(100, 750, "Web Content Analyzer Report")
+    pdf_canvas.drawString(100, 735, "-" * 50)
+
+    lines = report.split('\n')
+    y = 700  # Start position for the text
+    for line in lines:
+        if y < 50:  # Create a new page if content exceeds one page
+            pdf_canvas.showPage()
+            y = 750
+        pdf_canvas.drawString(100, y, line)
+        y -= 15
+
+    pdf_canvas.save()
+    buffer.seek(0)
+    return buffer
 
 # Streamlit App UI
 st.markdown(
@@ -108,15 +99,11 @@ st.markdown(
     unsafe_allow_html=True,
 )
 
-st.markdown('<div class="title">🔎 Swarm-based Web Content Analyzer</div>', unsafe_allow_html=True)
-st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content.</div>', unsafe_allow_html=True)
-
-st.write("")
-st.write("")
+st.markdown('<div class="title">🔎 Multi-Agent Web Content Analyzer</div>', unsafe_allow_html=True)
+st.markdown('<div class="description">Extract, analyze, and summarize web content with advanced capabilities.</div>', unsafe_allow_html=True)
 
 fetch_openai_api_key()
 
-# Initialize Swarm client only after API key is set
 if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
     client = initialize_swarm_client()
 
@@ -125,20 +112,55 @@ if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
     url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")
 
     # Run Workflow button
-    st.write("")
-
     if st.button("Run Workflow"):
         if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
-                final_report = orchestrate_workflow(client, url)
-                st.success("✅ Workflow complete!")
-                st.write("### 📜 Final Report:")
-                st.write(final_report)
+                scrape_result = scrape_website(url)
+                if isinstance(scrape_result, str):  # Error handling
+                    st.error(scrape_result)
+                else:
+                    content = scrape_result["text"]
+                    metadata = scrape_result["metadata"]
+
+                    # Analysis and writing
+                    analysis_summary = analyze_content(content)
+                    final_summary = write_summary({
+                        "analysis": analysis_summary,
+                        "metadata": metadata,
+                    })
+
+                    st.success("✅ Workflow complete!")
+                    st.write("### 📜 Final Report:")
+                    st.markdown(final_summary, unsafe_allow_html=True)
+
+                    # Prepare downloadable content
+                    report_file_json = json.dumps({
+                        "metadata": metadata,
+                        "summary": analysis_summary
+                    }, indent=4)
+                    report_file_txt = final_summary
+                    report_file_pdf = generate_pdf(final_summary)
+
+                    # Download options
+                    st.download_button(
+                        label="Download Report as JSON",
+                        data=report_file_json,
+                        file_name="report.json",
+                        mime="application/json"
+                    )
+                    st.download_button(
+                        label="Download Report as TXT",
+                        data=report_file_txt,
+                        file_name="report.txt",
+                        mime="text/plain"
+                    )
+                    st.download_button(
+                        label="Download Report as PDF",
+                        data=report_file_pdf,
+                        file_name="report.pdf",
+                        mime="application/pdf"
+                    )
         else:
             st.error("❌ Please enter a valid URL.")
 else:
-    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")
-
-# Footer with credits
-st.divider()
-st.markdown('<div class="ack">Acknowledgement: </div>', unsafe_allow_html=True)
+    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your secrets configuration.")