# Image source: https://www.globesign.com/blog/a-beginners-guide-to-google-website-analyzer/
# References: i) https://blog.dailydoseofds.com/p/building-a-multi-agent-internet-research
#             ii) https://blog.dailydoseofds.com/p/build-a-multi-agent-research-assistant

import streamlit as st
from swarm import Swarm, Agent
from bs4 import BeautifulSoup
import requests
import os
import io
from reportlab.pdfgen import canvas

# Function to fetch OpenAI API key
def fetch_openai_api_key():
    """Fetch the OpenAI API key from Hugging Face secrets."""
    try:
        secret_key = st.secrets.get("OPENAI_API_KEY", "")
        if secret_key:
            os.environ['OPENAI_API_KEY'] = secret_key
        else:
            st.warning("⚠️ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
    except Exception as e:
        st.error(f"Error retrieving OpenAI API Key: {str(e)}")

# Initialize the Swarm client
def initialize_swarm_client():
    """Create the Swarm client; it picks up OPENAI_API_KEY from the environment."""
    return Swarm()

# Define the scraping function
def scrape_website(url):
    """Scrapes the content of the website."""
    try:
        response = requests.get(url, timeout=15)  # Timeout so the app doesn't hang on unresponsive hosts
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()  # Return the text content from the HTML
    except requests.exceptions.RequestException as e:
        return f"Error during scraping: {str(e)}"

# Scraper Agent
scraper_agent = Agent(
    name="Scraper Agent",
    instructions="You are an agent that scrapes content from websites.",
    functions=[scrape_website]
)
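
# Note: in the Swarm framework, each entry in `functions` is exposed as a tool
# the agent's LLM may call; Swarm routes the tool call and feeds the function's
# return value back into the conversation.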

# Define the analysis function
def analyze_content(content):
    """Dynamically analyzes the content and extracts key insights."""
    # Generic analysis prompt for flexibility
    summary = (
        "πŸ“œ Final Report:\n\n"
        "Based on the website content, here are the key takeaways:\n\n"
        "Offerings and Highlights:\n"
        "- Summarize key offerings, products, or services.\n\n"
        "Prominent Features:\n"
        "- Identify any standout features or unique aspects.\n\n"
        "Additional Notes:\n"
        "- Provide other insights that might be useful for the user."
    )
    return summary
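
# The template above is deliberately content-agnostic: when the Research Agent
# invokes this tool, its LLM is expected to expand each bullet using the
# scraped content from the conversation, not the `content` argument itself.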

# Research Agent
research_agent = Agent(
    name="Research Agent",
    instructions="You are an agent that highlights key insights by dynamically analyzing content and adapting to the available information.",
    functions=[analyze_content]
)

# Define the writing function
def write_summary(context_variables):
    """Writes a summary based on the analysis."""
    analysis = context_variables.get('analysis', '')
    summary = f"Here's a detailed report based on the research: {analysis}"
    return summary
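
# Swarm injects the shared `context_variables` dict into any agent function
# that declares a `context_variables` parameter; that is how the analysis
# produced in step 2 of the workflow reaches this writer tool.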

# Writer Agent
writer_agent = Agent(
    name="Writer Agent",
    instructions="You are an agent that writes summaries of research.",
    functions=[write_summary]
)

# Orchestrate the workflow
def orchestrate_workflow(client, url):
    """Run the three-agent pipeline: scrape the site, analyze it, then write the report."""
    # Step 1: Scrape the website
    scrape_result = client.run(
        agent=scraper_agent,
        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
    )
    scraped_content = scrape_result.messages[-1]["content"]

    # Check for any error during scraping
    if "Error during scraping" in scraped_content:
        return scraped_content

    # Step 2: Analyze the scraped content
    research_result = client.run(
        agent=research_agent,
        messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    )
    analysis_summary = research_result.messages[-1]["content"]

    # Step 3: Write the summary based on the analysis
    writer_result = client.run(
        agent=writer_agent,
        messages=[{"role": "user", "content": f"Write a summary based on this analysis: {analysis_summary}"}],
        context_variables={"analysis": analysis_summary}
    )

    final_summary = writer_result.messages[-1]["content"]
    return final_summary
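
# Illustrative standalone run (kept commented out so the Streamlit UI below
# remains the only entry point; the URL is a placeholder):
#   client = initialize_swarm_client()
#   print(orchestrate_workflow(client, "https://example.com"))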

# Helper functions to create text and PDF files
def create_text_file(content):
    """Create a downloadable text file."""
    return content  # Return plain text

def create_pdf_file(content):
    """Create a downloadable PDF file."""
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer)
    c.drawString(100, 750, "Generated Report")
    c.drawString(100, 730, "--------------------")
    lines = content.split("\n")
    y = 700
    for line in lines:
        if y < 50:  # Create a new page if the content overflows
            c.showPage()
            y = 750
        c.drawString(100, y, line)
        y -= 20
    c.save()
    buffer.seek(0)
    return buffer.getvalue()  # Return binary content
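
# reportlab places the origin at the bottom-left of the page; with the default
# letter-size canvas (612x792 points) the y values above start near the top and
# step down 20 points per line. Long lines are not wrapped in this sketch.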

# Streamlit App UI
st.markdown(
    """
    <style>
    .title { text-align: center; font-size: 2.4rem; font-weight: bold; margin-bottom: 20px; }
    .description { text-align: center; font-size: 1.0rem; color: #555; margin-bottom: 30px; }
    .section { margin-top: 30px; margin-bottom: 30px; }
    .ack { font-size: 0.95rem; color: #888; text-align: center; margin-top: 10px; }
    </style>
    """,
    unsafe_allow_html=True,
)

# 1. Add the title at the top
st.markdown('<div class="title">Swarm-based Web Content Analyzer 🧐</div>', unsafe_allow_html=True)

# 2. Add the description below the title
st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content using multi-agent.</div>', unsafe_allow_html=True)

# 3. Add the image below the description
st.image("./image-4.png", use_container_width=True)

# 4. Add Acknowledgement
st.markdown(
    """
    <div class="ack">
        Acknowledgment: This app is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
    </div>
    """,
    unsafe_allow_html=True
)

# 5. Add one line-spacing after the acknowledgment
st.markdown('<div style="margin-bottom: 20px;"></div>', unsafe_allow_html=True)

fetch_openai_api_key()

if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
    client = initialize_swarm_client()

    # Add interface for URL input
    st.subheader("Enter the Website URL 🔗")
    url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://huggingface.co/models")

    # Add some spacing
    st.markdown('<div class="section"></div>', unsafe_allow_html=True)

    # Add the "Run Workflow" button
    if st.button("🚀 Run Workflow", key="run"):
        if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
                final_report = orchestrate_workflow(client, url)
            st.success("✅ Workflow complete!")
            st.write("### 📜 Final Report:")
            st.write(final_report)

            # Add download buttons for the report
            text_file = create_text_file(final_report)
            pdf_file = create_pdf_file(final_report)

            st.download_button(
                label="Download Report as Text",
                data=text_file,
                file_name="report.txt",
                mime="text/plain",
            )

            st.download_button(
                label="Download Report as PDF",
                data=pdf_file,
                file_name="report.pdf",
                mime="application/pdf",
            )
        else:
            st.error("❌ Please enter a valid URL.")
else:
    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")