import streamlit as st
from swarm import Swarm, Agent
from bs4 import BeautifulSoup
import requests
import os
from io import BytesIO
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import json

# Function to fetch OpenAI API key
def fetch_openai_api_key():
    """Fetch the OpenAI API key from Hugging Face secrets."""
    try:
        secret_key = st.secrets.get("OPENAI_API_KEY", "")
        if secret_key:
            os.environ['OPENAI_API_KEY'] = secret_key
        else:
            st.warning("⚠️ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
    except Exception as e:
        st.error(f"Error retrieving OpenAI API Key: {str(e)}")

# Initialize the Swarm client
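# No API key is passed explicitly here; the underlying OpenAI client is assumed
# to read the OPENAI_API_KEY environment variable set by fetch_openai_api_key().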
def initialize_swarm_client():
    return Swarm()

# Define the scraping function
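# On failure this returns an error string instead of raising, which
# orchestrate_workflow checks for before running the analysis step.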
def scrape_website(url):
    """Scrapes the content of the website."""
    try:
        response = requests.get(url, timeout=15)  # timeout so an unresponsive site doesn't hang the app
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()  # Return the text content from the HTML
    except requests.exceptions.RequestException as e:
        return f"Error during scraping: {str(e)}"

# Generate PDF report
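# ReportLab canvases use point coordinates with the origin at the bottom-left,
# so text starts near the top of a letter page (612 x 792 pt) and y decreases per line.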
def generate_pdf(report):
    """Generate a PDF file from the report."""
    buffer = BytesIO()
    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
    pdf_canvas.drawString(100, 750, "Generated Summary Report")
    pdf_canvas.drawString(100, 735, "-" * 50)
    
    lines = report.split('\n')
    y = 700  # Start position for the text
    for line in lines:
        if y < 50:  # Create a new page if content exceeds one page
            pdf_canvas.showPage()
            y = 750
        pdf_canvas.drawString(100, y, line)
        y -= 15
    
    pdf_canvas.save()
    buffer.seek(0)
    return buffer
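
# Optional sketch (not wired into generate_pdf above): wrap long report lines so they
# fit within the page width. max_chars is a rough assumption, not derived from font metrics.
def wrap_report_lines(report, max_chars=90):
    """Split the report into lines of at most max_chars characters."""
    import textwrap
    wrapped = []
    for line in report.split('\n'):
        wrapped.extend(textwrap.wrap(line, max_chars) or [''])
    return wrapped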

# Orchestrate the workflow
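# Two-step pipeline: a Scraper Agent fetches the page text via scrape_website,
# then a Research Agent summarizes it; scraping errors short-circuit the run.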
def orchestrate_workflow(client, url):
    # Scrape, analyze, and summarize content
    scrape_result = client.run(
        agent=Agent(
            name="Scraper Agent",
            instructions="Scrape content from websites.",
            functions=[scrape_website]
        ),
        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
    )
    scraped_content = scrape_result.messages[-1]["content"]

    # Handle errors during scraping
    if "Error during scraping" in scraped_content:
        return scraped_content

    # Analyze and summarize the content. A named helper replaces the inline
    # lambda because the tool name is taken from the function's __name__, and
    # "<lambda>" is not a valid tool name.
    def summarize_content(content):
        return f"Summary: {content[:700]}..."

    analyze_result = client.run(
        agent=Agent(
            name="Research Agent",
            instructions="Analyze content and extract insights.",
            functions=[summarize_content]
        ),
        messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    )
    analysis_summary = analyze_result.messages[-1]["content"]

    return analysis_summary

# Streamlit App UI
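# Inline CSS for the title, description, and acknowledgment; rendering raw HTML
# requires unsafe_allow_html=True on each st.markdown call.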
st.markdown(
    """
    <style>
    .title { text-align: center; font-size: 2.5rem; font-weight: bold; }
    .description { text-align: center; font-size: 1.1rem; color: #555; }
    .ack { font-size: 0.95rem; color: #888; text-align: center; }
    </style>
    """,
    unsafe_allow_html=True,
)

st.markdown('<div class="title">πŸ”Ž Swarm-based Web Content Analyzer</div>', unsafe_allow_html=True)
st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content.</div>', unsafe_allow_html=True)

fetch_openai_api_key()

# Initialize Swarm client only after API key is set
if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
    client = initialize_swarm_client()

    # Input field for the website URL
    st.subheader("🌍 Enter the Website URL")
    url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")

    if st.button("Run Workflow"):
        if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
                final_summary = orchestrate_workflow(client, url)

            st.success("✅ Workflow complete!")
            st.write("### 📜 Final Report:")
            st.write(final_summary)

            # Download options
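            # st.download_button accepts plain strings as well as file-like objects
            # such as the BytesIO buffer returned by generate_pdf.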
            json_data = json.dumps({"summary": final_summary}, indent=4)
            txt_data = final_summary
            pdf_data = generate_pdf(final_summary)

            st.download_button(
                label="Download Report as TXT",
                data=txt_data,
                file_name="report.txt",
                mime="text/plain"
            )
            st.download_button(
                label="Download Report as PDF",
                data=pdf_data,
                file_name="report.pdf",
                mime="application/pdf"
            )
            st.download_button(
                label="Download Report as JSON",
                data=json_data,
                file_name="report.json",
                mime="application/json"
            )
        else:
            st.error("❌ Please enter a valid URL.")
else:
    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")

# Footer with credits
st.divider()
st.markdown(
    """
    <div class="ack">
        Acknowledgment: This work is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
    </div>
    """,
    unsafe_allow_html=True
)