import streamlit as st
from swarm import Swarm, Agent
from bs4 import BeautifulSoup
import requests
import os
from io import BytesIO
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import json

# Function to fetch OpenAI API key
def fetch_openai_api_key():
    """Fetch the OpenAI API key from Hugging Face secrets."""
    try:
        secret_key = st.secrets.get("OPENAI_API_KEY", "")
        if secret_key:
            os.environ['OPENAI_API_KEY'] = secret_key
        else:
            st.warning("⚠️ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
    except Exception as e:
        st.error(f"Error retrieving OpenAI API Key: {str(e)}")

# Initialize the Swarm client
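# No API key is passed explicitly here; the underlying OpenAI client is assumed
# to read the OPENAI_API_KEY environment variable set by fetch_openai_api_key().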
def initialize_swarm_client():
    return Swarm()

# Define the scraping function
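# On failure this returns an error string instead of raising, which
# orchestrate_workflow checks for before running the analysis step.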
def scrape_website(url):
    """Scrapes the content of the website."""
    try:
        response = requests.get(url, timeout=15)  # timeout so an unresponsive site doesn't hang the app
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()  # Return the text content from the HTML
    except requests.exceptions.RequestException as e:
        return f"Error during scraping: {str(e)}"

# Generate PDF report
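# ReportLab canvases use point coordinates with the origin at the bottom-left,
# so text starts near the top of a letter page (612 x 792 pt) and y decreases per line.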
def generate_pdf(report):
    """Generate a PDF file from the report."""
    buffer = BytesIO()
    pdf_canvas = canvas.Canvas(buffer, pagesize=letter)
    pdf_canvas.drawString(100, 750, "Generated Summary Report")
    pdf_canvas.drawString(100, 735, "-" * 50)
    
    lines = report.split('\n')
    y = 700  # Start position for the text
    for line in lines:
        if y < 50:  # Create a new page if content exceeds one page
            pdf_canvas.showPage()
            y = 750
        pdf_canvas.drawString(100, y, line)
        y -= 15
    
    pdf_canvas.save()
    buffer.seek(0)
    return buffer
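
# Optional sketch (not wired into generate_pdf above): wrap long report lines so they
# fit within the page width. max_chars is a rough assumption, not derived from font metrics.
def wrap_report_lines(report, max_chars=90):
    """Split the report into lines of at most max_chars characters."""
    import textwrap
    wrapped = []
    for line in report.split('\n'):
        wrapped.extend(textwrap.wrap(line, max_chars) or [''])
    return wrapped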

# Orchestrate the workflow
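# Two-step pipeline: a Scraper Agent fetches the page text via scrape_website,
# then a Research Agent summarizes it; scraping errors short-circuit the run.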
def orchestrate_workflow(client, url):
    # Scrape, analyze, and summarize content
    scrape_result = client.run(
        agent=Agent(
            name="Scraper Agent",
            instructions="Scrape content from websites.",
            functions=[scrape_website]
        ),
        messages=[{"role": "user", "content": f"Scrape the following website: {url}"}]
    )
    scraped_content = scrape_result.messages[-1]["content"]

    # Handle errors during scraping
    if "Error during scraping" in scraped_content:
        return scraped_content

    # Analyze and summarize the content. A named helper replaces the inline
    # lambda because the tool name is taken from the function's __name__, and
    # "<lambda>" is not a valid tool name.
    def summarize_content(content):
        return f"Summary: {content[:700]}..."

    analyze_result = client.run(
        agent=Agent(
            name="Research Agent",
            instructions="Analyze content and extract insights.",
            functions=[summarize_content]
        ),
        messages=[{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    )
    analysis_summary = analyze_result.messages[-1]["content"]

    return analysis_summary

# Streamlit App UI
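# Inline CSS for the title, description, and acknowledgment; rendering raw HTML
# requires unsafe_allow_html=True on each st.markdown call.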
st.markdown(
    """
    <style>
    .title { text-align: center; font-size: 2.5rem; font-weight: bold; }
    .description { text-align: center; font-size: 1.1rem; color: #555; }
    .ack { font-size: 0.95rem; color: #888; text-align: center; }
    </style>
    """,
    unsafe_allow_html=True,
)

st.markdown('<div class="title">πŸ”Ž Swarm-based Web Content Analyzer</div>', unsafe_allow_html=True)
st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content.</div>', unsafe_allow_html=True)

fetch_openai_api_key()

# Initialize Swarm client only after API key is set
if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
    client = initialize_swarm_client()

    # Input field for the website URL
    st.subheader("🌍 Enter the Website URL")
    url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://example.com")

    if st.button("Run Workflow"):
        if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
                final_summary = orchestrate_workflow(client, url)

            st.success("✅ Workflow complete!")
            st.write("### 📜 Final Report:")
            st.write(final_summary)

            # Download options
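            # st.download_button accepts plain strings as well as file-like objects
            # such as the BytesIO buffer returned by generate_pdf.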
            json_data = json.dumps({"summary": final_summary}, indent=4)
            txt_data = final_summary
            pdf_data = generate_pdf(final_summary)

            st.download_button(
                label="Download Report as TXT",
                data=txt_data,
                file_name="report.txt",
                mime="text/plain"
            )
            st.download_button(
                label="Download Report as PDF",
                data=pdf_data,
                file_name="report.pdf",
                mime="application/pdf"
            )
            st.download_button(
                label="Download Report as JSON",
                data=json_data,
                file_name="report.json",
                mime="application/json"
            )
        else:
            st.error("❌ Please enter a valid URL.")
else:
    st.sidebar.warning("⚠️ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")

# Footer with credits
st.divider()
st.markdown(
    """
    <div class="ack">
        Acknowledgment: This work is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
    </div>
    """,
    unsafe_allow_html=True
)