File size: 7,497 Bytes
0184f32 e0196db 466344f 5f94e5a bb14580 5f94e5a d2c82ee d268952 8f212a3 d268952 c64ddc6 d268952 8f212a3 d268952 5f94e5a 8f212a3 5f94e5a 1a66764 0c604ed 1a66764 1c7c6a9 1a66764 8f212a3 1a66764 8f212a3 1a66764 8f212a3 5f94e5a 8f212a3 1a66764 8f212a3 1a66764 8f212a3 1a66764 8f212a3 1a66764 5f94e5a bb14580 bf0654b bb14580 bf0654b bb14580 5f94e5a 4f6325e 8a4396c 466344f dc60aaa 4f6325e dc60aaa 1d2a7b4 8a4396c dc60aaa 6d8d359 686975f dc60aaa db033fb 8a4396c dc60aaa 9b62756 c174e69 01c5b1d 0423cfe d268952 5f94e5a bb14580 6ad4699 5f89cb0 5f94e5a 466344f 1a66764 8a4396c 466344f 5f94e5a d268952 1a66764 8f212a3 1a66764 bb14580 5f94e5a d268952 5f94e5a 8f212a3 bf0654b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
# image source: https://www.globesign.com/blog/a-beginners-guide-to-google-website-analyzer/
#ref: i) https://blog.dailydoseofds.com/p/building-a-multi-agent-internet-research , ii) https://blog.dailydoseofds.com/p/build-a-multi-agent-research-assistant
import io
import os
import textwrap

import requests
import streamlit as st
from bs4 import BeautifulSoup
from reportlab.pdfgen import canvas
from swarm import Swarm, Agent
# Function to fetch OpenAI API key
def fetch_openai_api_key():
    """Fetch the OpenAI API key from Hugging Face secrets and export it.

    On success the key is written to the OPENAI_API_KEY environment
    variable; otherwise a Streamlit warning or error is shown in the UI.
    """
    try:
        api_key = st.secrets.get("OPENAI_API_KEY", "")
        if not api_key:
            st.warning("β οΈ OpenAI API Key is missing! Please check your Hugging Face secrets configuration.")
            return
        os.environ['OPENAI_API_KEY'] = api_key
    except Exception as err:
        st.error(f"Error retrieving OpenAI API Key: {str(err)}")
# Initialize the Swarm client
def initialize_swarm_client():
    """Build and return the Swarm client that drives the agents."""
    client = Swarm()
    return client
# Define the scraping function
def scrape_website(url):
    """Scrape a web page and return its visible text content.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The plain-text content extracted from the page HTML, or a string
        prefixed with "Error during scraping" when the request fails
        (callers check for that prefix).
    """
    try:
        # A timeout prevents the whole agent workflow from hanging
        # forever on an unresponsive host (requests has no default).
        response = requests.get(url, timeout=15)
        response.raise_for_status()  # surface HTTP 4xx/5xx as exceptions
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()  # Return the text content from the HTML
    except requests.exceptions.RequestException as e:
        return f"Error during scraping: {str(e)}"
# Scraper Agent: exposes scrape_website as a callable tool for the Swarm runtime.
scraper_agent = Agent(
    name="Scraper Agent",
    instructions="You are an agent that scrapes content from websites.",
    functions=[scrape_website]
)
# Define the analysis function
def analyze_content(content):
    """Return the analysis-report template used by the research agent.

    Note: the returned template is fixed and does not vary with
    *content*; the LLM agent fills in the specifics when it invokes
    this tool.
    """
    template_lines = [
        "π Final Report:",
        "",
        "Based on the website content, here are the key takeaways:",
        "",
        "Offerings and Highlights:",
        "- Summarize key offerings, products, or services.",
        "",
        "Prominent Features:",
        "- Identify any standout features or unique aspects.",
        "",
        "Additional Notes:",
        "- Provide other insights that might be useful for the user.",
    ]
    return "\n".join(template_lines)
# Research Agent: exposes analyze_content as a tool for extracting key insights.
research_agent = Agent(
    name="Research Agent",
    instructions="You are an agent that highlights key insights by dynamically analyzing content and adapting to the available information.",
    functions=[analyze_content]
)
# Define the writing function
def write_summary(context_variables):
    """Compose the final report text from the analysis in context.

    Args:
        context_variables: Mapping expected to carry an 'analysis'
            entry; a missing entry falls back to an empty string.

    Returns:
        The report string embedding the analysis text.
    """
    analysis_text = context_variables.get('analysis', '')
    return f"Here's a detailed report based on the research: {analysis_text}"
# Writer Agent: exposes write_summary as a tool for producing the final report.
writer_agent = Agent(
    name="Writer Agent",
    instructions="You are an agent that writes summaries of research.",
    functions=[write_summary]
)
# Orchestrate the workflow
def orchestrate_workflow(client, url):
    """Run the scrape -> analyze -> write pipeline and return the report.

    Args:
        client: Swarm client used to drive each agent run.
        url: Website URL to process.

    Returns:
        The final written summary, or the scraper's error string when
        the initial fetch fails.
    """
    # Step 1: Scrape the website
    scrape_msgs = [{"role": "user", "content": f"Scrape the following website: {url}"}]
    scraped_content = client.run(agent=scraper_agent, messages=scrape_msgs).messages[-1]["content"]

    # Abort early when the scraper reported a failure.
    if "Error during scraping" in scraped_content:
        return scraped_content

    # Step 2: Analyze the scraped content
    analyze_msgs = [{"role": "user", "content": f"Analyze the following content: {scraped_content}"}]
    analysis_summary = client.run(agent=research_agent, messages=analyze_msgs).messages[-1]["content"]

    # Step 3: Write the summary based on the analysis
    write_msgs = [{"role": "user", "content": f"Write a summary based on this analysis: {analysis_summary}"}]
    writer_result = client.run(
        agent=writer_agent,
        messages=write_msgs,
        context_variables={"analysis": analysis_summary},
    )
    return writer_result.messages[-1]["content"]
# Helper functions to create text and PDF files
def create_text_file(content):
    """Return *content* unchanged as the downloadable plain-text payload."""
    return content
def create_pdf_file(content):
    """Render *content* into a simple single-column PDF.

    Args:
        content: Report text; newlines delimit paragraphs.

    Returns:
        The PDF document as raw bytes, ready for st.download_button.
    """
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer)
    c.drawString(100, 750, "Generated Report")
    c.drawString(100, 730, "--------------------")
    y = 700
    for raw_line in content.split("\n"):
        # Wrap long lines so text does not run off the right page edge;
        # the original drew each line verbatim regardless of width.
        for line in textwrap.wrap(raw_line, width=80) or [""]:
            if y < 50:  # Create a new page if the content overflows
                c.showPage()
                y = 750
            c.drawString(100, y, line)
            y -= 20
    c.save()
    buffer.seek(0)
    return buffer.getvalue()  # Return binary content
# Streamlit App UI: inject the page-level CSS once at startup.
_PAGE_CSS = """
    <style>
    .title { text-align: center; font-size: 2.4rem; font-weight: bold; margin-bottom: 20px; }
    .description { text-align: center; font-size: 1.0rem; color: #555; margin-bottom: 30px; }
    .section { margin-top: 30px; margin-bottom: 30px; }
    .ack { font-size: 0.95rem; color: #888; text-align: center; margin-top: 10px; }
    </style>
    """
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# 1. Add the title at the top
st.markdown('<div class="title">Swarm-based Web Content Analyzer π§</div>', unsafe_allow_html=True)
# 2. Add the description below the title
st.markdown('<div class="description">Effortlessly extract, analyze, and summarize web content using multi-agent.</div>', unsafe_allow_html=True)
# 3. Add the image below the description
st.image("./image-4.png", use_container_width=True)
# 4. Add Acknowledgement
st.markdown(
"""
<div class="ack">
Acknowledgment: This app is based on <a href="https://github.com/jadouse5/openai-swarm-webscraper" target="_blank">Jad Tounsi El Azzoiani's work</a>.
</div>
""",
unsafe_allow_html=True
)
# 5. Add one line-spacing after the acknowledgment
st.markdown('<div style="margin-bottom: 20px;"></div>', unsafe_allow_html=True)
fetch_openai_api_key()

# Only build the interactive UI when an API key is actually available.
if 'OPENAI_API_KEY' in os.environ and os.environ['OPENAI_API_KEY']:
    client = initialize_swarm_client()

    # Add interface for URL input
    st.subheader("Enter the Website URL π")
    url = st.text_input("Enter the URL of the website you want to scrape", placeholder="https://huggingface.co/models")

    # Add some spacing
    st.markdown('<div class="section"></div>', unsafe_allow_html=True)

    # Add the "Run Workflow" button
    if st.button("π Run Workflow", key="run"):
        if url:
            with st.spinner("Running the multi-agent workflow... This may take a moment."):
                final_report = orchestrate_workflow(client, url)
                # NOTE(review): this success message was a corrupted literal
                # split across lines in the source; restored to one valid call.
                st.success("✅ Workflow complete!")
                st.write("### π Final Report:")
                st.write(final_report)

                # Offer the report as both plain-text and PDF downloads.
                text_file = create_text_file(final_report)
                pdf_file = create_pdf_file(final_report)
                st.download_button(
                    label="Download Report as Text",
                    data=text_file,
                    file_name="report.txt",
                    mime="text/plain",
                )
                st.download_button(
                    label="Download Report as PDF",
                    data=pdf_file,
                    file_name="report.pdf",
                    mime="application/pdf",
                )
        else:
            st.error("β Please enter a valid URL.")
else:
    st.sidebar.warning("β οΈ OpenAI API Key not set. Please check your Hugging Face secrets configuration.")
|