# Hugging Face Space: MHamdan / ContentAnalyzer
# Status: Running
# File size: 5,516 bytes

import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import time
from smolagents.agents import HuggingFaceAgent

# Models are loaded once at import time so every request reuses them.
# Every checkpoint is named explicitly: relying on a task's implicit default
# makes transformers emit a "no model was supplied" warning and lets results
# change silently if the library's default ever changes.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
topic_classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

def fetch_text_from_url(url, timeout=10):
    """Download *url* and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host, which would freeze the UI request.

    Returns:
        The text of every ``<p>`` element, joined with single spaces.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of analysing the body of an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))

def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, one page per line block.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts (it is passed through
            unchanged).

    Returns:
        The extracted page texts joined with newlines, so the last word of
        one page is not glued to the first word of the next.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # `or ""` keeps the join safe if a page yields no text at all
    # (presumably possible for image-only pages -- verify against PyPDF2).
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

def extract_text_from_docx(file):
    """Return the paragraphs of a .docx document as newline-terminated text.

    Args:
        file: Passed unchanged to ``docx.Document``.

    Returns:
        Each paragraph's text followed by a newline, concatenated in
        document order (an empty string when there are no paragraphs).
    """
    document = docx.Document(file)
    lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
    return "".join(lines)

def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Resolve the input to plain text and run the selected analyses on it.

    Args:
        input_text: The raw text, a URL, or an uploaded file, depending on
            *input_type*. For files, either a path string or an object whose
            ``name`` attribute is the file's path is accepted.
        input_type: ``"URL"`` or ``"File"``; any other value treats
            *input_text* as the text itself.
        tasks: Collection of task names to run; any of ``"Summarization"``,
            ``"Sentiment Analysis"`` and ``"Topic Detection"``. ``None`` is
            treated as "no tasks".
        progress: Progress callback, invoked as ``progress(fraction, desc=...)``.

    Returns:
        A 4-tuple ``(original_text, summary, sentiment, topics)`` of strings.
        ``original_text`` is the first 1000 characters of the input (with
        ``"..."`` appended when truncated); results for tasks that were not
        requested are empty strings.

    Raises:
        ValueError: If no file was provided or the resolved text is empty.
    """
    tasks = tasks or []

    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        # Work from the file's path rather than the object itself. Depending
        # on the Gradio version the upload arrives as a path string or as a
        # tempfile-like wrapper whose `.name` is the path; reading the wrapper
        # directly is not reliable, opening by path works for both.
        if isinstance(input_text, str):
            path = input_text
        else:
            path = getattr(input_text, "name", None)
        if not path:
            raise ValueError("No file provided")

        lowered = path.lower()
        if lowered.endswith(".pdf"):
            input_text = extract_text_from_pdf(path)
        elif lowered.endswith(".docx"):
            input_text = extract_text_from_docx(path)
        else:
            with open(path, encoding="utf-8") as handle:
                input_text = handle.read()

    # Nothing useful can be computed from empty input; report it instead of
    # handing an empty string to the models.
    if not input_text or not input_text.strip():
        raise ValueError("No text to analyze")

    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")

    summary, sentiment, topics = "", "", ""

    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True clips inputs longer than the model's maximum
        # sequence length instead of failing on long documents.
        summary = summarizer(
            input_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes

    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]  # Truncate input for sentiment analysis
        time.sleep(1)

    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        # NOTE(review): with multi_label=True the result's "labels" appears to
        # contain every candidate label ranked by score, so this always lists
        # all five topics -- consider filtering on the "scores" field.
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]  # Truncate input for topic detection
        time.sleep(1)

    progress(1, desc="Analysis completed")

    # `topics` is either "" or a list of labels; join() yields "" for both
    # the empty string and an empty list.
    return original_text, summary, sentiment, ", ".join(topics)

def create_interface():
    """Build the Gradio UI for the text analysis app.

    The layout is a single ``gr.Blocks`` page: an input-type selector that
    reveals the matching input widget, a task checklist, an "Analyze" button,
    and one tab per result. Components and event listeners are created inside
    the ``gr.Blocks`` context, which Gradio requires for them to be attached
    to the page.

    Returns:
        The ``gr.Blocks`` application; call ``.launch()`` on it to serve it.
    """
    # NOTE(review): `HuggingFaceAgent` and its `model_endpoint` argument are
    # used exactly as in the original code -- confirm they match the installed
    # smolagents version.
    model_endpoint = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    agent = HuggingFaceAgent(model_endpoint=model_endpoint)

    def update_input_visibility(input_type):
        """Show only the input widget that matches the selected type."""
        # Order matches the `outputs` list of the change listener below.
        return [
            gr.update(visible=input_type == "Text"),
            gr.update(visible=input_type == "URL"),
            gr.update(visible=input_type == "File"),
        ]

    def ask_agent(prompt):
        """Run *prompt* through the agent; return its reply or an error note."""
        try:
            return str(agent.run(prompt))
        except Exception as e:
            # The agent is an optional extra: its failure must not discard
            # the analysis results that were already computed.
            return f"Error: {str(e)}"

    # `progress=gr.Progress()` in the handler signature is how Gradio injects
    # a live progress tracker for the running event.
    def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
        """Run the analysis for the active input and return all six outputs."""
        if input_type == "Text":
            input_value = text
        elif input_type == "URL":
            input_value = url
        else:
            input_value = file

        try:
            original_text, summary, sentiment, topics = analyze_text(
                input_value, input_type, tasks, progress
            )
        except Exception as e:
            # UI boundary: surface the failure to the user instead of crashing.
            return f"Error: {str(e)}", "", "", "", "", ""

        enhanced_summary = ""
        enhanced_sentiment = ""
        # Only consult the agent about results that were actually produced.
        if summary:
            enhanced_summary = ask_agent(
                f"Given the following text: '{original_text}', please suggest improvements to this summary: '{summary}'"
            )
        if sentiment:
            enhanced_sentiment = ask_agent(
                f"Given the following text: '{original_text}', does this sentiment seem accurate: '{sentiment}'? Please elaborate and suggest any corrections."
            )

        return original_text, summary, enhanced_summary, sentiment, enhanced_sentiment, topics

    with gr.Blocks(title="Text Analysis App") as demo:
        # Start on "Text" with its box visible so the page is usable before
        # the user touches the dropdown.
        input_type = gr.Dropdown(["Text", "URL", "File"], value="Text", label="Input Type")
        text_input = gr.Textbox(label="Text", visible=True)
        url_input = gr.Textbox(label="URL", visible=False)
        file_input = gr.File(label="File", visible=False)

        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )

        submit_button = gr.Button("Analyze")

        with gr.Tabs():
            with gr.Tab(label="Original Text"):
                original_text_output = gr.Textbox(label="Original Text")
            with gr.Tab(label="Summary"):
                summary_output = gr.Textbox(label="Summary")
                # Separate box so the agent's feedback does not overwrite
                # the summary itself.
                enhanced_summary_output = gr.Textbox(label="Suggested Summary Improvements")
            with gr.Tab(label="Sentiment"):
                sentiment_output = gr.Textbox(label="Sentiment")
                enhanced_sentiment_output = gr.Textbox(label="Sentiment Review")
            with gr.Tab(label="Topics"):
                topics_output = gr.Textbox(label="Topics")

        input_type.change(
            update_input_visibility,
            [input_type],
            [text_input, url_input, file_input],
        )

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[
                original_text_output,
                summary_output,
                enhanced_summary_output,
                sentiment_output,
                enhanced_sentiment_output,
                topics_output,
            ],
        )

    return demo

# Build the UI and start the Gradio server only when this file is executed
# as a script, not when it is imported.
if __name__ == "__main__":
    create_interface().launch()