Spaces:

MHamdan
/

ContentAnalyzer

Running

File size: 4,604 Bytes

import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import time

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")

def fetch_text_from_url(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))

def extract_text_from_pdf(file):
    pdf_reader = PyPDF2.PdfReader(file)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()
    return text

def extract_text_from_docx(file):
    doc = docx.Document(file)
    text = ""
    for para in doc.paragraphs:
        text += para.text + "\n"
    return text

def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        if input_text.name.lower().endswith(".pdf"):
            input_text = extract_text_from_pdf(input_text)
        elif input_text.name.lower().endswith(".docx"):
            input_text = extract_text_from_docx(input_text)
        else:
            input_text = input_text.read().decode("utf-8")
    
    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
    
    summary, sentiment, topics = "", "", ""
    
    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        summary = summarizer(input_text, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes
    
    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]  # Truncate input for sentiment analysis
        time.sleep(1)
    
    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]  # Truncate input for topic detection 
        time.sleep(1)
    
    progress(1, desc="Analysis completed")
    
    return original_text, summary, sentiment, ", ".join(topics)

def create_interface():
    with gr.Blocks(title="Text Analysis App") as interface:
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
        text_input = gr.Textbox(visible=False)
        url_input = gr.Textbox(visible=False)
        file_input = gr.File(visible=False)

        tasks_checkboxes = gr.CheckboxGroup(["Summarization", "Sentiment Analysis", "Topic Detection"], label="Analysis Tasks")

        submit_button = gr.Button("Analyze")
        progress_bar = gr.Progress()

        def update_input_visibility(input_type):
            text_input.visible = input_type == "Text"
            url_input.visible = input_type == "URL"
            file_input.visible = input_type == "File"

        input_type.change(update_input_visibility, inputs=input_type)

        def process_input(input_type, text, url, file, tasks):
            if input_type == "Text":
                input_value = text
            elif input_type == "URL":
                input_value = url
            else:
                input_value = file

            try:
                original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
            except Exception as e:
                original_text = f"Error: {str(e)}"
                summary, sentiment, topics = "", "", ""

            return original_text, summary, sentiment, topics

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[original_text_output, summary_output, sentiment_output, topics_output]
        )

        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")

    return interface

if __name__ == "__main__":
    create_interface().launch()