# Source: Hugging Face Space (status: Running) - file size: 5,516 bytes.
# Blame revisions shown in the page gutter: 5215be1, 16eaebe, 8e8a46c, 18d6761.
# (The viewer's 1-138 line-number gutter was extraction residue and is omitted.)
import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import time
from smolagents.agents import HuggingFaceAgent
# Model pipelines are built once at import time and shared by every request.
# Summarization is pinned to the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# No model is named for the next two pipelines, so transformers selects its
# own default checkpoint for each task.
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")
def fetch_text_from_url(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The text of every ``<p>`` element, joined with single spaces. An empty
        string when the page contains no paragraphs.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of analysing the error page's markup.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its source; it is
            passed through unchanged.

    Returns:
        The extracted page texts joined with newlines, so the last word of one
        page is not fused with the first word of the next.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # ``or ""`` keeps the join safe should a page yield no text at all.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
def extract_text_from_docx(file):
    """Return the text of a Word document, one paragraph per line.

    Args:
        file: Source handed unchanged to ``docx.Document``.

    Returns:
        Every paragraph's text, each followed by a newline character.
    """
    document = docx.Document(file)
    # Each paragraph, including the last, is terminated by "\n".
    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
def _read_uploaded_file(uploaded):
    """Return the text content of an uploaded PDF, DOCX, or UTF-8 text file."""
    # Accept both a file object exposing ``.name`` and a plain path string, so
    # the caller does not depend on which of the two the UI layer hands over.
    path = getattr(uploaded, "name", uploaded)
    lowered = str(path).lower()
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".docx"):
        return extract_text_from_docx(path)
    with open(path, encoding="utf-8") as handle:
        return handle.read()


def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Run the selected analyses on text given directly, by URL, or as a file.

    Args:
        input_text: The text itself when ``input_type`` is "Text", a URL when
            it is "URL", or an uploaded file (object with ``.name`` or a path)
            when it is "File".
        input_type: "Text", "URL" or "File".
        tasks: Collection of task names to run. Recognised names are
            "Summarization", "Sentiment Analysis" and "Topic Detection".
        progress: Callable progress reporter invoked as
            ``progress(fraction, desc=...)``.

    Returns:
        A 4-tuple ``(original_text, summary, sentiment, topics)``.
        ``original_text`` is the first 1000 characters of the analysed text
        (followed by "..." when truncated); ``topics`` is a comma-separated
        string. Results of tasks that were not requested are empty strings.

    Raises:
        ValueError: If no input was supplied or no text could be extracted.
        Exception: Errors from fetching, file parsing, or the model pipelines
            propagate to the caller.
    """
    if input_text is None:
        raise ValueError("No input provided.")

    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        input_text = _read_uploaded_file(input_text)

    # The pipelines cannot do anything useful with blank input; report it
    # clearly rather than surfacing an opaque model error.
    if not input_text or not input_text.strip():
        raise ValueError("No text to analyze.")

    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
    summary, sentiment, topics = "", "", ""

    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True clips over-long documents to the model's maximum
        # input length; the other two tasks already clip their input.
        summary = summarizer(
            input_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes

    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Only the first 512 characters are classified.
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
        time.sleep(1)

    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        # Only the first 512 characters are classified. The "labels" entry of
        # the result is used as-is, so every candidate label is listed.
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
        time.sleep(1)

    progress(1, desc="Analysis completed")
    return original_text, summary, sentiment, ", ".join(topics)
def create_interface():
    """Build the Gradio app that drives ``analyze_text`` and the review agent.

    The UI offers a choice of input source (text, URL, or file), a set of
    analysis tasks, and one tab per result. After the analysis, the agent is
    asked to comment on the summary and the sentiment when those were produced.

    Returns:
        gr.Blocks: The assembled app; call ``.launch()`` on it to serve it.
    """
    model_endpoint = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    # NOTE(review): the agent class and its ``model_endpoint`` argument are kept
    # exactly as written; confirm the installed smolagents release provides
    # them, and that a summarization endpoint is the intended backing model for
    # the free-form prompts sent below.
    agent = HuggingFaceAgent(model_endpoint=model_endpoint)

    # Components and event listeners must be created inside the Blocks context;
    # gr.Blocks itself takes no ``inputs``/``outputs`` arguments.
    with gr.Blocks(title="Text Analysis App") as demo:
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
        # All three inputs start hidden; choosing an input type reveals one.
        text_input = gr.Textbox(visible=False)
        url_input = gr.Textbox(visible=False)
        file_input = gr.File(visible=False)
        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )
        submit_button = gr.Button("Analyze")

        with gr.Tabs():
            with gr.Tab("Original Text"):
                original_text_output = gr.Textbox(label="Original Text")
            with gr.Tab("Summary"):
                summary_output = gr.Textbox(label="Summary")
                enhanced_summary_output = gr.Textbox(label="Summary Suggestions")
            with gr.Tab("Sentiment"):
                sentiment_output = gr.Textbox(label="Sentiment")
                enhanced_sentiment_output = gr.Textbox(label="Sentiment Review")
            with gr.Tab("Topics"):
                topics_output = gr.Textbox(label="Topics")

        def update_input_visibility(selected_type):
            """Show only the input widget that matches the chosen type."""
            return {
                text_input: gr.update(visible=selected_type == "Text"),
                url_input: gr.update(visible=selected_type == "URL"),
                file_input: gr.update(visible=selected_type == "File"),
            }

        input_type.change(
            update_input_visibility,
            [input_type],
            [text_input, url_input, file_input],
        )

        def process_input(selected_type, text, url, file, tasks, progress=gr.Progress()):
            """Analyse the chosen input and return one value per output box."""
            if selected_type == "Text":
                input_value = text
            elif selected_type == "URL":
                input_value = url
            else:
                input_value = file

            enhanced_summary = ""
            enhanced_sentiment = ""
            try:
                original_text, summary, sentiment, topics = analyze_text(
                    input_value, selected_type, tasks, progress
                )
                # Only consult the agent about results that actually exist.
                if summary:
                    enhanced_summary = agent.run(f"Given the following text: '{original_text}', please suggest improvements to this summary: '{summary}'")
                if sentiment:
                    enhanced_sentiment = agent.run(f"Given the following text: '{original_text}', does this sentiment seem accurate: '{sentiment}'? Please elaborate and suggest any corrections.")
            except Exception as e:
                # UI boundary: surface the failure in the first box instead of
                # crashing the request.
                original_text = f"Error: {str(e)}"
                summary, sentiment, topics = "", "", ""
                enhanced_summary = ""
                enhanced_sentiment = ""
            return original_text, summary, enhanced_summary, sentiment, enhanced_sentiment, topics

        # Six return values map onto six distinct output components.
        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[
                original_text_output,
                summary_output,
                enhanced_summary_output,
                sentiment_output,
                enhanced_sentiment_output,
                topics_output,
            ],
        )

    # Progress reporting relies on the request queue; enabling it explicitly
    # is harmless where it is already on by default.
    demo.queue()
    return demo
# Script entry point: build the UI and start serving it.
if __name__ == "__main__":
    create_interface().launch()