Spaces:
Running
Running
File size: 4,604 Bytes
5215be1 16eaebe 8e8a46c 16eaebe 8e8a46c 16eaebe 8e8a46c 16eaebe 18d6761 5215be1 1575e7a 86decb6 1575e7a 5215be1 16eaebe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import time
# Hugging Face pipelines are constructed once, at import time, and reused by
# analyze_text for every request.

# Summarization pipeline pinned to the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Sentiment pipeline; no model is named here, so the checkpoint is whatever
# the library selects for this task.
sentiment_analyzer = pipeline("sentiment-analysis")
# Zero-shot classification pipeline; likewise created without an explicit
# model. The candidate labels are supplied at call time.
topic_classifier = pipeline("zero-shot-classification")
def fetch_text_from_url(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The text of every ``<p>`` element, joined by single spaces. Empty
        string when the page has no paragraph elements.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than analysing the body of an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: The PDF source, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The extracted text of all pages, in page order, with no separator
        inserted between pages.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Build the result with a single join instead of growing a string page by
    # page. ``or ""`` is defensive: a page that yields no text (empty or None)
    # contributes nothing instead of breaking the concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def extract_text_from_docx(file):
    """Return the text of a Word document, one paragraph per line.

    Args:
        file: The document source, passed straight to ``docx.Document``.

    Returns:
        Each paragraph's text followed by a newline, concatenated in document
        order. Empty string when the document has no paragraphs.
    """
    document = docx.Document(file)
    lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
    return "".join(lines)
def _read_uploaded_file(upload):
    """Return the text content of an uploaded PDF, DOCX, or UTF-8 text file.

    ``upload`` may be a plain path string or a file-like object exposing
    ``name`` and ``read``; the file type is chosen from the name's extension.
    """
    is_path = isinstance(upload, str)
    file_name = (upload if is_path else upload.name).lower()
    if file_name.endswith(".pdf"):
        return extract_text_from_pdf(upload)
    if file_name.endswith(".docx"):
        return extract_text_from_docx(upload)
    if is_path:
        with open(upload, encoding="utf-8") as handle:
            return handle.read()
    raw = upload.read()
    # A handle opened in text mode already yields str; only bytes need decoding.
    return raw.decode("utf-8") if isinstance(raw, bytes) else raw


def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Resolve the input to plain text and run the selected analyses on it.

    Args:
        input_text: The raw text, a URL, or an uploaded file, depending on
            ``input_type``.
        input_type: ``"URL"`` to fetch a page, ``"File"`` to read an upload;
            any other value treats ``input_text`` as the text itself.
        tasks: Collection of task names to run, drawn from
            ``"Summarization"``, ``"Sentiment Analysis"`` and
            ``"Topic Detection"``. ``None`` is treated as no tasks.
        progress: Callable invoked as ``progress(fraction, desc=...)`` to
            report progress.

    Returns:
        A 4-tuple ``(original_text, summary, sentiment, topics)`` of strings.
        ``original_text`` is the first 1000 characters of the input (with
        ``"..."`` appended when truncated); results for tasks that were not
        requested are empty strings; ``topics`` is a comma-separated list.

    Raises:
        ValueError: If no file was uploaded for the ``"File"`` input type, or
            the resolved text is empty.
    """
    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        if input_text is None:
            raise ValueError("No file was uploaded.")
        input_text = _read_uploaded_file(input_text)

    # Refuse empty input up front with a clear message instead of letting a
    # model or a slice fail on it further down.
    if not input_text or not input_text.strip():
        raise ValueError("No text to analyze.")
    tasks = tasks or []

    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
    summary, sentiment, topics = "", "", []

    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True lets the tokenizer cut over-long documents down to
        # the model's input limit; the other two tasks already cap their input.
        summary = summarizer(
            input_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        time.sleep(1)  # Deliberate short pause kept for demonstration purposes

    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Only the first 512 characters are analysed.
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
        time.sleep(1)

    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        # NOTE(review): the classifier appears to return every candidate label
        # ranked by score, so all five are listed — confirm whether a score
        # threshold is wanted here.
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
        time.sleep(1)

    progress(1, desc="Analysis completed")
    return original_text, summary, sentiment, ", ".join(topics)
def create_interface():
    """Build the Gradio Blocks UI for the text analysis app.

    The layout is an input-type dropdown, one input widget per type (text,
    URL, file), a task checklist, an "Analyze" button, and four result tabs.

    Returns:
        The assembled ``gr.Blocks`` object, not yet launched.
    """
    with gr.Blocks(title="Text Analysis App") as interface:
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
        text_input = gr.Textbox(visible=False)
        url_input = gr.Textbox(visible=False)
        file_input = gr.File(visible=False)
        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )
        submit_button = gr.Button("Analyze")

        # The result widgets must exist before the click handler below lists
        # them in ``outputs=``; referencing them earlier raises
        # UnboundLocalError.
        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")

        def update_input_visibility(input_type):
            """Show only the input widget matching the selected input type."""
            # Visibility changes reach the browser only as returned updates
            # bound to ``outputs``; mutating the component objects does not.
            return (
                gr.update(visible=input_type == "Text"),
                gr.update(visible=input_type == "URL"),
                gr.update(visible=input_type == "File"),
            )

        input_type.change(
            update_input_visibility,
            inputs=input_type,
            outputs=[text_input, url_input, file_input],
        )

        # ``progress`` is declared as a defaulted parameter so that Gradio
        # supplies the tracker for the running event (per the Gradio
        # progress-bar docs), rather than a detached instance.
        def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
            """Pick the active input, run the analysis, and report errors as text."""
            inputs_by_type = {"Text": text, "URL": url, "File": file}
            if input_type not in inputs_by_type:
                return "Error: Please select an input type.", "", "", ""
            input_value = inputs_by_type[input_type]
            # Broad catch is intentional at this UI boundary: any failure is
            # shown to the user in the first output instead of crashing.
            try:
                return analyze_text(input_value, input_type, tasks, progress)
            except Exception as e:
                return f"Error: {str(e)}", "", "", ""

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[original_text_output, summary_output, sentiment_output, topics_output],
        )

    return interface
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    create_interface().launch()