Spaces:

MHamdan
/

ContentAnalyzer

Running

App Files Files Community

ContentAnalyzer / app.py

MHamdan

Update app.py

86decb6 verified 2 months ago

raw

history blame

4.6 kB

	import gradio as gr
	from transformers import pipeline
	import requests
	from bs4 import BeautifulSoup
	import PyPDF2
	import docx
	import time

	# Model pipelines are built once at import time and reused by analyze_text().
	# Summarization uses the facebook/bart-large-cnn checkpoint explicitly.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	# No model is named for the next two pipelines, so the checkpoint is whatever
	# the installed transformers release selects for the task.
	# NOTE(review): consider pinning explicit model names for reproducibility.
	sentiment_analyzer = pipeline("sentiment-analysis")
	topic_classifier = pipeline("zero-shot-classification")

	def fetch_text_from_url(url, timeout=10):
	    """Download a web page and return the text of its ``<p>`` elements.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout a stalled server would block the request indefinitely.

	    Returns:
	        The text of every ``<p>`` tag in the page, joined with single spaces.

	    Raises:
	        requests.RequestException: If the connection fails, the request
	            times out, or the server answers with a 4xx/5xx status.
	    """
	    response = requests.get(url, timeout=timeout)
	    # Fail loudly on HTTP errors instead of analysing an error page as content.
	    response.raise_for_status()
	    soup = BeautifulSoup(response.text, "html.parser")
	    return " ".join(p.get_text() for p in soup.find_all("p"))

	def extract_text_from_pdf(file):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        file: Whatever ``PyPDF2.PdfReader`` accepts as its source argument.

	    Returns:
	        The extracted text of all pages joined without a separator; an empty
	        string when no page yields any text.
	    """
	    pdf_reader = PyPDF2.PdfReader(file)
	    # Guard against a falsy result (e.g. None for a page with no extractable
	    # text) so a single such page cannot raise TypeError during concatenation.
	    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

	def extract_text_from_docx(file):
	    """Return the paragraphs of a .docx document as newline-terminated text.

	    Args:
	        file: Source handed directly to ``docx.Document``.

	    Returns:
	        Each paragraph's text followed by a newline, in document order.
	    """
	    document = docx.Document(file)
	    lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
	    return "".join(lines)

	def _read_uploaded_text(uploaded):
	    """Return the contents of an uploaded plain-text file as ``str``."""
	    if hasattr(uploaded, "read"):
	        raw = uploaded.read()
	    else:
	        # No file-like API on the upload object: treat ``.name`` as its path.
	        # NOTE(review): assumes ``.name`` is a readable path on disk - confirm
	        # against the installed Gradio version.
	        with open(uploaded.name, "rb") as handle:
	            raw = handle.read()
	    return raw.decode("utf-8") if isinstance(raw, bytes) else raw

	def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
	    """Resolve the input to plain text and run the selected analysis tasks.

	    Args:
	        input_text: The raw text, a URL, or an uploaded file object,
	            depending on ``input_type``.
	        input_type: ``"URL"`` or ``"File"``; any other value means
	            ``input_text`` is already the text to analyse.
	        tasks: Collection of task names; recognised entries are
	            ``"Summarization"``, ``"Sentiment Analysis"`` and
	            ``"Topic Detection"``.
	        progress: Callable used to report progress as ``progress(fraction, desc=...)``.

	    Returns:
	        A 4-tuple ``(preview, summary, sentiment, topics)``: the first 1000
	        characters of the text (with ``...`` appended when truncated), the
	        summary, the sentiment label, and the topic labels joined by ``", "``.
	        Results of tasks that were not selected are empty strings.

	    Raises:
	        ValueError: If no input was supplied or the resolved text is blank.
	    """
	    if input_text is None:
	        # E.g. "File" selected but nothing uploaded; give a readable message
	        # instead of an AttributeError on ``None.name``.
	        raise ValueError("No input provided.")
	    # Tolerate a missing task selection so the membership tests below work.
	    tasks = tasks or []

	    if input_type == "URL":
	        progress(0, desc="Fetching text from URL")
	        input_text = fetch_text_from_url(input_text)
	    elif input_type == "File":
	        progress(0, desc="Extracting text from file")
	        filename = input_text.name.lower()
	        if filename.endswith(".pdf"):
	            input_text = extract_text_from_pdf(input_text)
	        elif filename.endswith(".docx"):
	            input_text = extract_text_from_docx(input_text)
	        else:
	            input_text = _read_uploaded_text(input_text)

	    if not input_text or not input_text.strip():
	        # Nothing to feed the models; stop before they fail less clearly.
	        raise ValueError("No text to analyze.")

	    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")

	    summary, sentiment, topics = "", "", []

	    if "Summarization" in tasks:
	        progress(0.3, desc="Generating summary")
	        # truncation=True clips inputs longer than the model's maximum
	        # sequence length instead of letting the pipeline raise on them.
	        summary = summarizer(
	            input_text,
	            max_length=100,
	            min_length=30,
	            do_sample=False,
	            truncation=True,
	        )[0]["summary_text"]
	        time.sleep(1)  # Short pause kept for demonstration purposes

	    if "Sentiment Analysis" in tasks:
	        progress(0.6, desc="Analyzing sentiment")
	        # Only the first 512 characters are classified.
	        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
	        time.sleep(1)

	    if "Topic Detection" in tasks:
	        progress(0.9, desc="Detecting topics")
	        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
	        # Only the first 512 characters are classified.
	        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
	        time.sleep(1)

	    progress(1, desc="Analysis completed")

	    return original_text, summary, sentiment, ", ".join(topics)

	def create_interface():
	    """Build the Gradio Blocks UI for the text analysis app.

	    The layout has an input-type dropdown, one input widget per type (only
	    the one matching the selected type is shown), a task checkbox group, an
	    "Analyze" button and four result tabs.

	    Returns:
	        The assembled ``gr.Blocks`` instance, ready for ``.launch()``.
	    """
	    with gr.Blocks(title="Text Analysis App") as interface:
	        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
	        text_input = gr.Textbox(visible=False)
	        url_input = gr.Textbox(visible=False)
	        file_input = gr.File(visible=False)

	        tasks_checkboxes = gr.CheckboxGroup(["Summarization", "Sentiment Analysis", "Topic Detection"], label="Analysis Tasks")

	        submit_button = gr.Button("Analyze")

	        def update_input_visibility(selected):
	            # Visibility changes must be returned as updates for the components
	            # listed in ``outputs``; mutating ``.visible`` on the Python objects
	            # after the layout is built does not reach the browser.
	            return (
	                gr.update(visible=selected == "Text"),
	                gr.update(visible=selected == "URL"),
	                gr.update(visible=selected == "File"),
	            )

	        input_type.change(
	            update_input_visibility,
	            inputs=input_type,
	            outputs=[text_input, url_input, file_input],
	        )

	        # Progress is tracked by declaring a ``gr.Progress()`` default argument on
	        # the event handler itself; the instance Gradio injects is forwarded on.
	        def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
	            if input_type == "Text":
	                input_value = text
	            elif input_type == "URL":
	                input_value = url
	            else:
	                input_value = file

	            try:
	                return analyze_text(input_value, input_type, tasks, progress)
	            except Exception as e:
	                # UI boundary: show the failure in the first output box rather
	                # than letting the request crash.
	                return f"Error: {str(e)}", "", "", ""

	        with gr.Tab("Original Text"):
	            original_text_output = gr.Textbox(label="Original Text")
	        with gr.Tab("Summary"):
	            summary_output = gr.Textbox(label="Summary")
	        with gr.Tab("Sentiment"):
	            sentiment_output = gr.Textbox(label="Sentiment")
	        with gr.Tab("Topics"):
	            topics_output = gr.Textbox(label="Topics")

	        # Wire the button only after the output components exist; referencing
	        # them earlier raises UnboundLocalError while the UI is being built.
	        submit_button.click(
	            fn=process_input,
	            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
	            outputs=[original_text_output, summary_output, sentiment_output, topics_output],
	        )

	    return interface

	if __name__ == "__main__":
	    # Script entry point: build the UI, then start the Gradio server.
	    demo = create_interface()
	    demo.launch()