Spaces:

MHamdan
/

ContentAnalyzer

Running

App Files Files Community

ContentAnalyzer / app.py

MHamdan

Update app.py

16eaebe verified 4 months ago

raw

history blame

5.52 kB

	import gradio as gr
	from transformers import pipeline
	import requests
	from bs4 import BeautifulSoup
	import PyPDF2
	import docx
	import time
	from smolagents.agents import HuggingFaceAgent

	# Load the three inference pipelines once, at import time, so every request
	# reuses the same in-memory models instead of reloading them.
	#
	# Every checkpoint is named explicitly.  Relying on a task's implicit default
	# makes transformers emit a "no model was supplied" warning and lets the
	# model change silently when the library is upgraded.  The two checkpoints
	# added here are the ones transformers documents as the defaults for these
	# tasks, so behaviour is intended to stay the same.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	sentiment_analyzer = pipeline(
	    "sentiment-analysis",
	    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
	)
	topic_classifier = pipeline(
	    "zero-shot-classification",
	    model="facebook/bart-large-mnli",
	)

	def fetch_text_from_url(url, timeout=10):
	    """Download a web page and return the text of its ``<p>`` elements.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server before giving up.  Without
	            a timeout ``requests`` waits indefinitely, which would hang
	            the calling request on an unresponsive host.

	    Returns:
	        The text of every ``<p>`` element, joined with single spaces
	        (an empty string when the page has no paragraphs).

	    Raises:
	        requests.RequestException: On connection failure, timeout, an
	            invalid URL, or an HTTP error status.
	    """
	    # NOTE(review): the URL comes straight from the user; if this app can
	    # reach internal services, consider restricting which hosts may be fetched.
	    response = requests.get(url, timeout=timeout)
	    # Fail loudly on 4xx/5xx instead of analysing the body of an error page.
	    response.raise_for_status()
	    soup = BeautifulSoup(response.text, "html.parser")
	    return " ".join(p.get_text() for p in soup.find_all("p"))

	def extract_text_from_pdf(file):
	    """Return the text of every page of a PDF, one page per line group.

	    Args:
	        file: Whatever ``PyPDF2.PdfReader`` accepts as its source
	            (the caller passes the uploaded file).

	    Returns:
	        The extracted text of all pages joined with newlines.  Pages are
	        separated so the last word of one page does not fuse with the
	        first word of the next.
	    """
	    pdf_reader = PyPDF2.PdfReader(file)
	    # ``or ""`` guards against a page that yields no text (for example a
	    # scanned image), so the join never sees a non-string value.
	    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

	def extract_text_from_docx(file):
	    """Return the text of a Word document, one paragraph per line.

	    Args:
	        file: Source handed to ``docx.Document``.

	    Returns:
	        The text of each paragraph in document order, every paragraph
	        (including the last) followed by a newline.
	    """
	    document = docx.Document(file)
	    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)

	def _read_uploaded_file(uploaded):
	    """Return the text content of an uploaded file, chosen by its extension."""
	    if uploaded is None:
	        raise ValueError("No file was uploaded")
	    # The upload may arrive as a plain path string or as an object exposing
	    # its path through ``.name``; handle both rather than assuming one.
	    # NOTE(review): which form Gradio delivers depends on its version and
	    # the File component's configuration -- confirm for the deployed version.
	    path = uploaded if isinstance(uploaded, str) else uploaded.name
	    lowered = path.lower()
	    if lowered.endswith(".pdf"):
	        return extract_text_from_pdf(path)
	    if lowered.endswith(".docx"):
	        return extract_text_from_docx(path)
	    # Anything else is treated as UTF-8 text.
	    with open(path, encoding="utf-8") as handle:
	        return handle.read()


	def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
	    """Resolve the input to plain text and run the selected analyses on it.

	    Args:
	        input_text: The raw text, a URL, or an uploaded file, depending on
	            ``input_type``.
	        input_type: ``"URL"`` fetches the page, ``"File"`` reads the upload;
	            any other value treats ``input_text`` as the text itself.
	        tasks: Collection of task names to run; recognised names are
	            ``"Summarization"``, ``"Sentiment Analysis"`` and
	            ``"Topic Detection"``.
	        progress: Callable used to report progress as
	            ``progress(fraction, desc=...)``.

	    Returns:
	        A 4-tuple ``(original_text, summary, sentiment, topics)`` of strings.
	        ``original_text`` is the first 1000 characters of the input (with
	        ``"..."`` appended when it was longer); results for tasks that were
	        not requested are empty strings; ``topics`` is a comma-separated
	        list of labels.

	    Raises:
	        ValueError: If no file was uploaded or the resolved text is empty.
	        Exception: Whatever the fetch, file-parsing or model calls raise.
	    """
	    if input_type == "URL":
	        progress(0, desc="Fetching text from URL")
	        input_text = fetch_text_from_url(input_text)
	    elif input_type == "File":
	        progress(0, desc="Extracting text from file")
	        input_text = _read_uploaded_file(input_text)

	    # Stop early with a clear message instead of failing inside a model call.
	    if not input_text or not input_text.strip():
	        raise ValueError("No text to analyze")

	    tasks = tasks or []
	    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")

	    summary, sentiment, topics = "", "", []

	    if "Summarization" in tasks:
	        progress(0.3, desc="Generating summary")
	        # truncation=True lets the tokenizer cut over-long documents down to
	        # the model's input limit; without it long inputs make the model fail.
	        summary = summarizer(
	            input_text,
	            max_length=100,
	            min_length=30,
	            do_sample=False,
	            truncation=True,
	        )[0]["summary_text"]

	    if "Sentiment Analysis" in tasks:
	        progress(0.6, desc="Analyzing sentiment")
	        # Only the first 512 characters are classified to keep the input short.
	        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]

	    if "Topic Detection" in tasks:
	        progress(0.9, desc="Detecting topics")
	        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
	        # NOTE(review): with multi_label=True the "labels" entry appears to
	        # contain every candidate label (ordered by score), so all five are
	        # always shown -- consider filtering on the returned "scores".
	        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]

	    progress(1, desc="Analysis completed")

	    return original_text, summary, sentiment, ", ".join(topics)

	def create_interface():
	    """Build the Gradio app and return it, ready for ``.launch()``.

	    The UI offers an input-type selector (text, URL or file), a group of
	    analysis-task checkboxes and an "Analyze" button.  Results are shown in
	    four tabs: original text, summary, sentiment and topics.  The summary
	    and sentiment tabs each carry a second box with the agent's commentary.

	    Returns:
	        The assembled ``gr.Blocks`` application.
	    """
	    model_endpoint = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
	    # NOTE(review): confirm that the installed smolagents release provides
	    # HuggingFaceAgent with a ``model_endpoint`` argument and a ``run`` method.
	    agent = HuggingFaceAgent(model_endpoint=model_endpoint)

	    def update_input_visibility(input_type):
	        """Show only the input widget matching the selected input type."""
	        # Order matches the outputs list of the ``change`` event below.
	        return (
	            gr.update(visible=input_type == "Text"),
	            gr.update(visible=input_type == "URL"),
	            gr.update(visible=input_type == "File"),
	        )

	    # ``progress`` is declared as a default argument so that Gradio supplies
	    # a tracker bound to the running event; a Progress object created by
	    # hand outside an event is not connected to any request.
	    def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
	        """Run the analysis for one click and return the six output values."""
	        if input_type == "Text":
	            input_value = text
	        elif input_type == "URL":
	            input_value = url
	        else:
	            input_value = file

	        enhanced_summary = ""
	        enhanced_sentiment = ""
	        try:
	            original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress)
	            # Only consult the agent about results that were actually produced.
	            if summary:
	                enhanced_summary = agent.run(f"Given the following text: '{original_text}', please suggest improvements to this summary: '{summary}'")
	            if sentiment:
	                enhanced_sentiment = agent.run(f"Given the following text: '{original_text}', does this sentiment seem accurate: '{sentiment}'? Please elaborate and suggest any corrections.")
	        except Exception as e:
	            # UI boundary: surface the failure in the first output box
	            # instead of letting the event crash.
	            original_text = f"Error: {str(e)}"
	            summary, sentiment, topics = "", "", ""
	            enhanced_summary = ""
	            enhanced_sentiment = ""

	        return original_text, summary, enhanced_summary, sentiment, enhanced_sentiment, topics

	    # Components must be created inside the Blocks context to be rendered
	    # and to be usable as event inputs/outputs.
	    with gr.Blocks(title="Text Analysis App") as demo:
	        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
	        text_input = gr.Textbox(visible=False)
	        url_input = gr.Textbox(visible=False)
	        file_input = gr.File(visible=False)

	        tasks_checkboxes = gr.CheckboxGroup(["Summarization", "Sentiment Analysis", "Topic Detection"], label="Analysis Tasks")

	        submit_button = gr.Button("Analyze")

	        with gr.Tab("Original Text"):
	            original_text_output = gr.Textbox(label="Original Text")
	        with gr.Tab("Summary"):
	            summary_output = gr.Textbox(label="Summary")
	            enhanced_summary_output = gr.Textbox(label="Enhanced Summary")
	        with gr.Tab("Sentiment"):
	            sentiment_output = gr.Textbox(label="Sentiment")
	            enhanced_sentiment_output = gr.Textbox(label="Enhanced Sentiment")
	        with gr.Tab("Topics"):
	            topics_output = gr.Textbox(label="Topics")

	        input_type.change(update_input_visibility, [input_type], [text_input, url_input, file_input])

	        # Six distinct outputs, one per returned value; listing a component
	        # twice would let the later value overwrite the earlier one.
	        submit_button.click(
	            fn=process_input,
	            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
	            outputs=[
	                original_text_output,
	                summary_output,
	                enhanced_summary_output,
	                sentiment_output,
	                enhanced_sentiment_output,
	                topics_output,
	            ],
	        )

	    return demo

	if __name__ == "__main__":
	    # Build the UI and start serving it only when run as a script,
	    # not when this module is imported.
	    app = create_interface()
	    app.launch()