Spaces:

MHamdan
/

ContentAnalyzer

Running

App Files Files Community

ContentAnalyzer / app.py

MHamdan

app update

edcb23f verified 4 months ago

raw

history blame contribute delete

7.42 kB

	import gradio as gr
	from transformers import pipeline
	import requests
	from bs4 import BeautifulSoup
	import PyPDF2
	import docx
	import time
	from langchain_community.llms import OpenAI
	from langchain.chains import ConversationChain
	from langchain.memory import ConversationBufferMemory
	from langchain_core.prompts import PromptTemplate
	from dotenv import load_dotenv
	import os

	# Read key/value pairs from a local .env file into the process environment
	# before anything below queries os.getenv.
	load_dotenv() # Load environment variables from .env file
	# os.getenv returns None when OPENAI_API_KEY is unset; the value is handed to
	# the OpenAI wrapper as-is.
	openai_api_key = os.getenv("OPENAI_API_KEY")
	# Module-level LLM client; chat() uses this instance.
	llm = OpenAI(openai_api_key=openai_api_key)

	# The pipelines are built once at import time and reused by analyze_text().
	# The summarizer is pinned to the facebook/bart-large-cnn checkpoint.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	# No model is named for the two pipelines below, so transformers picks its
	# default checkpoint for each task.
	# NOTE(review): consider pinning explicit models so results stay reproducible.
	sentiment_analyzer = pipeline("sentiment-analysis")
	topic_classifier = pipeline("zero-shot-classification")

	def fetch_text_from_url(url, timeout=10):
	    """Download a web page and return the text of its ``<p>`` elements.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout ``requests`` waits indefinitely, which would hang the
	            request handler on an unresponsive host.

	    Returns:
	        The text of every ``<p>`` tag in the page, joined with single spaces.

	    Raises:
	        ValueError: If the request fails, times out, or the server answers
	            with a 4xx/5xx status. The underlying ``requests`` exception is
	            attached as ``__cause__``.
	    """
	    # Only the network call can raise RequestException, so keep the try
	    # body limited to it.
	    try:
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
	    except requests.exceptions.RequestException as e:
	        raise ValueError(f"Error fetching text from URL: {e}") from e

	    soup = BeautifulSoup(response.text, "html.parser")
	    return " ".join(p.get_text() for p in soup.find_all("p"))

	def extract_text_from_pdf(file):
	    """Return the text of every page of a PDF, concatenated in page order.

	    ``file`` is passed straight to ``PyPDF2.PdfReader``. A ``PdfReadError``
	    raised while reading is re-raised as ``ValueError`` with a readable
	    message.
	    """
	    try:
	        reader = PyPDF2.PdfReader(file)
	        # Page texts are joined with no separator between pages.
	        return "".join(page.extract_text() for page in reader.pages)
	    except PyPDF2.errors.PdfReadError as err:
	        raise ValueError(f"Error extracting text from PDF: {str(err)}")

	def extract_text_from_docx(file):
	    """Return the paragraph text of a DOCX document, one paragraph per line.

	    ``file`` is passed straight to ``docx.Document``. A
	    ``PackageNotFoundError`` raised while opening it is re-raised as
	    ``ValueError`` with a readable message.
	    """
	    try:
	        document = docx.Document(file)
	        # Every paragraph, including the last, is followed by a newline.
	        lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
	        return "".join(lines)
	    except docx.opc.exceptions.PackageNotFoundError as err:
	        raise ValueError(f"Error extracting text from DOCX: {str(err)}")

	def _read_uploaded_file(upload):
	    """Return the text of an uploaded PDF, DOCX, or UTF-8 text file.

	    ``upload`` may be a filesystem path (``str``) or a file-like object that
	    exposes ``.name`` and ``.read()``. Raises ``ValueError`` on any read or
	    decode failure so the caller can handle every file type the same way.
	    """
	    path = upload if isinstance(upload, str) else upload.name
	    lowered = path.lower()
	    if lowered.endswith(".pdf"):
	        return extract_text_from_pdf(upload)
	    if lowered.endswith(".docx"):
	        return extract_text_from_docx(upload)

	    # Anything else is treated as plain text encoded in UTF-8.
	    try:
	        if isinstance(upload, str):
	            with open(upload, "rb") as handle:
	                raw = handle.read()
	        else:
	            raw = upload.read()
	        # A handle opened in text mode already yields str.
	        return raw.decode("utf-8") if isinstance(raw, bytes) else raw
	    except (OSError, UnicodeDecodeError) as e:
	        raise ValueError(f"Error reading text file: {e}") from e


	def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
	    """Resolve the input to plain text and run the selected analyses on it.

	    Args:
	        input_text: Raw text, a URL, or an uploaded file, depending on
	            ``input_type``.
	        input_type: ``"URL"`` or ``"File"`` trigger fetching/extraction; any
	            other value means ``input_text`` is used as the text itself.
	        tasks: Collection of task names; recognised values are
	            ``"Summarization"``, ``"Sentiment Analysis"`` and
	            ``"Topic Detection"``. ``None`` is treated as no tasks.
	        progress: Callable invoked as ``progress(fraction, desc=...)`` to
	            report progress.

	    Returns:
	        A 4-tuple ``(original_text, summary, sentiment, topics)`` of strings.
	        ``original_text`` is clipped to 1000 characters plus ``"..."``.
	        Results for tasks that were not requested are empty strings. When the
	        input cannot be loaded or is empty, the first element carries the
	        error message and the other three are empty.
	    """
	    tasks = tasks or []

	    if input_type == "URL":
	        progress(0, desc="Fetching text from URL")
	        try:
	            input_text = fetch_text_from_url(input_text)
	        except ValueError as e:
	            return str(e), "", "", ""
	    elif input_type == "File":
	        progress(0, desc="Extracting text from file")
	        if input_text is None:
	            return "No file uploaded", "", "", ""
	        try:
	            input_text = _read_uploaded_file(input_text)
	        except ValueError as e:
	            return str(e), "", "", ""

	    # Guard before slicing or calling the models: None or blank text would
	    # otherwise fail inside the code below.
	    if not input_text or not input_text.strip():
	        return "No text to analyze", "", "", ""

	    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")

	    summary, sentiment, topics = "", "", []

	    if "Summarization" in tasks:
	        progress(0.3, desc="Generating summary")
	        # truncation=True clips inputs longer than the model's maximum
	        # length instead of letting the pipeline fail on long documents.
	        summary = summarizer(
	            input_text,
	            max_length=100,
	            min_length=30,
	            do_sample=False,
	            truncation=True,
	        )[0]["summary_text"]
	        time.sleep(1)  # Add a minimal delay for demonstration purposes

	    if "Sentiment Analysis" in tasks:
	        progress(0.6, desc="Analyzing sentiment")
	        # Truncate input for sentiment analysis
	        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
	        time.sleep(1)

	    if "Topic Detection" in tasks:
	        progress(0.9, desc="Detecting topics")
	        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
	        # Truncate input for topic detection
	        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
	        time.sleep(1)

	    progress(1, desc="Analysis completed")

	    return original_text, summary, sentiment, ", ".join(topics)

	def chat(input_text, conversation_history):
	    """Ask the LLM for the assistant's next reply.

	    The supplied history is inserted into the prompt directly. Building a
	    fresh, empty conversation memory on every call would discard it, and the
	    model would never see earlier turns.

	    Args:
	        input_text: The latest message from the user.
	        conversation_history: Earlier turns, either as one string or as an
	            iterable of lines (joined with newlines). ``None`` or empty means
	            there is no history yet.

	    Returns:
	        The model's reply with surrounding whitespace removed.
	    """
	    prompt_template = """
	Assistant is an AI language model that helps with text analysis tasks.

	Conversation history:
	{conversation_history}

	Human: {input_text}
	Assistant:"""

	    prompt = PromptTemplate(
	        input_variables=["conversation_history", "input_text"],
	        template=prompt_template,
	    )

	    if conversation_history is None:
	        conversation_history = ""
	    elif not isinstance(conversation_history, str):
	        conversation_history = "\n".join(conversation_history)

	    prompt_text = prompt.format(
	        conversation_history=conversation_history,
	        input_text=input_text,
	    )
	    response = llm.invoke(prompt_text)

	    return response.strip()

	def create_interface():
	    """Build the Gradio UI for text analysis and chat.

	    The layout has an input-type selector with three input widgets (only the
	    one matching the selection is shown), a task checklist, one result tab
	    per analysis output, and a conversation tab.

	    Returns:
	        The assembled ``gr.Blocks`` object; the caller launches it.
	    """
	    with gr.Blocks(title="Text Analysis App") as interface:
	        gr.Markdown("## Choose data format to analyze")
	        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
	        text_input = gr.Textbox(label="Text Input", visible=False)
	        url_input = gr.Textbox(label="URL Input", visible=False)
	        file_input = gr.File(label="File Upload", visible=False)

	        tasks_checkboxes = gr.CheckboxGroup(
	            ["Summarization", "Sentiment Analysis", "Topic Detection"],
	            label="Analysis Tasks",
	        )

	        submit_button = gr.Button("Analyze")

	        with gr.Tab("Original Text"):
	            original_text_output = gr.Textbox(label="Original Text")
	        with gr.Tab("Summary"):
	            summary_output = gr.Textbox(label="Summary")
	        with gr.Tab("Sentiment"):
	            sentiment_output = gr.Textbox(label="Sentiment")
	        with gr.Tab("Topics"):
	            topics_output = gr.Textbox(label="Topics")
	        with gr.Tab("Conversation"):
	            # Per-session list of "Human: ..." / "Assistant: ..." lines.
	            conversation_history = gr.State([])
	            conversation_input = gr.Textbox(label="Human")
	            conversation_output = gr.Textbox(label="Assistant")
	            conversation_button = gr.Button("Send")

	        def update_input_visibility(input_type):
	            """Show only the input widget that matches the selected type."""
	            return {
	                text_input: gr.update(visible=input_type == "Text"),
	                url_input: gr.update(visible=input_type == "URL"),
	                file_input: gr.update(visible=input_type == "File"),
	            }

	        input_type.change(
	            update_input_visibility,
	            inputs=[input_type],
	            outputs=[text_input, url_input, file_input],
	        )

	        # gr.Progress must be a default argument of the event handler itself
	        # for Gradio to track it; an instance created in the layout and
	        # passed along by hand is not connected to the running event.
	        def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
	            """Pick the value for the selected input type and analyze it."""
	            sources = {"Text": text, "URL": url, "File": file}
	            # Nothing selected yet: report it instead of treating it as a file.
	            if input_type not in sources:
	                return "Please select an input type", "", "", ""
	            return analyze_text(sources[input_type], input_type, tasks, progress)

	        submit_button.click(
	            fn=process_input,
	            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
	            outputs=[original_text_output, summary_output, sentiment_output, topics_output],
	        )

	        def process_conversation(conversation_history, conversation_input):
	            """Send one chat turn and return the updated history list."""
	            # Work on a copy so the state object is not mutated in place.
	            prior_turns = list(conversation_history or [])
	            # chat() receives the new message separately, so only the
	            # earlier turns are passed as history.
	            response = chat(conversation_input, "\n".join(prior_turns))
	            updated_history = prior_turns + [
	                f"Human: {conversation_input}",
	                f"Assistant: {response}",
	            ]
	            # The state must stay a list: writing a joined string back would
	            # break the next turn.
	            return updated_history, "", response

	        conversation_button.click(
	            fn=process_conversation,
	            inputs=[conversation_history, conversation_input],
	            outputs=[conversation_history, conversation_input, conversation_output],
	        )

	    return interface

	# Script entry point: build the UI and start the Gradio server.
	if __name__ == "__main__":
	    demo = create_interface()
	    demo.launch()