Spaces:

MHamdan
/

ContentAnalyzer

Running

App Files Files Community

ContentAnalyzer / app.py

MHamdan

app update

edcb23f verified 5 months ago

raw

history blame

7.42 kB

	import gradio as gr
	from transformers import pipeline
	import requests
	from bs4 import BeautifulSoup
	import PyPDF2
	import docx
	import time
	from langchain_community.llms import OpenAI
	from langchain.chains import ConversationChain
	from langchain.memory import ConversationBufferMemory
	from langchain_core.prompts import PromptTemplate
	from dotenv import load_dotenv
	import os

	# Read settings from a local .env file into the process environment, then
	# build the OpenAI-backed LLM from the key found there.
	load_dotenv()
	openai_api_key = os.environ.get("OPENAI_API_KEY")
	llm = OpenAI(openai_api_key=openai_api_key)

	# The Hugging Face pipelines are created once at import time and reused by
	# every analysis request.
	summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
	sentiment_analyzer = pipeline(task="sentiment-analysis")
	topic_classifier = pipeline(task="zero-shot-classification")

	def fetch_text_from_url(url, timeout=10):
	    """Download a web page and return the text of its ``<p>`` elements.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The text of every ``<p>`` element, joined with single spaces.

	    Raises:
	        ValueError: If the request fails, times out, or the server answers
	            with a 4xx/5xx status code.
	    """
	    try:
	        # Without an explicit timeout, requests may block indefinitely on an
	        # unresponsive host and freeze the UI request that called us.
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()
	    except requests.exceptions.RequestException as e:
	        raise ValueError(f"Error fetching text from URL: {str(e)}") from e

	    soup = BeautifulSoup(response.text, "html.parser")
	    return " ".join(p.get_text() for p in soup.find_all("p"))

	def extract_text_from_pdf(file):
	    """Return the concatenated text of every page in a PDF.

	    Args:
	        file: Whatever ``PyPDF2.PdfReader`` accepts as its source.

	    Returns:
	        The extracted text of all pages, in page order, with no separator.

	    Raises:
	        ValueError: If PyPDF2 reports a ``PdfReadError`` for the document.
	    """
	    try:
	        pdf_reader = PyPDF2.PdfReader(file)
	        # "or ''" keeps the join safe if a page yields nothing usable
	        # (e.g. an image-only page); join also avoids repeated string copies.
	        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
	    except PyPDF2.errors.PdfReadError as e:
	        raise ValueError(f"Error extracting text from PDF: {str(e)}") from e

	def extract_text_from_docx(file):
	    """Return the text of every paragraph in a DOCX document.

	    Args:
	        file: Whatever ``docx.Document`` accepts as its source.

	    Returns:
	        Each paragraph's text followed by a newline, in document order.

	    Raises:
	        ValueError: If the document cannot be opened as a DOCX package.
	    """
	    import zipfile  # local import: only needed for the error type below

	    try:
	        doc = docx.Document(file)
	        return "".join(para.text + "\n" for para in doc.paragraphs)
	    except (docx.opc.exceptions.PackageNotFoundError, zipfile.BadZipFile) as e:
	        # NOTE(review): a corrupt upload passed as a file object appears to
	        # surface as BadZipFile rather than PackageNotFoundError - confirm
	        # against the installed python-docx version.
	        raise ValueError(f"Error extracting text from DOCX: {str(e)}") from e

	def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
	    """Load text from the chosen source and run the selected analyses on it.

	    Args:
	        input_text: The raw text, a URL, or an uploaded file object,
	            depending on ``input_type``.
	        input_type: ``"URL"`` or ``"File"`` trigger loading; any other value
	            treats ``input_text`` as the text itself.
	        tasks: Collection of task names; recognised values are
	            ``"Summarization"``, ``"Sentiment Analysis"`` and
	            ``"Topic Detection"``. ``None`` is treated as no tasks.
	        progress: Progress callback, called with a fraction and a ``desc``.

	    Returns:
	        A 4-tuple of strings ``(original_text, summary, sentiment, topics)``.
	        On a loading failure the first element holds the error message and
	        the other three are empty.
	    """
	    tasks = tasks or []

	    if input_type == "URL":
	        progress(0, desc="Fetching text from URL")
	        try:
	            input_text = fetch_text_from_url(input_text)
	        except ValueError as e:
	            return str(e), "", "", ""
	    elif input_type == "File":
	        progress(0, desc="Extracting text from file")
	        if input_text is None:
	            return "No file uploaded", "", "", ""
	        file_name = input_text.name.lower()
	        if file_name.endswith(".pdf"):
	            try:
	                input_text = extract_text_from_pdf(input_text)
	            except ValueError as e:
	                return str(e), "", "", ""
	        elif file_name.endswith(".docx"):
	            try:
	                input_text = extract_text_from_docx(input_text)
	            except ValueError as e:
	                return str(e), "", "", ""
	        else:
	            raw = input_text.read()
	            try:
	                # read() yields str when the upload was opened in text mode.
	                input_text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
	            except UnicodeDecodeError as e:
	                return f"Error decoding file as UTF-8: {str(e)}", "", "", ""

	    # Nothing to analyse: bail out before slicing None or feeding the models
	    # an empty string.
	    if not input_text or not input_text.strip():
	        return "No text to analyze", "", "", ""

	    # Only a preview of the source text is shown in the UI.
	    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")

	    summary, sentiment = "", ""
	    topics = []

	    if "Summarization" in tasks:
	        progress(0.3, desc="Generating summary")
	        # truncation=True clips over-long input to the model's limit instead
	        # of failing inside the model.
	        summary = summarizer(
	            input_text,
	            max_length=100,
	            min_length=30,
	            do_sample=False,
	            truncation=True,
	        )[0]["summary_text"]
	        time.sleep(1)  # Add a minimal delay for demonstration purposes

	    if "Sentiment Analysis" in tasks:
	        progress(0.6, desc="Analyzing sentiment")
	        # Truncate input for sentiment analysis
	        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
	        time.sleep(1)

	    if "Topic Detection" in tasks:
	        progress(0.9, desc="Detecting topics")
	        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
	        # Truncate input for topic detection
	        # NOTE(review): every candidate label is returned, not only the
	        # likely ones - consider filtering on the returned scores.
	        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
	        time.sleep(1)

	    progress(1, desc="Analysis completed")

	    return original_text, summary, sentiment, ", ".join(topics)

	def chat(input_text, conversation_history):
	    """Ask the LLM to answer ``input_text`` given the prior conversation.

	    The prompt is rendered directly and sent to the model. The previous
	    implementation built a ConversationChain with a brand-new, empty memory
	    on every call, so ``conversation_history`` never reached the model, and
	    the prompt's ``input_text`` variable did not match the chain's default
	    input key.

	    Args:
	        input_text: The user's latest message.
	        conversation_history: Earlier turns, either as one string or as an
	            iterable of lines (joined with newlines).

	    Returns:
	        The model's reply as returned by ``llm.invoke``.
	    """
	    prompt_template = """
	Assistant is an AI language model that helps with text analysis tasks.

	Conversation history:
	{conversation_history}

	Human: {input_text}
	Assistant:"""

	    prompt = PromptTemplate(
	        input_variables=["conversation_history", "input_text"],
	        template=prompt_template,
	    )

	    if not isinstance(conversation_history, str):
	        conversation_history = "\n".join(conversation_history or [])

	    rendered_prompt = prompt.format(
	        conversation_history=conversation_history,
	        input_text=input_text,
	    )
	    return llm.invoke(rendered_prompt)

	def create_interface():
	    """Build the Gradio Blocks UI for text analysis and chat.

	    The layout has an input-type selector with three alternative inputs
	    (text, URL, file), a task checklist, an "Analyze" button, result tabs,
	    and a "Conversation" tab backed by per-session state.

	    Returns:
	        The assembled ``gr.Blocks`` object (not yet launched).
	    """
	    with gr.Blocks(title="Text Analysis App") as interface:
	        gr.Markdown("## Choose data format to analyze")
	        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
	        text_input = gr.Textbox(label="Text Input", visible=False)
	        url_input = gr.Textbox(label="URL Input", visible=False)
	        file_input = gr.File(label="File Upload", visible=False)

	        tasks_checkboxes = gr.CheckboxGroup(
	            ["Summarization", "Sentiment Analysis", "Topic Detection"],
	            label="Analysis Tasks",
	        )

	        submit_button = gr.Button("Analyze")

	        with gr.Tab("Original Text"):
	            original_text_output = gr.Textbox(label="Original Text")
	        with gr.Tab("Summary"):
	            summary_output = gr.Textbox(label="Summary")
	        with gr.Tab("Sentiment"):
	            sentiment_output = gr.Textbox(label="Sentiment")
	        with gr.Tab("Topics"):
	            topics_output = gr.Textbox(label="Topics")
	        with gr.Tab("Conversation"):
	            conversation_history = gr.State([])
	            conversation_input = gr.Textbox(label="Human")
	            conversation_output = gr.Textbox(label="Assistant")
	            conversation_button = gr.Button("Send")

	        def update_input_visibility(input_type):
	            """Show only the input widget matching the selected type."""
	            return {
	                text_input: gr.update(visible=input_type == "Text"),
	                url_input: gr.update(visible=input_type == "URL"),
	                file_input: gr.update(visible=input_type == "File"),
	            }

	        input_type.change(
	            update_input_visibility,
	            inputs=[input_type],
	            outputs=[text_input, url_input, file_input],
	        )

	        # The progress tracker must be a default argument of the event
	        # handler itself for Gradio to hook it up to the running request.
	        def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
	            """Pick the active input and run the selected analyses on it."""
	            if input_type == "Text":
	                input_value = text
	            elif input_type == "URL":
	                input_value = url
	            elif input_type == "File":
	                input_value = file
	            else:
	                # Nothing selected yet: every input widget is still hidden.
	                return "Please choose an input type", "", "", ""

	            return analyze_text(input_value, input_type, tasks, progress)

	        submit_button.click(
	            fn=process_input,
	            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
	            outputs=[original_text_output, summary_output, sentiment_output, topics_output],
	        )

	        def process_conversation(conversation_history, conversation_input):
	            """Send one chat turn and return the updated state and reply."""
	            # Work on a copy so the State value is replaced, not mutated.
	            history = list(conversation_history or [])
	            # Only earlier turns go in as history; the new message is passed
	            # separately, so it is not repeated.
	            response = chat(conversation_input, "\n".join(history))
	            history.append(f"Human: {conversation_input}")
	            history.append(f"Assistant: {response}")
	            # The state must stay a list: returning the joined string would
	            # make the next turn fail on ``.append``.
	            return history, "", response

	        conversation_button.click(
	            fn=process_conversation,
	            inputs=[conversation_history, conversation_input],
	            outputs=[conversation_history, conversation_input, conversation_output],
	        )

	    return interface

	# Script entry point: build the UI and start the Gradio server.
	if __name__ == "__main__":
	    app = create_interface()
	    app.launch()