# ContentAnalyzer / app.py
# Author: MHamdan — updated using LangChain and OpenAI (rev 22b42e4)
import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import PyPDF2
import docx
import time
from langchain import OpenAI, ConversationChain, PromptTemplate
from dotenv import load_dotenv
import os
load_dotenv() # Load environment variables from .env file
openai_api_key = os.getenv("openai_api_key")
llm = OpenAI(openai_api_key=openai_api_key)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")
def fetch_text_from_url(url):
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
return " ".join(p.get_text() for p in soup.find_all("p"))
def extract_text_from_pdf(file):
pdf_reader = PyPDF2.PdfReader(file)
text = ""
for page in pdf_reader.pages:
text += page.extract_text()
return text
def extract_text_from_docx(file):
doc = docx.Document(file)
text = ""
for para in doc.paragraphs:
text += para.text + "\n"
return text
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
if input_type == "URL":
progress(0, desc="Fetching text from URL")
input_text = fetch_text_from_url(input_text)
elif input_type == "File":
progress(0, desc="Extracting text from file")
if input_text.name.lower().endswith(".pdf"):
input_text = extract_text_from_pdf(input_text)
elif input_text.name.lower().endswith(".docx"):
input_text = extract_text_from_docx(input_text)
else:
input_text = input_text.read().decode("utf-8")
original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
summary, sentiment, topics = "", "", ""
if "Summarization" in tasks:
progress(0.3, desc="Generating summary")
summary = summarizer(input_text, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
time.sleep(1) # Add a minimal delay for demonstration purposes
if "Sentiment Analysis" in tasks:
progress(0.6, desc="Analyzing sentiment")
sentiment = sentiment_analyzer(input_text[:512])[0]["label"] # Truncate input for sentiment analysis
time.sleep(1)
if "Topic Detection" in tasks:
progress(0.9, desc="Detecting topics")
topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"] # Truncate input for topic detection
time.sleep(1)
progress(1, desc="Analysis completed")
return original_text, summary, sentiment, ", ".join(topics)
def chat(input_text, chat_history):
prompt_template = """
Assistant is an AI language model that helps with text analysis tasks.
{chat_history}
Human: {input_text}
Assistant:"""
prompt = PromptTemplate(
input_variables=["chat_history", "input_text"],
template=prompt_template
)
chain = ConversationChain(llm=llm, prompt=prompt)
response = chain.predict(input_text=input_text)
return response
def create_interface():
with gr.Blocks(title="Text Analysis App") as interface:
input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
text_input = gr.Textbox(visible=False)
url_input = gr.Textbox(visible=False)
file_input = gr.File(visible=False)
tasks_checkboxes = gr.CheckboxGroup(["Summarization", "Sentiment Analysis", "Topic Detection"], label="Analysis Tasks")
submit_button = gr.Button("Analyze")
progress_bar = gr.Progress()
with gr.Tab("Original Text"):
original_text_output = gr.Textbox(label="Original Text")
with gr.Tab("Summary"):
summary_output = gr.Textbox(label="Summary")
with gr.Tab("Sentiment"):
sentiment_output = gr.Textbox(label="Sentiment")
with gr.Tab("Topics"):
topics_output = gr.Textbox(label="Topics")
with gr.Tab("Conversation"):
conversation_history = gr.State([])
conversation_input = gr.Textbox(label="Human")
conversation_output = gr.Textbox(label="Assistant")
conversation_button = gr.Button("Send")
def update_input_visibility(input_type):
text_input.visible = input_type == "Text"
url_input.visible = input_type == "URL"
file_input.visible = input_type == "File"
input_type.change(update_input_visibility, inputs=input_type)
def process_input(input_type, text, url, file, tasks):
if input_type == "Text":
input_value = text
elif input_type == "URL":
input_value = url
else:
input_value = file
try:
original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
except Exception as e:
original_text = f"Error: {str(e)}"
summary, sentiment, topics = "", "", ""
return original_text, summary, sentiment, topics
submit_button.click(
fn=process_input,
inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
outputs=[original_text_output, summary_output, sentiment_output, topics_output]
)
def process_conversation(conversation_history, conversation_input):
conversation_history.append(f"Human: {conversation_input}")
response = chat(conversation_input, "\n".join(conversation_history))
conversation_history.append(f"Assistant: {response}")
return conversation_history, "", response
conversation_button.click(
fn=process_conversation,
inputs=[conversation_history, conversation_input],
outputs=[conversation_history, conversation_input, conversation_output]
)
return interface
if __name__ == "__main__":
create_interface().launch()