import os
import time

import docx
import gradio as gr
import PyPDF2
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain import OpenAI, ConversationChain, PromptTemplate
from transformers import pipeline

load_dotenv()  # Load environment variables from .env file
# NOTE(review): env var names are conventionally UPPER_CASE; the lower-case
# spelling is kept because the existing .env presumably uses it -- confirm.
openai_api_key = os.getenv("openai_api_key")

llm = OpenAI(openai_api_key=openai_api_key)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")


def fetch_text_from_url(url):
    """Download *url* and return the concatenated text of all <p> elements."""
    # Timeout + raise_for_status: fail loudly instead of hanging forever or
    # silently scraping an HTTP error page (the original had neither).
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))


def extract_text_from_pdf(file):
    """Return the text of every page of a PDF file-like object."""
    pdf_reader = PyPDF2.PdfReader(file)
    # extract_text() can return None for image-only pages; guard with `or ""`
    # so one such page does not raise a TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def extract_text_from_docx(file):
    """Return the text of a .docx file, one paragraph per line."""
    doc = docx.Document(file)
    return "".join(para.text + "\n" for para in doc.paragraphs)


def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Run the selected analysis *tasks* over text, a URL, or an uploaded file.

    Parameters
    ----------
    input_text : str or file-like
        Raw text when ``input_type == "Text"``, a URL string for ``"URL"``,
        or an uploaded file object for ``"File"``.
    input_type : str
        One of ``"Text"``, ``"URL"``, ``"File"``.
    tasks : list[str]
        Subset of {"Summarization", "Sentiment Analysis", "Topic Detection"}.
    progress : gr.Progress
        Gradio progress tracker (injected automatically by Gradio).

    Returns
    -------
    tuple[str, str, str, str]
        (original-text preview, summary, sentiment label, comma-joined topics).
    """
    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        name = input_text.name.lower()
        if name.endswith(".pdf"):
            input_text = extract_text_from_pdf(input_text)
        elif name.endswith(".docx"):
            input_text = extract_text_from_docx(input_text)
        else:
            # Anything else is treated as plain UTF-8 text.
            input_text = input_text.read().decode("utf-8")

    # The UI previews at most the first 1000 characters of the source text.
    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")

    summary, sentiment, topics = "", "", ""
    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        summary = summarizer(
            input_text, max_length=100, min_length=30, do_sample=False
        )[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes
    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Truncate input for sentiment analysis (model context limit).
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
        time.sleep(1)
    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        # Truncate input for topic detection (model context limit).
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
        time.sleep(1)
    progress(1, desc="Analysis completed")
    return original_text, summary, sentiment, ", ".join(topics)


def chat(input_text, chat_history):
    """Answer *input_text* given the running *chat_history* transcript.

    Parameters
    ----------
    input_text : str
        The human's latest message.
    chat_history : str
        Newline-joined transcript of prior "Human:"/"Assistant:" turns.
    """
    prompt_template = """
Assistant is an AI language model that helps with text analysis tasks.

{chat_history}
Human: {input_text}
Assistant:"""
    prompt = PromptTemplate(
        input_variables=["chat_history", "input_text"],
        template=prompt_template,
    )
    # BUG FIX: the original wrapped this prompt in ConversationChain, which
    # validates that prompt variables be exactly {history, input} and would
    # raise at construction time; it also never passed chat_history to
    # predict(). Format the prompt ourselves and call the LLM directly.
    return llm(prompt.format(chat_history=chat_history, input_text=input_text))


def create_interface():
    """Build and return the Gradio Blocks UI for the text-analysis app."""
    with gr.Blocks(title="Text Analysis App") as interface:
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
        text_input = gr.Textbox(visible=False)
        url_input = gr.Textbox(visible=False)
        file_input = gr.File(visible=False)
        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )
        submit_button = gr.Button("Analyze")

        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")
        with gr.Tab("Conversation"):
            conversation_history = gr.State([])
            conversation_input = gr.Textbox(label="Human")
            conversation_output = gr.Textbox(label="Assistant")
            conversation_button = gr.Button("Send")

        def update_input_visibility(choice):
            # BUG FIX: the original mutated component.visible after the UI was
            # built, which has no effect in Gradio Blocks. Visibility changes
            # must be returned as updates wired through `outputs=`.
            return (
                gr.update(visible=choice == "Text"),
                gr.update(visible=choice == "URL"),
                gr.update(visible=choice == "File"),
            )

        input_type.change(
            update_input_visibility,
            inputs=input_type,
            outputs=[text_input, url_input, file_input],
        )

        def process_input(input_type, text, url, file, tasks):
            # Select whichever input widget matches the chosen input type.
            if input_type == "Text":
                input_value = text
            elif input_type == "URL":
                input_value = url
            else:
                input_value = file
            try:
                # analyze_text's own `progress=gr.Progress()` default provides
                # the tracker; the original's module-level gr.Progress()
                # instance was never a placeable component and is dropped.
                return analyze_text(input_value, input_type, tasks)
            except Exception as e:
                # Surface the failure in the UI instead of crashing the app.
                return f"Error: {str(e)}", "", "", ""

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[original_text_output, summary_output, sentiment_output, topics_output],
        )

        def process_conversation(history, message):
            # BUG FIX: join the transcript *before* appending the new turn --
            # chat()'s prompt template adds "Human: {input_text}" itself, so
            # appending first duplicated the current turn in the prompt.
            response = chat(message, "\n".join(history))
            history.append(f"Human: {message}")
            history.append(f"Assistant: {response}")
            return history, "", response

        conversation_button.click(
            fn=process_conversation,
            inputs=[conversation_history, conversation_input],
            outputs=[conversation_history, conversation_input, conversation_output],
        )
    return interface


if __name__ == "__main__":
    create_interface().launch()