"""Gradio app for summarising, classifying and chatting about text.

Text can be supplied directly, fetched from a URL, or extracted from an
uploaded PDF/DOCX/plain-text file.
"""

import os
import time

import docx
import gradio as gr
import PyPDF2
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import OpenAI
from langchain_core.prompts import PromptTemplate
from transformers import pipeline

load_dotenv()  # Load environment variables from .env file

openai_api_key = os.getenv("OPENAI_API_KEY")
llm = OpenAI(openai_api_key=openai_api_key)

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")

# Number of characters of the input echoed back in the "Original Text" tab.
PREVIEW_CHARS = 1000
# Character budget passed to the sentiment and topic pipelines.
CLASSIFIER_CHARS = 512
# Candidate labels offered to the zero-shot topic classifier.
TOPIC_LABELS = ["technology", "politics", "sports", "entertainment", "business"]
# Seconds to wait for a remote page before giving up.
DEFAULT_URL_TIMEOUT = 10

CHAT_PROMPT_TEMPLATE = """
Assistant is an AI language model that helps with text analysis tasks.

Conversation history:
{conversation_history}
Human: {input_text}
Assistant:"""

CHAT_PROMPT = PromptTemplate(
    input_variables=["conversation_history", "input_text"],
    template=CHAT_PROMPT_TEMPLATE,
)


def fetch_text_from_url(url, timeout=DEFAULT_URL_TIMEOUT):
    """Download *url* and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server; without a timeout a
            stalled server would block the request forever.

    Returns:
        The paragraph texts joined with single spaces.

    Raises:
        ValueError: If the request fails or returns a 4xx/5xx status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
    except requests.exceptions.RequestException as e:
        raise ValueError(f"Error fetching text from URL: {str(e)}") from e
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))


def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Anything ``PyPDF2.PdfReader`` accepts.

    Raises:
        ValueError: If PyPDF2 reports a ``PdfReadError``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # extract_text() may yield None for pages without a text layer.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except PyPDF2.errors.PdfReadError as e:
        raise ValueError(f"Error extracting text from PDF: {str(e)}") from e


def extract_text_from_docx(file):
    """Return the paragraphs of a DOCX document, one per line.

    Args:
        file: Anything ``docx.Document`` accepts.

    Raises:
        ValueError: If python-docx raises ``PackageNotFoundError``.
    """
    try:
        doc = docx.Document(file)
        return "".join(f"{para.text}\n" for para in doc.paragraphs)
    except docx.opc.exceptions.PackageNotFoundError as e:
        raise ValueError(f"Error extracting text from DOCX: {str(e)}") from e


def _read_text_file(file):
    """Read an uploaded plain-text file as UTF-8 via its on-disk path."""
    # Reading by path avoids depending on whether the upload object is an
    # open binary handle or just a path-like string with a ``name``.
    path = getattr(file, "name", file)
    try:
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    except (OSError, UnicodeDecodeError) as e:
        raise ValueError(f"Error reading text file: {str(e)}") from e


def _extract_text_from_upload(file):
    """Pick an extractor for *file* based on its file-name extension."""
    file_name = str(getattr(file, "name", file)).lower()
    if file_name.endswith(".pdf"):
        return extract_text_from_pdf(file)
    if file_name.endswith(".docx"):
        return extract_text_from_docx(file)
    return _read_text_file(file)


def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Run the selected analysis tasks on the supplied input.

    Args:
        input_text: Raw text, a URL, or an uploaded file object, depending
            on *input_type*.
        input_type: ``"Text"``, ``"URL"`` or ``"File"``.
        tasks: Collection of task names; recognised values are
            ``"Summarization"``, ``"Sentiment Analysis"`` and
            ``"Topic Detection"``.
        progress: Callable used to report progress to the UI.

    Returns:
        A 4-tuple ``(original_text, summary, sentiment, topics)`` of strings.
        When the input cannot be obtained, the first element carries the
        error message and the remaining three are empty.
    """
    tasks = tasks or []

    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        try:
            input_text = fetch_text_from_url(input_text)
        except ValueError as e:
            return str(e), "", "", ""
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        if input_text is None:
            return "No file uploaded", "", "", ""
        try:
            input_text = _extract_text_from_upload(input_text)
        except ValueError as e:
            return str(e), "", "", ""

    # Nothing selected, an empty textbox, or a page/file without text:
    # bail out instead of slicing None or feeding "" to the models.
    if not isinstance(input_text, str) or not input_text.strip():
        return "No text to analyze", "", "", ""

    original_text = input_text[:PREVIEW_CHARS] + (
        "..." if len(input_text) > PREVIEW_CHARS else ""
    )
    summary, sentiment, topics = "", "", []

    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True keeps long documents within the model's input window.
        summary = summarizer(
            input_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes

    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Truncate input for sentiment analysis
        sentiment = sentiment_analyzer(input_text[:CLASSIFIER_CHARS])[0]["label"]
        time.sleep(1)

    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        # Truncate input for topic detection
        topics = topic_classifier(
            input_text[:CLASSIFIER_CHARS], TOPIC_LABELS, multi_label=True
        )["labels"]
        time.sleep(1)

    progress(1, desc="Analysis completed")
    return original_text, summary, sentiment, ", ".join(topics)


def chat(input_text, conversation_history):
    """Ask the LLM to answer *input_text* given the prior conversation.

    Args:
        input_text: The new human message.
        conversation_history: Earlier turns rendered as one string; it must
            not already contain *input_text*, because the prompt template
            appends the new human turn itself.

    Returns:
        The model's reply.
    """
    # The prompt is filled in directly so the supplied history is actually
    # sent to the model; a ConversationChain with a fresh, empty memory
    # would discard it on every call.
    prompt_text = CHAT_PROMPT.format(
        conversation_history=conversation_history,
        input_text=input_text,
    )
    return llm.invoke(prompt_text)


def create_interface():
    """Build and return the Gradio ``Blocks`` UI for the app."""
    with gr.Blocks(title="Text Analysis App") as interface:
        gr.Markdown("## Choose data format to analyze")
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type")
        text_input = gr.Textbox(label="Text Input", visible=False)
        url_input = gr.Textbox(label="URL Input", visible=False)
        file_input = gr.File(label="File Upload", visible=False)
        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )
        submit_button = gr.Button("Analyze")

        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")
        with gr.Tab("Conversation"):
            conversation_history = gr.State([])
            conversation_input = gr.Textbox(label="Human")
            conversation_output = gr.Textbox(label="Assistant")
            conversation_button = gr.Button("Send")

        def update_input_visibility(input_type):
            """Show only the input widget matching the selected type."""
            return {
                text_input: gr.update(visible=input_type == "Text"),
                url_input: gr.update(visible=input_type == "URL"),
                file_input: gr.update(visible=input_type == "File"),
            }

        input_type.change(
            update_input_visibility,
            inputs=[input_type],
            outputs=[text_input, url_input, file_input],
        )

        # gr.Progress() must be a default argument of the event handler
        # itself for Gradio to hook the tracker up to this event.
        def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
            """Route the active input widget's value into ``analyze_text``."""
            if input_type == "Text":
                input_value = text
            elif input_type == "URL":
                input_value = url
            else:
                input_value = file
            return analyze_text(input_value, input_type, tasks, progress)

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[
                original_text_output,
                summary_output,
                sentiment_output,
                topics_output,
            ],
        )

        def process_conversation(conversation_history, conversation_input):
            """Send one chat turn and return (new state, cleared input, reply)."""
            history = list(conversation_history or [])
            if not conversation_input or not conversation_input.strip():
                return history, "", ""
            # Pass only the *previous* turns: the prompt adds the new human
            # message itself, so appending it first would duplicate it.
            response = chat(conversation_input, "\n".join(history))
            history.append(f"Human: {conversation_input}")
            history.append(f"Assistant: {response}")
            # The state must stay a list; storing the joined string would
            # break the next turn's append.
            return history, "", response

        conversation_button.click(
            fn=process_conversation,
            inputs=[conversation_history, conversation_input],
            outputs=[conversation_history, conversation_input, conversation_output],
        )

    return interface


if __name__ == "__main__":
    create_interface().launch()