"""Gradio app that summarizes text and reports its sentiment and topics.

Input can be typed text, a URL (paragraph text is scraped from the page) or
an uploaded PDF / DOCX / plain-text file.
"""

import time

import docx
import gradio as gr
import PyPDF2
import requests
from bs4 import BeautifulSoup
from smolagents.agents import HuggingFaceAgent
from transformers import pipeline

# The pipelines are built once at import time and shared by every request.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")


def fetch_text_from_url(url, timeout=10):
    """Download *url* and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The paragraph texts joined by single spaces.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
    """
    # NOTE(review): the URL comes straight from the user; if this app is ever
    # exposed publicly, consider restricting which hosts may be fetched.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of "analyzing" the body of a 404/500 error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))


def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts (a path or a binary
            stream).
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # ``or ""`` guards against pages for which no text could be extracted.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def extract_text_from_docx(file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        file: Whatever ``docx.Document`` accepts (a path or a file-like
            object).
    """
    doc = docx.Document(file)
    return "".join(para.text + "\n" for para in doc.paragraphs)


def _read_uploaded_file(uploaded):
    """Return the text content of an uploaded PDF, DOCX or UTF-8 text file.

    ``uploaded`` is either a filesystem path (str) or an object exposing the
    path as ``.name``; both shapes are accepted so the function does not
    depend on how the caller represents uploads.
    """
    path = uploaded if isinstance(uploaded, str) else uploaded.name
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".docx"):
        return extract_text_from_docx(path)
    with open(path, encoding="utf-8") as handle:
        return handle.read()


def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Run the selected analyses on text, a URL or an uploaded file.

    Args:
        input_text: The raw text, the URL to fetch, or the uploaded file,
            depending on ``input_type``.
        input_type: ``"URL"`` or ``"File"``; any other value means
            ``input_text`` already holds the text to analyze.
        tasks: Iterable of task names chosen from ``"Summarization"``,
            ``"Sentiment Analysis"`` and ``"Topic Detection"``.
        progress: Callable used to report progress (fraction, ``desc=``).

    Returns:
        A 4-tuple ``(original_text, summary, sentiment, topics)`` of strings.
        ``original_text`` is cut to 1000 characters (with a trailing ``...``
        when truncated); results of unselected tasks are empty strings.

    Raises:
        ValueError: If no input was supplied or no text could be obtained.
    """
    if input_text is None or input_text == "":
        raise ValueError("No input provided.")
    tasks = tasks or []

    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        input_text = _read_uploaded_file(input_text)

    if not input_text or not input_text.strip():
        raise ValueError("No text found to analyze.")

    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
    summary, sentiment, topics = "", "", []

    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # truncation=True keeps long inputs within the model's input limit
        # instead of failing inside the model.
        summary = summarizer(
            input_text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        time.sleep(1)  # Add a minimal delay for demonstration purposes

    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Truncate input for sentiment analysis
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
        time.sleep(1)

    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        # Truncate input for topic detection.
        # NOTE(review): the full "labels" list is used without any score
        # threshold, so this presumably lists every candidate label ranked by
        # score rather than only the matching ones - confirm this is intended.
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
        time.sleep(1)

    progress(1, desc="Analysis completed")
    return original_text, summary, sentiment, ", ".join(topics)


def create_interface():
    """Build and return the Gradio ``Blocks`` app for text analysis.

    The UI lets the user pick an input type (text, URL or file), choose the
    analyses to run, and shows the results in tabs. Summary and sentiment are
    additionally sent to an agent for a second opinion, shown in separate
    boxes next to the base results.

    Returns:
        The assembled ``gr.Blocks`` instance; call ``.launch()`` on it.
    """
    model_endpoint = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    # NOTE(review): the HuggingFaceAgent import and this constructor signature
    # could not be confirmed against the smolagents API, and the endpoint is a
    # summarization model rather than an instruction-following one - verify.
    agent = HuggingFaceAgent(model_endpoint=model_endpoint)

    def update_input_visibility(selected):
        """Show only the input widget that matches the selected input type."""
        return {
            text_input: gr.update(visible=selected == "Text"),
            url_input: gr.update(visible=selected == "URL"),
            file_input: gr.update(visible=selected == "File"),
        }

    def ask_agent(prompt):
        """Run *prompt* through the agent; report failures as text."""
        try:
            return agent.run(prompt)
        except Exception as e:  # UI boundary: keep the base results visible
            return f"Error: {str(e)}"

    def process_input(input_type, text, url, file, tasks, progress=gr.Progress()):
        """Event handler: analyze the chosen input and ask the agent to review.

        ``progress`` is declared as a default argument so that Gradio can
        supply its progress tracker for the event.

        Returns six strings: original text, summary, enhanced summary,
        sentiment, enhanced sentiment, topics.
        """
        input_value = {"Text": text, "URL": url}.get(input_type, file)
        try:
            original_text, summary, sentiment, topics = analyze_text(
                input_value, input_type, tasks, progress
            )
        except Exception as e:  # UI boundary: show the error instead of crashing
            return f"Error: {str(e)}", "", "", "", "", ""

        # Only consult the agent about results that were actually produced.
        enhanced_summary = ""
        if summary:
            enhanced_summary = ask_agent(
                f"Given the following text: '{original_text}', please suggest improvements to this summary: '{summary}'"
            )
        enhanced_sentiment = ""
        if sentiment:
            enhanced_sentiment = ask_agent(
                f"Given the following text: '{original_text}', does this sentiment seem accurate: '{sentiment}'? Please elaborate and suggest any corrections."
            )
        return original_text, summary, enhanced_summary, sentiment, enhanced_sentiment, topics

    # Components and event listeners must be created inside the Blocks context.
    with gr.Blocks(title="Text Analysis App") as demo:
        # Default to "Text" so one input widget is usable on first load.
        input_type = gr.Dropdown(["Text", "URL", "File"], value="Text", label="Input Type")
        text_input = gr.Textbox(label="Text", visible=True)
        url_input = gr.Textbox(label="URL", visible=False)
        file_input = gr.File(label="File", visible=False)
        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )
        submit_button = gr.Button("Analyze")

        with gr.Tabs():
            with gr.Tab("Original Text"):
                original_text_output = gr.Textbox(label="Original Text")
            with gr.Tab("Summary"):
                summary_output = gr.Textbox(label="Summary")
                enhanced_summary_output = gr.Textbox(label="Enhanced Summary")
            with gr.Tab("Sentiment"):
                sentiment_output = gr.Textbox(label="Sentiment")
                enhanced_sentiment_output = gr.Textbox(label="Enhanced Sentiment")
            with gr.Tab("Topics"):
                topics_output = gr.Textbox(label="Topics")

        input_type.change(
            update_input_visibility,
            [input_type],
            [text_input, url_input, file_input],
        )
        # One distinct output component per returned value, in return order.
        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[
                original_text_output,
                summary_output,
                enhanced_summary_output,
                sentiment_output,
                enhanced_sentiment_output,
                topics_output,
            ],
        )

    # Progress updates are delivered through the queue; enabling it explicitly
    # is presumably a no-op on Gradio versions where it is on by default.
    demo.queue()
    return demo


if __name__ == "__main__":
    create_interface().launch()