Spaces:

MHamdan
/

ContentAnalyzer

Sleeping

App Files Files Community

MHamdan committed on Feb 16

Commit

16eaebe

verified ·

1 Parent(s): 8e8a46c

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -121

app.py CHANGED Viewed

@@ -1,129 +1,138 @@
 import gradio as gr
-import time
 import requests
-import os
-def read_file(file_obj):
-    """Reads text from a .txt file only (no PDF/docx)."""
-    if file_obj is None:
-        return ""
-    file_ext = os.path.splitext(file_obj.name)[1].lower()
-    if file_ext != ".txt":
-        return f"Unsupported file type: {file_ext}"
-    try:
-        return file_obj.read().decode("utf-8")
-    except Exception as e:
-        return f"Error reading file: {str(e)}"
-def fetch_url(url: str):
-    """Fetch text from URL."""
-    try:
-        resp = requests.get(url, timeout=10)
-        resp.raise_for_status()
-        return resp.text[:1000]  # just show first 1000 chars
-    except Exception as e:
-        return f"Error fetching URL: {str(e)}"
-def process_input(choice, text_val, url_val, file_val):
-    """
-    Minimal process function that:
-    1. Shows a progress bar for 4 steps (with time.sleep to visualize).
-    2. Reads content from the chosen input type.
-    3. Returns that content to the output.
-    """
-    with gr.Progress() as p:
-        # STEP 1: "Reading input" placeholder
-        p(0, total=4, desc="Reading input")
-        time.sleep(1)
-        # Actually read the content now
-        if choice == "Text":
-            content = text_val or "No text provided"
-        elif choice == "URL":
-            content = fetch_url(url_val or "")
-        else:  # "File"
-            content = read_file(file_val)
-        # STEP 2: Some dummy step
-        p(1, total=4, desc="Doing something else")
-        time.sleep(1)
-        # STEP 3: Another dummy step
-        p(2, total=4, desc="Almost done...")
         time.sleep(1)
-        # STEP 4: Final step
-        p(3, total=4, desc="Finalizing")
         time.sleep(1)
-    # Return the content to show in the output
-    return content
 def create_interface():
-    with gr.Blocks(title="Minimal Progress Bar Demo") as demo:
-        gr.Markdown("# Minimal Progress Bar Demo")
-        gr.Markdown(
-            "Select an input type, provide some data, then click **Analyze**. "
-            "A progress bar will appear with four steps."
-        )
-        # 1) Dropdown to select input
-        input_choice = gr.Dropdown(
-            choices=["Text", "URL", "File"],
-            value="Text",
-            label="Select Input Type"
-        )
-        # 2) Containers for each input
-        with gr.Column(visible=True) as text_col:
-            text_input = gr.Textbox(
-                label="Enter Text",
-                placeholder="Paste text here...",
-                lines=3
-            )
-        with gr.Column(visible=False) as url_col:
-            url_input = gr.Textbox(
-                label="Enter URL",
-                placeholder="https://example.com"
-            )
-        with gr.Column(visible=False) as file_col:
-            file_input = gr.File(
-                label="Upload a .txt File Only",
-                file_types=[".txt"]
-            )
-        # Toggle visibility function
-        def show_inputs(choice):
-            return {
-                text_col: choice == "Text",
-                url_col: choice == "URL",
-                file_col: choice == "File"
-            }
-        input_choice.change(
-            fn=show_inputs,
-            inputs=[input_choice],
-            outputs=[text_col, url_col, file_col]
-        )
-        analyze_btn = gr.Button("Analyze", variant="primary")
-        # 3) Output
-        output_box = gr.Textbox(
-            label="Output",
-            lines=6
-        )
-        # Link the button to the process function
-        analyze_btn.click(
-            fn=process_input,
-            inputs=[input_choice, text_input, url_input, file_input],
-            outputs=[output_box],
-            show_progress=True
-        )
-    return demo
 if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()

 import gradio as gr
+from transformers import pipeline
 import requests
+from bs4 import BeautifulSoup
+import PyPDF2
+import docx
+import time
+from smolagents.agents import HuggingFaceAgent
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+sentiment_analyzer = pipeline("sentiment-analysis")
+topic_classifier = pipeline("zero-shot-classification")
+def fetch_text_from_url(url):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.text, "html.parser")
+    return " ".join(p.get_text() for p in soup.find_all("p"))
+def extract_text_from_pdf(file):
+    pdf_reader = PyPDF2.PdfReader(file)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+def extract_text_from_docx(file):
+    doc = docx.Document(file)
+    text = ""
+    for para in doc.paragraphs:
+        text += para.text + "\n"
+    return text
+def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
+    if input_type == "URL":
+        progress(0, desc="Fetching text from URL")
+        input_text = fetch_text_from_url(input_text)
+    elif input_type == "File":
+        progress(0, desc="Extracting text from file")
+        if input_text.name.lower().endswith(".pdf"):
+            input_text = extract_text_from_pdf(input_text)
+        elif input_text.name.lower().endswith(".docx"):
+            input_text = extract_text_from_docx(input_text)
+        else:
+            input_text = input_text.read().decode("utf-8")
+    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
+    summary, sentiment, topics = "", "", ""
+    if "Summarization" in tasks:
+        progress(0.3, desc="Generating summary")
+        summary = summarizer(input_text, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
+        time.sleep(1)  # Add a minimal delay for demonstration purposes
+    if "Sentiment Analysis" in tasks:
+        progress(0.6, desc="Analyzing sentiment")
+        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]  # Truncate input for sentiment analysis
         time.sleep(1)
+    if "Topic Detection" in tasks:
+        progress(0.9, desc="Detecting topics")
+        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
+        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]  # Truncate input for topic detection
         time.sleep(1)
+    progress(1, desc="Analysis completed")
+    return original_text, summary, sentiment, ", ".join(topics)
 def create_interface():
+    input_type = gr.inputs.Dropdown(["Text", "URL", "File"], label="Input Type")
+    text_input = gr.Textbox(visible=False)
+    url_input = gr.Textbox(visible=False)
+    file_input = gr.File(visible=False)
+    tasks_checkboxes = gr.CheckboxGroup(["Summarization", "Sentiment Analysis", "Topic Detection"], label="Analysis Tasks")
+    submit_button = gr.Button("Analyze")
+    progress_bar = gr.Progress()
+    model_endpoint = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
+    agent = HuggingFaceAgent(model_endpoint=model_endpoint)
+    def update_input_visibility(input_type):
+        return {
+            text_input: gr.update(visible=input_type == "Text"),
+            url_input: gr.update(visible=input_type == "URL"),
+            file_input: gr.update(visible=input_type == "File"),
+        }
+    input_type.change(update_input_visibility, [input_type], [text_input, url_input, file_input])
+    original_text_output = gr.Textbox(label="Original Text")
+    summary_output = gr.Textbox(label="Summary")
+    sentiment_output = gr.Textbox(label="Sentiment")
+    topics_output = gr.Textbox(label="Topics")
+    def process_input(input_type, text, url, file, tasks):
+        if input_type == "Text":
+            input_value = text
+        elif input_type == "URL":
+            input_value = url
+        else:
+            input_value = file
+        try:
+            original_text, summary, sentiment, topics = analyze_text(input_value, input_type, tasks, progress_bar)
+            enhanced_summary = agent.run(f"Given the following text: '{original_text}', please suggest improvements to this summary: '{summary}'")
+            enhanced_sentiment = agent.run(f"Given the following text: '{original_text}', does this sentiment seem accurate: '{sentiment}'? Please elaborate and suggest any corrections.")
+        except Exception as e:
+            original_text = f"Error: {str(e)}"
+            summary, sentiment, topics = "", "", ""
+            enhanced_summary = ""
+            enhanced_sentiment = ""
+        return original_text, summary, enhanced_summary, sentiment, enhanced_sentiment, topics
+    submit_button.click(
+        fn=process_input,
+        inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
+        outputs=[original_text_output, summary_output, summary_output, sentiment_output, sentiment_output, topics_output]
+    )
+    interface = gr.TabbedInterface([
+        gr.Tab(original_text_output, label="Original Text"),
+        gr.Tab(summary_output, label="Summary"),
+        gr.Tab(sentiment_output, label="Sentiment"),
+        gr.Tab(topics_output, label="Topics")
+    ])
+    return gr.Blocks(
+        title="Text Analysis App",
+        inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes, submit_button],
+        outputs=[interface, progress_bar]
+    )
 if __name__ == "__main__":
+    create_interface().launch()