Spaces:

MHamdan
/

ContentAnalyzer

Running

App Files Files Community

MHamdan committed on Feb 16

Commit

8e8a46c

verified ·

1 Parent(s): c2c731a

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -186

app.py CHANGED Viewed

@@ -1,141 +1,99 @@
 import gradio as gr
-import requests
 import time
-from bs4 import BeautifulSoup
-from transformers import pipeline
-import PyPDF2
-import docx
 import os
-from typing import List, Optional
-class ContentAnalyzer:
-    def __init__(self):
-        print("[DEBUG] Initializing pipelines...")
-        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-        self.sentiment_analyzer = pipeline("sentiment-analysis")
-        self.zero_shot = pipeline("zero-shot-classification")
-        print("[DEBUG] Pipelines initialized.")
-    def read_file(self, file_obj) -> str:
-        """Read content from different file types."""
-        if file_obj is None:
-            return ""
-        file_ext = os.path.splitext(file_obj.name)[1].lower()
-        print(f"[DEBUG] File extension: {file_ext}")
-        try:
-            if file_ext == '.txt':
-                return file_obj.read().decode('utf-8')
-            elif file_ext == '.pdf':
-                pdf_reader = PyPDF2.PdfReader(file_obj)
-                text = ""
-                for page in pdf_reader.pages:
-                    text += page.extract_text() + "\n"
-                return text
-            elif file_ext == '.docx':
-                doc = docx.Document(file_obj)
-                return "\n".join([paragraph.text for paragraph in doc.paragraphs])
-            else:
-                return f"Unsupported file type: {file_ext}"
-        except Exception as e:
-            return f"Error reading file: {str(e)}"
-    def fetch_web_content(self, url: str) -> str:
-        """Fetch content from URL."""
-        print(f"[DEBUG] Attempting to fetch URL: {url}")
-        try:
-            response = requests.get(url, timeout=10)
-            response.raise_for_status()
-            soup = BeautifulSoup(response.text, 'html.parser')
-            # Remove scripts and styles
-            for script in soup(["script", "style"]):
-                script.decompose()
-            text = soup.get_text(separator='\n')
-            lines = (line.strip() for line in text.splitlines())
-            final_text = "\n".join(line for line in lines if line)
-            return final_text
-        except Exception as e:
-            return f"Error fetching URL: {str(e)}"
-    def analyze_content(
-        self,
-        content: str,
-        analysis_types: List[str],
-    ) -> dict:
-        """Perform summarization, sentiment analysis, and topic detection on `content`."""
-        results = {}
-        truncated = content[:1000] + "..." if len(content) > 1000 else content
-        results["original_text"] = truncated
-        # Summarize
-        if "summarize" in analysis_types:
-            summary = self.summarizer(content[:1024], max_length=130, min_length=30)
-            results["summary"] = summary[0]['summary_text']
-        # Sentiment
-        if "sentiment" in analysis_types:
-            sentiment = self.sentiment_analyzer(content[:512])
-            results["sentiment"] = {
-                "label": sentiment[0]['label'],
-                "score": round(sentiment[0]['score'], 3)
-            }
-        # Topics
-        if "topics" in analysis_types:
-            topics = self.zero_shot(
-                content[:512],
-                candidate_labels=[
-                    "technology", "science", "business", "politics",
-                    "entertainment", "education", "health", "sports"
-                ]
-            )
-            results["topics"] = [
-                {"label": label, "score": round(score, 3)}
-                for label, score in zip(topics['labels'], topics['scores'])
-                if score > 0.1
-            ]
-        return results
 def create_interface():
-    analyzer = ContentAnalyzer()
-    with gr.Blocks(title="Content Analyzer") as demo:
-        gr.Markdown("# 📑 Content Analyzer")
         gr.Markdown(
-            "Analyze text from **Text**, **URL**, or **File** with summarization, "
-            "sentiment, and topic detection. A progress bar will appear during processing."
         )
-        # Dropdown for input type
         input_choice = gr.Dropdown(
             choices=["Text", "URL", "File"],
             value="Text",
             label="Select Input Type"
         )
-        # We use three separate columns to conditionally display
         with gr.Column(visible=True) as text_col:
             text_input = gr.Textbox(
                 label="Enter Text",
-                placeholder="Paste your text here...",
-                lines=5
             )
         with gr.Column(visible=False) as url_col:
             url_input = gr.Textbox(
                 label="Enter URL",
                 placeholder="https://example.com"
             )
         with gr.Column(visible=False) as file_col:
             file_input = gr.File(
-                label="Upload File",
-                file_types=[".txt", ".pdf", ".docx"]
             )
         def show_inputs(choice):
-            """Return a dict mapping columns to booleans for visibility."""
             return {
                 text_col: choice == "Text",
                 url_col: choice == "URL",
@@ -148,87 +106,20 @@ def create_interface():
             outputs=[text_col, url_col, file_col]
         )
-        analysis_types = gr.CheckboxGroup(
-            choices=["summarize", "sentiment", "topics"],
-            value=["summarize"],
-            label="Analysis Types"
-        )
         analyze_btn = gr.Button("Analyze", variant="primary")
-        # Output tabs
-        with gr.Tabs():
-            with gr.Tab("Original Text"):
-                original_text = gr.Markdown()
-            with gr.Tab("Summary"):
-                summary_output = gr.Markdown()
-            with gr.Tab("Sentiment"):
-                sentiment_output = gr.Markdown()
-            with gr.Tab("Topics"):
-                topics_output = gr.Markdown()
-        def process_analysis(choice, text_val, url_val, file_val, types):
-            """
-            This function does everything in one place using a 'with gr.Progress() as p:' block,
-            so we can show each step of the process. We add time.sleep(1) just to demonstrate
-            the progress bar (otherwise it may appear/disappear too quickly).
-            """
-            with gr.Progress() as p:
-                # STEP 1: Retrieve content
-                p(0, total=4, desc="Reading input")
-                time.sleep(1)  # For demonstration
-                if choice == "Text":
-                    content = text_val or ""
-                elif choice == "URL":
-                    content = analyzer.fetch_web_content(url_val or "")
-                else:  # File
-                    content = analyzer.read_file(file_val)
-                if not content or content.startswith("Error"):
-                    return content or "No content provided", "", "", ""
-                # STEP 2: Summarize
-                p(1, total=4, desc="Summarizing content")
-                time.sleep(1)  # For demonstration
-                # STEP 3: Sentiment
-                p(2, total=4, desc="Performing sentiment analysis")
-                time.sleep(1)  # For demonstration
-                # STEP 4: Topics
-                p(3, total=4, desc="Identifying topics")
-                time.sleep(1)  # For demonstration
-            # After the progress steps, do the actual analysis in one shot
-            # (You could interleave the calls to pipeline with each progress step
-            # if you want real-time progress. This is a simplified approach.)
-            results = analyzer.analyze_content(content, types)
-            if "error" in results:
-                return results["error"], "", "", ""
-            original = results.get("original_text", "")
-            summary = results.get("summary", "")
-            sentiment = ""
-            if "sentiment" in results:
-                s = results["sentiment"]
-                sentiment = f"**Sentiment:** {s['label']} (Confidence: {s['score']})"
-            topics = ""
-            if "topics" in results:
-                t_list = "\n".join([
-                    f"- {t['label']}: {t['score']}"
-                    for t in results["topics"]
-                ])
-                topics = "**Detected Topics:**\n" + t_list
-            return original, summary, sentiment, topics
         analyze_btn.click(
-            fn=process_analysis,
-            inputs=[input_choice, text_input, url_input, file_input, analysis_types],
-            outputs=[original_text, summary_output, sentiment_output, topics_output],
-            show_progress=True  # This ensures the Gradio progress bar is enabled
         )
     return demo

 import gradio as gr
 import time
+import requests
 import os
def read_file(file_obj):
    """Return the UTF-8 text of an uploaded ``.txt`` file.

    Accepts either a filesystem path (``str`` / ``os.PathLike``) or an object
    exposing the path through a ``name`` attribute. When that path exists on
    disk the bytes are read from the path itself rather than from
    ``file_obj.read()``.
    NOTE(review): this is because the upload value handed over by ``gr.File``
    may be a plain path or a wrapper whose own stream is empty -- confirm
    against the installed Gradio version.
    Objects that are not backed by a file on disk still fall back to their
    own ``read()`` method.

    Args:
        file_obj: ``None``, a path, or an object with a ``name`` attribute
            (and optionally a ``read()`` method).

    Returns:
        The decoded text; ``""`` when ``file_obj`` is ``None``; otherwise a
        message starting with ``"Unsupported file type:"`` or
        ``"Error reading file:"``. This function reports failures as text
        instead of raising so the UI always has something to display.
    """
    if file_obj is None:
        return ""

    if isinstance(file_obj, (str, os.PathLike)):
        path = os.fspath(file_obj)
    else:
        path = file_obj.name

    file_ext = os.path.splitext(path)[1].lower()
    if file_ext != ".txt":
        return f"Unsupported file type: {file_ext}"

    try:
        reader = getattr(file_obj, "read", None)
        if reader is None or os.path.isfile(path):
            # Binary mode keeps the original decode semantics (no newline
            # translation) and closes the handle deterministically.
            with open(path, "rb") as handle:
                raw = handle.read()
        else:
            raw = reader()
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode("utf-8")
        return raw
    except Exception as e:  # UI boundary: report the failure as text.
        return f"Error reading file: {str(e)}"
def fetch_url(url: str, *, max_chars=1000, timeout=10):
    """Fetch a URL and return the beginning of its response body as text.

    Args:
        url: Address to request. ``None`` or a blank string is rejected
            before any request is attempted.
        max_chars: Maximum number of characters of the body to return
            (default 1000, matching the previous hard-coded preview size).
            ``None`` returns the whole body.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        The (possibly truncated) response text, or a message starting with
        ``"Error fetching URL:"`` if the input is blank, the request fails,
        or ``raise_for_status()`` raises. Failures are returned as text
        rather than raised so the caller can display them directly.
    """
    target = (url or "").strip()
    if not target:
        # Avoid handing an empty URL to the HTTP library, whose error text
        # for this case is not helpful to an end user.
        return "Error fetching URL: no URL provided"

    try:
        resp = requests.get(target, timeout=timeout)
        resp.raise_for_status()
        return resp.text[:max_chars]
    except Exception as e:  # UI boundary: report the failure as text.
        return f"Error fetching URL: {str(e)}"
def process_input(choice, text_val, url_val, file_val, progress=gr.Progress()):
    """Read the selected input while reporting four progress steps.

    The progress tracker is declared as a default argument because that is
    how Gradio recognises and injects it for an event handler;
    ``gr.Progress`` is not a context manager, so it cannot be used in a
    ``with`` statement. Each update passes a ``(completed, total)`` tuple so
    the bar is reported in steps.

    Args:
        choice: Selected input type; ``"Text"`` and ``"URL"`` are handled
            explicitly, anything else is treated as a file upload.
        text_val: Text typed by the user (used when ``choice == "Text"``).
        url_val: URL typed by the user (used when ``choice == "URL"``).
        file_val: Uploaded file value (used for the file branch).
        progress: Progress tracker; callers normally leave this to Gradio.

    Returns:
        The content read from the chosen input, or the message produced by
        ``fetch_url`` / ``read_file`` when reading fails.
    """
    total_steps = 4

    # STEP 1: read the content from the chosen input.
    progress((0, total_steps), desc="Reading input")
    time.sleep(1)  # Deliberate pause so each step is visible in the demo.

    if choice == "Text":
        content = text_val or "No text provided"
    elif choice == "URL":
        content = fetch_url(url_val or "")
    else:  # "File"
        content = read_file(file_val)

    # STEPS 2-4: placeholder steps that only advance the progress bar.
    placeholder_steps = ("Doing something else", "Almost done...", "Finalizing")
    for step, desc in enumerate(placeholder_steps, start=1):
        progress((step, total_steps), desc=desc)
        time.sleep(1)

    # Return the content to show in the output.
    return content
 def create_interface():
+    with gr.Blocks(title="Minimal Progress Bar Demo") as demo:
+        gr.Markdown("# Minimal Progress Bar Demo")
         gr.Markdown(
+            "Select an input type, provide some data, then click **Analyze**. "
+            "A progress bar will appear with four steps."
         )
+        # 1) Dropdown to select input
         input_choice = gr.Dropdown(
             choices=["Text", "URL", "File"],
             value="Text",
             label="Select Input Type"
         )
+        # 2) Containers for each input
         with gr.Column(visible=True) as text_col:
             text_input = gr.Textbox(
                 label="Enter Text",
+                placeholder="Paste text here...",
+                lines=3
             )
         with gr.Column(visible=False) as url_col:
             url_input = gr.Textbox(
                 label="Enter URL",
                 placeholder="https://example.com"
             )
         with gr.Column(visible=False) as file_col:
             file_input = gr.File(
+                label="Upload a .txt File Only",
+                file_types=[".txt"]
             )
+        # Toggle visibility function
         def show_inputs(choice):
             return {
                 text_col: choice == "Text",
                 url_col: choice == "URL",
             outputs=[text_col, url_col, file_col]
         )
         analyze_btn = gr.Button("Analyze", variant="primary")
+        # 3) Output
+        output_box = gr.Textbox(
+            label="Output",
+            lines=6
+        )
+        # Link the button to the process function
         analyze_btn.click(
+            fn=process_input,
+            inputs=[input_choice, text_input, url_input, file_input],
+            outputs=[output_box],
+            show_progress=True
         )
     return demo