MHamdan committed on
Commit
8e8a46c
·
verified ·
1 Parent(s): c2c731a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -186
app.py CHANGED
@@ -1,141 +1,99 @@
1
  import gradio as gr
2
- import requests
3
  import time
4
- from bs4 import BeautifulSoup
5
- from transformers import pipeline
6
- import PyPDF2
7
- import docx
8
  import os
9
- from typing import List, Optional
10
-
11
class ContentAnalyzer:
    """Load text from files or URLs and run NLP pipelines over it.

    Three pipelines are created at construction time: summarization
    (``facebook/bart-large-cnn``), sentiment analysis and zero-shot
    classification. The loader helpers never raise on failure; they return
    a human-readable message beginning with ``"Error"`` or ``"Unsupported"``.
    """

    def __init__(self):
        """Create the three pipelines used by :meth:`analyze_content`."""
        print("[DEBUG] Initializing pipelines...")
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.sentiment_analyzer = pipeline("sentiment-analysis")
        self.zero_shot = pipeline("zero-shot-classification")
        print("[DEBUG] Pipelines initialized.")

    def read_file(self, file_obj) -> str:
        """Return the text held in an uploaded ``.txt``, ``.pdf`` or ``.docx`` file.

        Args:
            file_obj: Uploaded file object exposing ``.name`` and ``.read()``,
                or ``None`` when nothing was uploaded.

        Returns:
            The extracted text, ``""`` when ``file_obj`` is ``None``, or a
            message describing an unsupported extension or a read failure.
        """
        if file_obj is None:
            return ""
        file_ext = os.path.splitext(file_obj.name)[1].lower()
        print(f"[DEBUG] File extension: {file_ext}")
        try:
            if file_ext == '.txt':
                data = file_obj.read()
                # A handle opened in text mode already yields str; only bytes
                # need decoding.
                return data.decode('utf-8') if isinstance(data, bytes) else data
            elif file_ext == '.pdf':
                pdf_reader = PyPDF2.PdfReader(file_obj)
                # `or ""` guards against a page yielding no text object at all
                # (assumed possible for image-only pages - confirm against the
                # installed PyPDF2 version).
                return "".join(
                    (page.extract_text() or "") + "\n"
                    for page in pdf_reader.pages
                )
            elif file_ext == '.docx':
                doc = docx.Document(file_obj)
                return "\n".join(paragraph.text for paragraph in doc.paragraphs)
            else:
                return f"Unsupported file type: {file_ext}"
        except Exception as e:
            # UI boundary: report the failure as text instead of crashing.
            return f"Error reading file: {str(e)}"

    def fetch_web_content(self, url: str) -> str:
        """Download ``url`` and return its visible text, one non-empty line per row.

        Args:
            url: Address to request (10 second timeout).

        Returns:
            The page text with ``<script>``/``<style>`` content removed and
            blank lines dropped, or ``"Error fetching URL: ..."`` on failure.
        """
        print(f"[DEBUG] Attempting to fetch URL: {url}")
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Remove scripts and styles so only human-visible text remains.
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text(separator='\n')
            lines = (line.strip() for line in text.splitlines())
            return "\n".join(line for line in lines if line)
        except Exception as e:
            return f"Error fetching URL: {str(e)}"

    def analyze_content(
        self,
        content: str,
        analysis_types: List[str],
    ) -> dict:
        """Run the requested analyses on ``content``.

        Args:
            content: Text to analyze.
            analysis_types: Any of ``"summarize"``, ``"sentiment"``, ``"topics"``.

        Returns:
            A dict that always holds ``"original_text"`` (cut to 1000
            characters plus ``"..."`` when longer) and, per requested
            analysis, ``"summary"``, ``"sentiment"`` and/or ``"topics"``.
        """
        results = {}
        truncated = content[:1000] + "..." if len(content) > 1000 else content
        results["original_text"] = truncated

        # Summarize (input is cut to the first 1024 characters).
        if "summarize" in analysis_types:
            summary = self.summarizer(content[:1024], max_length=130, min_length=30)
            results["summary"] = summary[0]['summary_text']

        # Sentiment (input is cut to the first 512 characters).
        if "sentiment" in analysis_types:
            sentiment = self.sentiment_analyzer(content[:512])
            results["sentiment"] = {
                "label": sentiment[0]['label'],
                "score": round(sentiment[0]['score'], 3)
            }

        # Topics: keep only labels scoring above 0.1.
        if "topics" in analysis_types:
            topics = self.zero_shot(
                content[:512],
                candidate_labels=[
                    "technology", "science", "business", "politics",
                    "entertainment", "education", "health", "sports"
                ]
            )
            results["topics"] = [
                {"label": label, "score": round(score, 3)}
                for label, score in zip(topics['labels'], topics['scores'])
                if score > 0.1
            ]

        return results
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  def create_interface():
101
- analyzer = ContentAnalyzer()
102
-
103
- with gr.Blocks(title="Content Analyzer") as demo:
104
- gr.Markdown("# 📑 Content Analyzer")
105
  gr.Markdown(
106
- "Analyze text from **Text**, **URL**, or **File** with summarization, "
107
- "sentiment, and topic detection. A progress bar will appear during processing."
108
  )
109
 
110
- # Dropdown for input type
111
  input_choice = gr.Dropdown(
112
  choices=["Text", "URL", "File"],
113
  value="Text",
114
  label="Select Input Type"
115
  )
116
 
117
- # We use three separate columns to conditionally display
118
  with gr.Column(visible=True) as text_col:
119
  text_input = gr.Textbox(
120
  label="Enter Text",
121
- placeholder="Paste your text here...",
122
- lines=5
123
  )
124
-
125
  with gr.Column(visible=False) as url_col:
126
  url_input = gr.Textbox(
127
  label="Enter URL",
128
  placeholder="https://example.com"
129
  )
130
-
131
  with gr.Column(visible=False) as file_col:
132
  file_input = gr.File(
133
- label="Upload File",
134
- file_types=[".txt", ".pdf", ".docx"]
135
  )
136
 
 
137
  def show_inputs(choice):
138
- """Return a dict mapping columns to booleans for visibility."""
139
  return {
140
  text_col: choice == "Text",
141
  url_col: choice == "URL",
@@ -148,87 +106,20 @@ def create_interface():
148
  outputs=[text_col, url_col, file_col]
149
  )
150
 
151
- analysis_types = gr.CheckboxGroup(
152
- choices=["summarize", "sentiment", "topics"],
153
- value=["summarize"],
154
- label="Analysis Types"
155
- )
156
-
157
  analyze_btn = gr.Button("Analyze", variant="primary")
158
 
159
- # Output tabs
160
- with gr.Tabs():
161
- with gr.Tab("Original Text"):
162
- original_text = gr.Markdown()
163
- with gr.Tab("Summary"):
164
- summary_output = gr.Markdown()
165
- with gr.Tab("Sentiment"):
166
- sentiment_output = gr.Markdown()
167
- with gr.Tab("Topics"):
168
- topics_output = gr.Markdown()
169
-
170
- def process_analysis(choice, text_val, url_val, file_val, types):
171
- """
172
- This function does everything in one place using a 'with gr.Progress() as p:' block,
173
- so we can show each step of the process. We add time.sleep(1) just to demonstrate
174
- the progress bar (otherwise it may appear/disappear too quickly).
175
- """
176
- with gr.Progress() as p:
177
- # STEP 1: Retrieve content
178
- p(0, total=4, desc="Reading input")
179
- time.sleep(1) # For demonstration
180
- if choice == "Text":
181
- content = text_val or ""
182
- elif choice == "URL":
183
- content = analyzer.fetch_web_content(url_val or "")
184
- else: # File
185
- content = analyzer.read_file(file_val)
186
-
187
- if not content or content.startswith("Error"):
188
- return content or "No content provided", "", "", ""
189
-
190
- # STEP 2: Summarize
191
- p(1, total=4, desc="Summarizing content")
192
- time.sleep(1) # For demonstration
193
-
194
- # STEP 3: Sentiment
195
- p(2, total=4, desc="Performing sentiment analysis")
196
- time.sleep(1) # For demonstration
197
-
198
- # STEP 4: Topics
199
- p(3, total=4, desc="Identifying topics")
200
- time.sleep(1) # For demonstration
201
-
202
- # After the progress steps, do the actual analysis in one shot
203
- # (You could interleave the calls to pipeline with each progress step
204
- # if you want real-time progress. This is a simplified approach.)
205
- results = analyzer.analyze_content(content, types)
206
-
207
- if "error" in results:
208
- return results["error"], "", "", ""
209
-
210
- original = results.get("original_text", "")
211
- summary = results.get("summary", "")
212
- sentiment = ""
213
- if "sentiment" in results:
214
- s = results["sentiment"]
215
- sentiment = f"**Sentiment:** {s['label']} (Confidence: {s['score']})"
216
-
217
- topics = ""
218
- if "topics" in results:
219
- t_list = "\n".join([
220
- f"- {t['label']}: {t['score']}"
221
- for t in results["topics"]
222
- ])
223
- topics = "**Detected Topics:**\n" + t_list
224
-
225
- return original, summary, sentiment, topics
226
 
 
227
  analyze_btn.click(
228
- fn=process_analysis,
229
- inputs=[input_choice, text_input, url_input, file_input, analysis_types],
230
- outputs=[original_text, summary_output, sentiment_output, topics_output],
231
- show_progress=True # This ensures the Gradio progress bar is enabled
232
  )
233
 
234
  return demo
 
1
  import gradio as gr
 
2
  import time
3
+ import requests
 
 
 
4
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def read_file(file_obj):
    """Read UTF-8 text from an uploaded ``.txt`` file (no PDF/docx).

    Args:
        file_obj: ``None``, a filesystem path (``str`` / ``os.PathLike``), or
            an object exposing ``.name`` and ``.read()``.

    Returns:
        The file's text, ``""`` when ``file_obj`` is ``None``,
        ``"Unsupported file type: <ext>"`` for any extension other than
        ``.txt``, or ``"Error reading file: ..."`` when reading or decoding
        fails.
    """
    if file_obj is None:
        return ""

    # Accept a bare path as well as a file-like wrapper carrying `.name`.
    is_path = isinstance(file_obj, (str, os.PathLike))
    path = file_obj if is_path else getattr(file_obj, "name", "")
    file_ext = os.path.splitext(str(path))[1].lower()
    if file_ext != ".txt":
        return f"Unsupported file type: {file_ext}"

    try:
        if is_path:
            with open(file_obj, "rb") as handle:
                data = handle.read()
        else:
            data = file_obj.read()
        # A handle opened in text mode already yields str; only bytes need
        # decoding.
        return data.decode("utf-8") if isinstance(data, bytes) else data
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        return f"Error reading file: {str(e)}"
17
+
18
def fetch_url(url: str):
    """Request ``url`` and return the start of the response body.

    The request uses a 10 second timeout and non-success HTTP statuses are
    treated as failures. Only the first 1000 characters of the body are
    returned; any failure is reported as ``"Error fetching URL: ..."``.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        body = response.text
        # Only a preview is needed, so keep the first 1000 chars.
        return body[:1000]
    except Exception as err:
        return "Error fetching URL: " + str(err)
26
+
27
def process_input(choice, text_val, url_val, file_val, progress=gr.Progress()):
    """Read the selected input while reporting four progress steps.

    Args:
        choice: ``"Text"``, ``"URL"``; any other value is treated as ``"File"``.
        text_val: Text box content, used when ``choice`` is ``"Text"``.
        url_val: URL to fetch, used when ``choice`` is ``"URL"``.
        file_val: Uploaded file, used for the file branch.
        progress: Gradio progress tracker. Leave it at its default: Gradio
            looks for a parameter whose default is a ``gr.Progress`` instance
            and supplies the live tracker for the running event. The tracker
            is not used as a context manager.

    Returns:
        The text that was read (or the error/placeholder message produced
        while reading it), to be shown in the output component.
    """
    total_steps = 4

    # STEP 1: "Reading input" placeholder. Progress is reported as a 0-1
    # fraction so each step advances the bar by a quarter.
    progress(0 / total_steps, desc="Reading input")
    time.sleep(1)  # demo only: keeps the bar visible long enough to see

    # Actually read the content now
    if choice == "Text":
        content = text_val or "No text provided"
    elif choice == "URL":
        content = fetch_url(url_val or "")
    else:  # "File"
        content = read_file(file_val)

    # STEP 2: Some dummy step
    progress(1 / total_steps, desc="Doing something else")
    time.sleep(1)

    # STEP 3: Another dummy step
    progress(2 / total_steps, desc="Almost done...")
    time.sleep(1)

    # STEP 4: Final step
    progress(3 / total_steps, desc="Finalizing")
    time.sleep(1)

    # Return the content to show in the output
    return content
61
 
62
  def create_interface():
63
+ with gr.Blocks(title="Minimal Progress Bar Demo") as demo:
64
+ gr.Markdown("# Minimal Progress Bar Demo")
 
 
65
  gr.Markdown(
66
+ "Select an input type, provide some data, then click **Analyze**. "
67
+ "A progress bar will appear with four steps."
68
  )
69
 
70
+ # 1) Dropdown to select input
71
  input_choice = gr.Dropdown(
72
  choices=["Text", "URL", "File"],
73
  value="Text",
74
  label="Select Input Type"
75
  )
76
 
77
+ # 2) Containers for each input
78
  with gr.Column(visible=True) as text_col:
79
  text_input = gr.Textbox(
80
  label="Enter Text",
81
+ placeholder="Paste text here...",
82
+ lines=3
83
  )
 
84
  with gr.Column(visible=False) as url_col:
85
  url_input = gr.Textbox(
86
  label="Enter URL",
87
  placeholder="https://example.com"
88
  )
 
89
  with gr.Column(visible=False) as file_col:
90
  file_input = gr.File(
91
+ label="Upload a .txt File Only",
92
+ file_types=[".txt"]
93
  )
94
 
95
+ # Toggle visibility function
96
  def show_inputs(choice):
 
97
  return {
98
  text_col: choice == "Text",
99
  url_col: choice == "URL",
 
106
  outputs=[text_col, url_col, file_col]
107
  )
108
 
 
 
 
 
 
 
109
  analyze_btn = gr.Button("Analyze", variant="primary")
110
 
111
+ # 3) Output
112
+ output_box = gr.Textbox(
113
+ label="Output",
114
+ lines=6
115
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # Link the button to the process function
118
  analyze_btn.click(
119
+ fn=process_input,
120
+ inputs=[input_choice, text_input, url_input, file_input],
121
+ outputs=[output_box],
122
+ show_progress=True
123
  )
124
 
125
  return demo