MHamdan commited on
Commit
c2c731a
·
verified ·
1 Parent(s): 523e9ce
Files changed (1) hide show
  1. app.py +93 -99
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import requests
 
3
  from bs4 import BeautifulSoup
4
  from transformers import pipeline
5
  import PyPDF2
@@ -9,15 +10,18 @@ from typing import List, Optional
9
 
10
  class ContentAnalyzer:
11
  def __init__(self):
 
12
  self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
13
  self.sentiment_analyzer = pipeline("sentiment-analysis")
14
  self.zero_shot = pipeline("zero-shot-classification")
 
15
 
16
  def read_file(self, file_obj) -> str:
17
  """Read content from different file types."""
18
  if file_obj is None:
19
  return ""
20
  file_ext = os.path.splitext(file_obj.name)[1].lower()
 
21
  try:
22
  if file_ext == '.txt':
23
  return file_obj.read().decode('utf-8')
@@ -37,82 +41,60 @@ class ContentAnalyzer:
37
 
38
  def fetch_web_content(self, url: str) -> str:
39
  """Fetch content from URL."""
 
40
  try:
41
  response = requests.get(url, timeout=10)
42
  response.raise_for_status()
43
  soup = BeautifulSoup(response.text, 'html.parser')
 
44
  for script in soup(["script", "style"]):
45
  script.decompose()
46
  text = soup.get_text(separator='\n')
47
  lines = (line.strip() for line in text.splitlines())
48
- return "\n".join(line for line in lines if line)
 
49
  except Exception as e:
50
  return f"Error fetching URL: {str(e)}"
51
 
52
  def analyze_content(
53
  self,
54
- text: Optional[str] = None,
55
- url: Optional[str] = None,
56
- file: Optional[object] = None,
57
- analysis_types: List[str] = ["summarize"],
58
- progress_callback=None
59
  ) -> dict:
60
- try:
61
- # STEP 1: Retrieve content
62
- if progress_callback:
63
- progress_callback(1, "Reading input")
64
-
65
- if url:
66
- content = self.fetch_web_content(url)
67
- elif file:
68
- content = self.read_file(file)
69
- else:
70
- content = text or ""
71
-
72
- if not content or content.startswith("Error"):
73
- return {"error": content or "No content provided"}
74
-
75
- truncated = content[:1000] + "..." if len(content) > 1000 else content
76
- results = {"original_text": truncated}
77
-
78
- # STEP 2: Summarize
79
- if "summarize" in analysis_types:
80
- if progress_callback:
81
- progress_callback(2, "Summarizing content")
82
- summary = self.summarizer(content[:1024], max_length=130, min_length=30)
83
- results["summary"] = summary[0]['summary_text']
84
-
85
- # STEP 3: Sentiment
86
- if "sentiment" in analysis_types:
87
- if progress_callback:
88
- progress_callback(3, "Performing sentiment analysis")
89
- sentiment = self.sentiment_analyzer(content[:512])
90
- results["sentiment"] = {
91
- "label": sentiment[0]['label'],
92
- "score": round(sentiment[0]['score'], 3)
93
- }
94
-
95
- # STEP 4: Topics
96
- if "topics" in analysis_types:
97
- if progress_callback:
98
- progress_callback(4, "Identifying topics")
99
- topics = self.zero_shot(
100
- content[:512],
101
- candidate_labels=[
102
- "technology", "science", "business", "politics",
103
- "entertainment", "education", "health", "sports"
104
- ]
105
- )
106
- results["topics"] = [
107
- {"label": label, "score": round(score, 3)}
108
- for label, score in zip(topics['labels'], topics['scores'])
109
- if score > 0.1
110
  ]
 
 
 
 
 
 
111
 
112
- return results
113
-
114
- except Exception as e:
115
- return {"error": f"Analysis error: {str(e)}"}
116
 
117
 
118
  def create_interface():
@@ -120,49 +102,52 @@ def create_interface():
120
 
121
  with gr.Blocks(title="Content Analyzer") as demo:
122
  gr.Markdown("# 📑 Content Analyzer")
123
- gr.Markdown("Analyze text content from various sources using AI.")
 
 
 
124
 
125
- # Dropdown to choose input type
126
  input_choice = gr.Dropdown(
127
  choices=["Text", "URL", "File"],
128
  value="Text",
129
  label="Select Input Type"
130
  )
131
 
132
- # Containers for each input type
133
  with gr.Column(visible=True) as text_col:
134
  text_input = gr.Textbox(
135
  label="Enter Text",
136
  placeholder="Paste your text here...",
137
  lines=5
138
  )
 
139
  with gr.Column(visible=False) as url_col:
140
  url_input = gr.Textbox(
141
  label="Enter URL",
142
  placeholder="https://example.com"
143
  )
 
144
  with gr.Column(visible=False) as file_col:
145
  file_input = gr.File(
146
  label="Upload File",
147
  file_types=[".txt", ".pdf", ".docx"]
148
  )
149
 
150
- # Callback function to show/hide input columns
151
  def show_inputs(choice):
 
152
  return {
153
  text_col: choice == "Text",
154
  url_col: choice == "URL",
155
  file_col: choice == "File"
156
  }
157
 
158
- # Trigger showing/hiding based on the dropdown choice
159
  input_choice.change(
160
  fn=show_inputs,
161
  inputs=[input_choice],
162
  outputs=[text_col, url_col, file_col]
163
  )
164
 
165
- # Analysis Options
166
  analysis_types = gr.CheckboxGroup(
167
  choices=["summarize", "sentiment", "topics"],
168
  value=["summarize"],
@@ -171,7 +156,7 @@ def create_interface():
171
 
172
  analyze_btn = gr.Button("Analyze", variant="primary")
173
 
174
- # Output Sections in tabs
175
  with gr.Tabs():
176
  with gr.Tab("Original Text"):
177
  original_text = gr.Markdown()
@@ -182,40 +167,46 @@ def create_interface():
182
  with gr.Tab("Topics"):
183
  topics_output = gr.Markdown()
184
 
185
- def process_analysis(choice, text, url, file, types, progress=gr.Progress()):
186
- """Orchestrates analysis depending on input choice."""
187
- steps_total = 4
188
-
189
- def progress_callback(step: int, desc: str):
190
- progress(step, total=steps_total, desc=desc)
191
-
192
- # Determine which content to pass based on the input choice
193
- if choice == "Text":
194
- content_text = text
195
- content_url = None
196
- content_file = None
197
- elif choice == "URL":
198
- content_text = None
199
- content_url = url
200
- content_file = None
201
- else: # choice == "File"
202
- content_text = None
203
- content_url = None
204
- content_file = file
205
-
206
- # Perform analysis
207
- results = analyzer.analyze_content(
208
- text=content_text,
209
- url=content_url,
210
- file=content_file,
211
- analysis_types=types,
212
- progress_callback=progress_callback
213
- )
 
 
 
 
 
 
 
214
 
215
  if "error" in results:
216
  return results["error"], "", "", ""
217
 
218
- # Format outputs
219
  original = results.get("original_text", "")
220
  summary = results.get("summary", "")
221
  sentiment = ""
@@ -225,7 +216,10 @@ def create_interface():
225
 
226
  topics = ""
227
  if "topics" in results:
228
- t_list = "\n".join([f"- {t['label']}: {t['score']}" for t in results["topics"]])
 
 
 
229
  topics = "**Detected Topics:**\n" + t_list
230
 
231
  return original, summary, sentiment, topics
@@ -234,7 +228,7 @@ def create_interface():
234
  fn=process_analysis,
235
  inputs=[input_choice, text_input, url_input, file_input, analysis_types],
236
  outputs=[original_text, summary_output, sentiment_output, topics_output],
237
- show_progress=True
238
  )
239
 
240
  return demo
 
1
  import gradio as gr
2
  import requests
3
+ import time
4
  from bs4 import BeautifulSoup
5
  from transformers import pipeline
6
  import PyPDF2
 
10
 
11
  class ContentAnalyzer:
12
def __init__(self):
    """Eagerly construct the three Hugging Face pipelines the analyzer uses."""
    print("[DEBUG] Initializing pipelines...")
    # Model weights are downloaded on first use, so construction may be slow.
    self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    self.sentiment_analyzer = pipeline("sentiment-analysis")
    self.zero_shot = pipeline("zero-shot-classification")
    print("[DEBUG] Pipelines initialized.")
18
 
19
  def read_file(self, file_obj) -> str:
20
  """Read content from different file types."""
21
  if file_obj is None:
22
  return ""
23
  file_ext = os.path.splitext(file_obj.name)[1].lower()
24
+ print(f"[DEBUG] File extension: {file_ext}")
25
  try:
26
  if file_ext == '.txt':
27
  return file_obj.read().decode('utf-8')
 
41
 
42
def fetch_web_content(self, url: str) -> str:
    """Download *url* and return its visible text, one non-empty line per row.

    On any failure (network error, bad status, parse error) a string
    beginning with "Error fetching URL:" is returned instead of raising.
    """
    print(f"[DEBUG] Attempting to fetch URL: {url}")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        parsed = BeautifulSoup(response.text, 'html.parser')
        # Drop non-content tags so get_text() returns only readable prose.
        for tag in parsed(["script", "style"]):
            tag.decompose()
        raw_text = parsed.get_text(separator='\n')
        cleaned_lines = (ln.strip() for ln in raw_text.splitlines())
        return "\n".join(ln for ln in cleaned_lines if ln)
    except Exception as e:
        return f"Error fetching URL: {str(e)}"
58
 
59
def analyze_content(
    self,
    content: str,
    analysis_types: List[str],
) -> dict:
    """Run the requested analyses over `content`.

    Args:
        content: Plain text to analyze.
        analysis_types: Any subset of {"summarize", "sentiment", "topics"}.

    Returns:
        A dict with "original_text" (preview, truncated to 1000 chars) plus
        one key per requested analysis, or {"error": "..."} if any pipeline
        call fails.
    """
    try:
        results = {}
        # Preview only: keep the UI payload small for long inputs.
        truncated = content[:1000] + "..." if len(content) > 1000 else content
        results["original_text"] = truncated

        # Summarize. The char slice is a rough cap for BART's input limit.
        if "summarize" in analysis_types:
            summary = self.summarizer(content[:1024], max_length=130, min_length=30)
            results["summary"] = summary[0]['summary_text']

        # Sentiment on the leading slice of the text.
        if "sentiment" in analysis_types:
            sentiment = self.sentiment_analyzer(content[:512])
            results["sentiment"] = {
                "label": sentiment[0]['label'],
                "score": round(sentiment[0]['score'], 3)
            }

        # Zero-shot topic detection against a fixed label set.
        if "topics" in analysis_types:
            topics = self.zero_shot(
                content[:512],
                candidate_labels=[
                    "technology", "science", "business", "politics",
                    "entertainment", "education", "health", "sports"
                ]
            )
            # Keep only labels with non-trivial confidence.
            results["topics"] = [
                {"label": label, "score": round(score, 3)}
                for label, score in zip(topics['labels'], topics['scores'])
                if score > 0.1
            ]

        return results
    except Exception as e:
        # The caller checks for an "error" key; without this guard a pipeline
        # failure would crash the Gradio handler. (Restores the pre-refactor
        # behavior that the caller still depends on.)
        return {"error": f"Analysis error: {str(e)}"}
 
 
 
98
 
99
 
100
  def create_interface():
 
102
 
103
  with gr.Blocks(title="Content Analyzer") as demo:
104
  gr.Markdown("# 📑 Content Analyzer")
105
+ gr.Markdown(
106
+ "Analyze text from **Text**, **URL**, or **File** with summarization, "
107
+ "sentiment, and topic detection. A progress bar will appear during processing."
108
+ )
109
 
110
+ # Dropdown for input type
111
  input_choice = gr.Dropdown(
112
  choices=["Text", "URL", "File"],
113
  value="Text",
114
  label="Select Input Type"
115
  )
116
 
117
+ # We use three separate columns to conditionally display
118
  with gr.Column(visible=True) as text_col:
119
  text_input = gr.Textbox(
120
  label="Enter Text",
121
  placeholder="Paste your text here...",
122
  lines=5
123
  )
124
+
125
  with gr.Column(visible=False) as url_col:
126
  url_input = gr.Textbox(
127
  label="Enter URL",
128
  placeholder="https://example.com"
129
  )
130
+
131
  with gr.Column(visible=False) as file_col:
132
  file_input = gr.File(
133
  label="Upload File",
134
  file_types=[".txt", ".pdf", ".docx"]
135
  )
136
 
 
137
def show_inputs(choice):
    """Toggle visibility of the three input columns to match `choice`.

    Gradio interprets a raw bool returned for a component as its *value*,
    not its visibility, so the columns would never actually show/hide.
    Each column must receive gr.update(visible=...) instead.
    """
    return {
        text_col: gr.update(visible=choice == "Text"),
        url_col: gr.update(visible=choice == "URL"),
        file_col: gr.update(visible=choice == "File"),
    }
144
 
 
145
  input_choice.change(
146
  fn=show_inputs,
147
  inputs=[input_choice],
148
  outputs=[text_col, url_col, file_col]
149
  )
150
 
 
151
  analysis_types = gr.CheckboxGroup(
152
  choices=["summarize", "sentiment", "topics"],
153
  value=["summarize"],
 
156
 
157
  analyze_btn = gr.Button("Analyze", variant="primary")
158
 
159
+ # Output tabs
160
  with gr.Tabs():
161
  with gr.Tab("Original Text"):
162
  original_text = gr.Markdown()
 
167
  with gr.Tab("Topics"):
168
  topics_output = gr.Markdown()
169
 
170
+ def process_analysis(choice, text_val, url_val, file_val, types):
171
+ """
172
+ This function does everything in one place using a 'with gr.Progress() as p:' block,
173
+ so we can show each step of the process. We add time.sleep(1) just to demonstrate
174
+ the progress bar (otherwise it may appear/disappear too quickly).
175
+ """
176
+ with gr.Progress() as p:
177
+ # STEP 1: Retrieve content
178
+ p(0, total=4, desc="Reading input")
179
+ time.sleep(1) # For demonstration
180
+ if choice == "Text":
181
+ content = text_val or ""
182
+ elif choice == "URL":
183
+ content = analyzer.fetch_web_content(url_val or "")
184
+ else: # File
185
+ content = analyzer.read_file(file_val)
186
+
187
+ if not content or content.startswith("Error"):
188
+ return content or "No content provided", "", "", ""
189
+
190
+ # STEP 2: Summarize
191
+ p(1, total=4, desc="Summarizing content")
192
+ time.sleep(1) # For demonstration
193
+
194
+ # STEP 3: Sentiment
195
+ p(2, total=4, desc="Performing sentiment analysis")
196
+ time.sleep(1) # For demonstration
197
+
198
+ # STEP 4: Topics
199
+ p(3, total=4, desc="Identifying topics")
200
+ time.sleep(1) # For demonstration
201
+
202
+ # After the progress steps, do the actual analysis in one shot
203
+ # (You could interleave the calls to pipeline with each progress step
204
+ # if you want real-time progress. This is a simplified approach.)
205
+ results = analyzer.analyze_content(content, types)
206
 
207
  if "error" in results:
208
  return results["error"], "", "", ""
209
 
 
210
  original = results.get("original_text", "")
211
  summary = results.get("summary", "")
212
  sentiment = ""
 
216
 
217
  topics = ""
218
  if "topics" in results:
219
+ t_list = "\n".join([
220
+ f"- {t['label']}: {t['score']}"
221
+ for t in results["topics"]
222
+ ])
223
  topics = "**Detected Topics:**\n" + t_list
224
 
225
  return original, summary, sentiment, topics
 
228
  fn=process_analysis,
229
  inputs=[input_choice, text_input, url_input, file_input, analysis_types],
230
  outputs=[original_text, summary_output, sentiment_output, topics_output],
231
+ show_progress=True # This ensures the Gradio progress bar is enabled
232
  )
233
 
234
  return demo