kambris committed
Commit 85b272d · verified · 1 Parent(s): 97813a4

Update app.py

Files changed (1)
  1. app.py +62 -39
app.py CHANGED
@@ -126,32 +126,29 @@ class SpeechAnalyzer:
 
     def analyze_emotional_trajectory(self, text, window_size=5, ngram_size=3):
         """Enhanced emotional trajectory analysis using n-grams and relative scoring"""
-        # Create overlapping n-grams
-        words = text.split()
-        ngrams = [' '.join(words[i:i+ngram_size]) for i in range(0, len(words)-ngram_size+1)]
-
-        # Get raw sentiment scores
+        segments = self.split_text(text, max_length=512)
         sentiment_scores = []
-        for ngram in ngrams:
-            result = self.sentiment_pipeline(ngram)[0]
-            # Use confidence score directly without binary transformation
-            raw_score = result['score']
-            sentiment_scores.append(raw_score)
 
-        # Normalize scores to show relative variations
-        if sentiment_scores:
-            min_score = min(sentiment_scores)
-            max_score = max(sentiment_scores)
-            score_range = max_score - min_score
-            if score_range > 0:
-                sentiment_scores = [(s - min_score) / score_range * 2 - 1 for s in sentiment_scores]
+        for segment in segments:
+            words = segment.split()
+            ngrams = [' '.join(words[i:i+ngram_size]) for i in range(0, len(words)-ngram_size+1)]
+
+            segment_scores = []
+            for ngram in ngrams:
+                result = self.sentiment_pipeline(ngram)[0]
+                segment_scores.append(result['score'])
+
+            avg_score = np.mean(segment_scores) if segment_scores else 0
+            sentiment_scores.append(avg_score)
 
-        # Apply smoothing for cleaner visualization
-        smoothed_scores = (savgol_filter(sentiment_scores, window_length=window_size, polyorder=2)
-                           if len(sentiment_scores) > window_size else sentiment_scores)
+        # Normalize scores
+        min_score = min(sentiment_scores)
+        max_score = max(sentiment_scores)
+        score_range = max_score - min_score
+        if score_range > 0:
+            sentiment_scores = [(s - min_score) / score_range * 2 - 1 for s in sentiment_scores]
 
-        return smoothed_scores
-
+        return sentiment_scores
     def detect_named_entities(self, text):
         """Detect named entities in the text"""
         entities = self.ner_pipeline(text)
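Note: the rewritten analyze_emotional_trajectory above now relies on self.split_text(text, max_length=512) and on numpy being importable as np; neither appears in this hunk, and savgol_filter (scipy) is no longer used by this method. A minimal sketch of what the helper is assumed to do, purely for readers of the diff (hypothetical, the real implementation lives elsewhere in app.py and may differ):

    # Hypothetical sketch of the assumed split_text helper (not part of this commit's diff).
    # Assumed behavior: chunk the speech into pieces of at most max_length words each.
    def split_text(self, text, max_length=512):
        words = text.split()
        return [' '.join(words[i:i + max_length]) for i in range(0, len(words), max_length)]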
@@ -268,6 +265,8 @@ def main():
         text = ' '.join([page.extract_text() for page in pdf_reader.pages])
 
         # Create tabs for different analyses
+        progress_bar = st.progress(0)
+        status_text = st.empty()
         tab1, tab2, tab3, tab4, tab5 = st.tabs([
             "Moral Foundations",
             "Emotional Analysis",
@@ -277,6 +276,8 @@ def main():
         ])
 
         with tab1:
+            status_text.text('Analyzing Moral Foundations...')
+            progress_bar.progress(20)
             st.subheader("Moral Foundations Analysis")
             moral_scores = analyzer.analyze_moral_foundations(text)
 
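A note on the progress widgets introduced by this change: st.progress(0) and st.empty() render a bar and a status placeholder at the point where they are created, i.e. above the tabs, and because Streamlit executes every with tabN: block in a single script run, the bar advances 20 → 100 while the page is being built rather than when the user switches tabs. Each tab follows the same two-line pattern:

    status_text.text('Analyzing Moral Foundations...')   # update the placeholder label for the current stage
    progress_bar.progress(20)                             # advance the shared bar (20, 40, 60, 80, 100 across the tabs)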
@@ -299,31 +300,49 @@ def main():
                 st.write(f"**{MORAL_FOUNDATIONS[foundation]}**: {score:.2%}")
 
         with tab2:
+            status_text.text('Processing Emotional Trajectory...')
+            progress_bar.progress(40)
             st.subheader("Speech Trajectory Analysis")
             col1, col2 = st.columns(2)
 
+            # First, create consistent segments for both analyses
+            segments = analyzer.split_text(text, max_length=512)
+            num_segments = len(segments)
+            segment_labels = [f"{i+1}" for i in range(num_segments)]
+
             with col1:
                 st.write("### Emotional Flow")
-                # Get enhanced sentiment trajectory with n-grams
-                sentiment_scores = analyzer.analyze_emotional_trajectory(text, window_size=5, ngram_size=3)
+                sentiment_scores = []
 
-                # Create segment labels based on n-gram windows
-                segment_labels = [f"{i+1}" for i in range(len(sentiment_scores))]
+                for segment in segments:
+                    # Get words for n-gram analysis within each segment
+                    words = segment.split()
+                    ngram_size = 3
+                    ngrams = [' '.join(words[i:i+ngram_size]) for i in range(0, len(words)-ngram_size+1)]
+
+                    # Calculate segment score from n-grams
+                    segment_scores = []
+                    for ngram in ngrams:
+                        result = analyzer.sentiment_pipeline(ngram)[0]
+                        segment_scores.append(result['score'])
+
+                    # Use average score for the segment
+                    avg_score = np.mean(segment_scores) if segment_scores else 0
+                    sentiment_scores.append(avg_score)
+
+                # Normalize scores to show relative variations
+                min_score = min(sentiment_scores)
+                max_score = max(sentiment_scores)
+                score_range = max_score - min_score
+                if score_range > 0:
+                    sentiment_scores = [(s - min_score) / score_range * 2 - 1 for s in sentiment_scores]
 
                 trajectory_fig = go.Figure(data=go.Scatter(
                     x=segment_labels,
                     y=sentiment_scores,
                     mode='lines+markers',
-                    line=dict(
-                        color='#1f77b4',
-                        width=3
-                    ),
-                    marker=dict(
-                        size=8,
-                        color='#1f77b4',
-                        symbol='circle'
-                    ),
-                    hovertemplate='Segment %{x}<br>Score: %{y:.2f}<extra></extra>'
+                    line=dict(color='#1f77b4', width=3),
+                    marker=dict(size=8, color='#1f77b4', symbol='circle')
                 ))
 
                 trajectory_fig.update_layout(
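For reference, the normalization used here (and in analyze_emotional_trajectory above) is a plain min-max rescale of the per-segment averages into [-1, 1]: the weakest segment maps to -1, the strongest to +1, and a flat speech (score_range == 0) is left unrescaled. A small worked example with made-up numbers:

    scores = [0.62, 0.80, 0.71]                # mean sentiment confidence per segment (illustrative only)
    lo, hi = min(scores), max(scores)          # 0.62, 0.80
    rescaled = [(s - lo) / (hi - lo) * 2 - 1 for s in scores]
    # roughly [-1.0, 1.0, 0.0]: relative variation across segments, not absolute sentiment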
@@ -345,8 +364,6 @@ def main():
 
             with col2:
                 st.write("### Moral Foundations Flow")
-                # Process text in manageable chunks for moral analysis
-                segments = analyzer.split_text(text, max_length=512)
                 moral_trajectories = {foundation: [] for foundation in MORAL_FOUNDATIONS}
 
                 for segment in segments:
@@ -357,7 +374,7 @@ def main():
                 moral_fig = go.Figure()
                 for foundation, scores in moral_trajectories.items():
                     moral_fig.add_trace(go.Scatter(
-                        x=list(range(1, len(scores) + 1)),
+                        x=segment_labels,
                         y=scores,
                         name=MORAL_FOUNDATIONS[foundation],
                         mode='lines+markers'
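Switching the x-axis from list(range(1, len(scores) + 1)) to segment_labels ties the moral-foundations traces to the same segment axis as the emotional-flow chart, so the two plots line up point for point; this only holds if each foundation collects exactly one value per segment in the loop above. That loop body is outside this hunk; a hypothetical sketch of the shape it is assumed to have:

    # Hypothetical reconstruction of the per-segment loop (its body is not shown in this diff).
    for segment in segments:
        segment_scores = analyzer.analyze_moral_foundations(segment)   # assumed per-segment scores keyed by foundation
        for foundation in MORAL_FOUNDATIONS:
            moral_trajectories[foundation].append(segment_scores.get(foundation, 0))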
@@ -380,6 +397,8 @@ def main():
                 st.plotly_chart(moral_fig)
 
         with tab3:
+            status_text.text('Analyzing Linguistic Features...')
+            progress_bar.progress(60)
             st.subheader("Linguistic Analysis")
             readability = analyzer.calculate_readability(text)
 
@@ -425,6 +444,8 @@ def main():
             )
 
         with tab4:
+            status_text.text('Building Semantic Network...')
+            progress_bar.progress(80)
             st.subheader("Semantic Network")
             semantic_graph = analyzer.create_semantic_network(text)
 
@@ -494,6 +515,8 @@ def main():
             st.plotly_chart(network_fig, use_container_width=True)
 
         with tab5:
+            status_text.text('Extracting Named Entities...')
+            progress_bar.progress(100)
             st.subheader("Named Entity Recognition")
             named_entities = analyzer.detect_named_entities(text)
 