Update app.py
Browse files
app.py
CHANGED
@@ -126,32 +126,29 @@ class SpeechAnalyzer:
|
|
126 |
|
127 |
def analyze_emotional_trajectory(self, text, window_size=5, ngram_size=3):
|
128 |
"""Enhanced emotional trajectory analysis using n-grams and relative scoring"""
|
129 |
-
|
130 |
-
words = text.split()
|
131 |
-
ngrams = [' '.join(words[i:i+ngram_size]) for i in range(0, len(words)-ngram_size+1)]
|
132 |
-
|
133 |
-
# Get raw sentiment scores
|
134 |
sentiment_scores = []
|
135 |
-
for ngram in ngrams:
|
136 |
-
result = self.sentiment_pipeline(ngram)[0]
|
137 |
-
# Use confidence score directly without binary transformation
|
138 |
-
raw_score = result['score']
|
139 |
-
sentiment_scores.append(raw_score)
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
148 |
|
149 |
-
#
|
150 |
-
|
151 |
-
|
|
|
|
|
|
|
152 |
|
153 |
-
return
|
154 |
-
|
155 |
def detect_named_entities(self, text):
|
156 |
"""Detect named entities in the text"""
|
157 |
entities = self.ner_pipeline(text)
|
@@ -268,6 +265,8 @@ def main():
|
|
268 |
text = ' '.join([page.extract_text() for page in pdf_reader.pages])
|
269 |
|
270 |
# Create tabs for different analyses
|
|
|
|
|
271 |
tab1, tab2, tab3, tab4, tab5 = st.tabs([
|
272 |
"Moral Foundations",
|
273 |
"Emotional Analysis",
|
@@ -277,6 +276,8 @@ def main():
|
|
277 |
])
|
278 |
|
279 |
with tab1:
|
|
|
|
|
280 |
st.subheader("Moral Foundations Analysis")
|
281 |
moral_scores = analyzer.analyze_moral_foundations(text)
|
282 |
|
@@ -299,31 +300,49 @@ def main():
|
|
299 |
st.write(f"**{MORAL_FOUNDATIONS[foundation]}**: {score:.2%}")
|
300 |
|
301 |
with tab2:
|
|
|
|
|
302 |
st.subheader("Speech Trajectory Analysis")
|
303 |
col1, col2 = st.columns(2)
|
304 |
|
|
|
|
|
|
|
|
|
|
|
305 |
with col1:
|
306 |
st.write("### Emotional Flow")
|
307 |
-
|
308 |
-
sentiment_scores = analyzer.analyze_emotional_trajectory(text, window_size=5, ngram_size=3)
|
309 |
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
trajectory_fig = go.Figure(data=go.Scatter(
|
314 |
x=segment_labels,
|
315 |
y=sentiment_scores,
|
316 |
mode='lines+markers',
|
317 |
-
line=dict(
|
318 |
-
|
319 |
-
width=3
|
320 |
-
),
|
321 |
-
marker=dict(
|
322 |
-
size=8,
|
323 |
-
color='#1f77b4',
|
324 |
-
symbol='circle'
|
325 |
-
),
|
326 |
-
hovertemplate='Segment %{x}<br>Score: %{y:.2f}<extra></extra>'
|
327 |
))
|
328 |
|
329 |
trajectory_fig.update_layout(
|
@@ -345,8 +364,6 @@ def main():
|
|
345 |
|
346 |
with col2:
|
347 |
st.write("### Moral Foundations Flow")
|
348 |
-
# Process text in manageable chunks for moral analysis
|
349 |
-
segments = analyzer.split_text(text, max_length=512)
|
350 |
moral_trajectories = {foundation: [] for foundation in MORAL_FOUNDATIONS}
|
351 |
|
352 |
for segment in segments:
|
@@ -357,7 +374,7 @@ def main():
|
|
357 |
moral_fig = go.Figure()
|
358 |
for foundation, scores in moral_trajectories.items():
|
359 |
moral_fig.add_trace(go.Scatter(
|
360 |
-
x=
|
361 |
y=scores,
|
362 |
name=MORAL_FOUNDATIONS[foundation],
|
363 |
mode='lines+markers'
|
@@ -380,6 +397,8 @@ def main():
|
|
380 |
st.plotly_chart(moral_fig)
|
381 |
|
382 |
with tab3:
|
|
|
|
|
383 |
st.subheader("Linguistic Analysis")
|
384 |
readability = analyzer.calculate_readability(text)
|
385 |
|
@@ -425,6 +444,8 @@ def main():
|
|
425 |
)
|
426 |
|
427 |
with tab4:
|
|
|
|
|
428 |
st.subheader("Semantic Network")
|
429 |
semantic_graph = analyzer.create_semantic_network(text)
|
430 |
|
@@ -494,6 +515,8 @@ def main():
|
|
494 |
st.plotly_chart(network_fig, use_container_width=True)
|
495 |
|
496 |
with tab5:
|
|
|
|
|
497 |
st.subheader("Named Entity Recognition")
|
498 |
named_entities = analyzer.detect_named_entities(text)
|
499 |
|
|
|
126 |
|
127 |
def analyze_emotional_trajectory(self, text, window_size=5, ngram_size=3):
    """Enhanced emotional trajectory analysis using n-grams and relative scoring.

    Splits *text* into model-sized segments via ``self.split_text``, scores
    each segment as the mean sentiment-confidence of its word n-grams, then
    rescales the per-segment scores to [-1, 1] so relative emotional
    variation across the speech is visible.

    Args:
        text: Raw speech text to analyze.
        window_size: Unused in this implementation; kept for backward
            compatibility with existing callers.
        ngram_size: Number of words per n-gram fed to the sentiment model.

    Returns:
        list[float]: One score per segment, normalized to [-1, 1] when the
        segment scores differ; raw mean confidences when they are all equal.
        Empty list when the text yields no segments.
    """
    segments = self.split_text(text, max_length=512)

    sentiment_scores = []
    for segment in segments:
        words = segment.split()
        # Sliding word n-grams; a segment shorter than ngram_size yields none.
        ngrams = [' '.join(words[i:i + ngram_size])
                  for i in range(len(words) - ngram_size + 1)]

        segment_scores = []
        for ngram in ngrams:
            result = self.sentiment_pipeline(ngram)[0]
            segment_scores.append(result['score'])

        # Mean confidence for the segment; 0.0 when no n-grams could be formed.
        avg_score = np.mean(segment_scores) if segment_scores else 0.0
        sentiment_scores.append(avg_score)

    # Guard: with no segments (e.g. empty text), min()/max() below would
    # raise ValueError on an empty sequence.
    if not sentiment_scores:
        return []

    # Normalize scores to [-1, 1] to emphasize relative variation.
    min_score = min(sentiment_scores)
    max_score = max(sentiment_scores)
    score_range = max_score - min_score
    if score_range > 0:
        sentiment_scores = [(s - min_score) / score_range * 2 - 1
                            for s in sentiment_scores]

    return sentiment_scores
|
|
|
152 |
def detect_named_entities(self, text):
|
153 |
"""Detect named entities in the text"""
|
154 |
entities = self.ner_pipeline(text)
|
|
|
265 |
text = ' '.join([page.extract_text() for page in pdf_reader.pages])
|
266 |
|
267 |
# Create tabs for different analyses
|
268 |
+
progress_bar = st.progress(0)
|
269 |
+
status_text = st.empty()
|
270 |
tab1, tab2, tab3, tab4, tab5 = st.tabs([
|
271 |
"Moral Foundations",
|
272 |
"Emotional Analysis",
|
|
|
276 |
])
|
277 |
|
278 |
with tab1:
|
279 |
+
status_text.text('Analyzing Moral Foundations...')
|
280 |
+
progress_bar.progress(20)
|
281 |
st.subheader("Moral Foundations Analysis")
|
282 |
moral_scores = analyzer.analyze_moral_foundations(text)
|
283 |
|
|
|
300 |
st.write(f"**{MORAL_FOUNDATIONS[foundation]}**: {score:.2%}")
|
301 |
|
302 |
with tab2:
|
303 |
+
status_text.text('Processing Emotional Trajectory...')
|
304 |
+
progress_bar.progress(40)
|
305 |
st.subheader("Speech Trajectory Analysis")
|
306 |
col1, col2 = st.columns(2)
|
307 |
|
308 |
+
# First, create consistent segments for both analyses
|
309 |
+
segments = analyzer.split_text(text, max_length=512)
|
310 |
+
num_segments = len(segments)
|
311 |
+
segment_labels = [f"{i+1}" for i in range(num_segments)]
|
312 |
+
|
313 |
with col1:
|
314 |
st.write("### Emotional Flow")
|
315 |
+
sentiment_scores = []
|
|
|
316 |
|
317 |
+
for segment in segments:
|
318 |
+
# Get words for n-gram analysis within each segment
|
319 |
+
words = segment.split()
|
320 |
+
ngram_size = 3
|
321 |
+
ngrams = [' '.join(words[i:i+ngram_size]) for i in range(0, len(words)-ngram_size+1)]
|
322 |
+
|
323 |
+
# Calculate segment score from n-grams
|
324 |
+
segment_scores = []
|
325 |
+
for ngram in ngrams:
|
326 |
+
result = analyzer.sentiment_pipeline(ngram)[0]
|
327 |
+
segment_scores.append(result['score'])
|
328 |
+
|
329 |
+
# Use average score for the segment
|
330 |
+
avg_score = np.mean(segment_scores) if segment_scores else 0
|
331 |
+
sentiment_scores.append(avg_score)
|
332 |
+
|
333 |
+
# Normalize scores to show relative variations
|
334 |
+
min_score = min(sentiment_scores)
|
335 |
+
max_score = max(sentiment_scores)
|
336 |
+
score_range = max_score - min_score
|
337 |
+
if score_range > 0:
|
338 |
+
sentiment_scores = [(s - min_score) / score_range * 2 - 1 for s in sentiment_scores]
|
339 |
|
340 |
trajectory_fig = go.Figure(data=go.Scatter(
|
341 |
x=segment_labels,
|
342 |
y=sentiment_scores,
|
343 |
mode='lines+markers',
|
344 |
+
line=dict(color='#1f77b4', width=3),
|
345 |
+
marker=dict(size=8, color='#1f77b4', symbol='circle')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
))
|
347 |
|
348 |
trajectory_fig.update_layout(
|
|
|
364 |
|
365 |
with col2:
|
366 |
st.write("### Moral Foundations Flow")
|
|
|
|
|
367 |
moral_trajectories = {foundation: [] for foundation in MORAL_FOUNDATIONS}
|
368 |
|
369 |
for segment in segments:
|
|
|
374 |
moral_fig = go.Figure()
|
375 |
for foundation, scores in moral_trajectories.items():
|
376 |
moral_fig.add_trace(go.Scatter(
|
377 |
+
x=segment_labels,
|
378 |
y=scores,
|
379 |
name=MORAL_FOUNDATIONS[foundation],
|
380 |
mode='lines+markers'
|
|
|
397 |
st.plotly_chart(moral_fig)
|
398 |
|
399 |
with tab3:
|
400 |
+
status_text.text('Analyzing Linguistic Features...')
|
401 |
+
progress_bar.progress(60)
|
402 |
st.subheader("Linguistic Analysis")
|
403 |
readability = analyzer.calculate_readability(text)
|
404 |
|
|
|
444 |
)
|
445 |
|
446 |
with tab4:
|
447 |
+
status_text.text('Building Semantic Network...')
|
448 |
+
progress_bar.progress(80)
|
449 |
st.subheader("Semantic Network")
|
450 |
semantic_graph = analyzer.create_semantic_network(text)
|
451 |
|
|
|
515 |
st.plotly_chart(network_fig, use_container_width=True)
|
516 |
|
517 |
with tab5:
|
518 |
+
status_text.text('Extracting Named Entities...')
|
519 |
+
progress_bar.progress(100)
|
520 |
st.subheader("Named Entity Recognition")
|
521 |
named_entities = analyzer.detect_named_entities(text)
|
522 |
|