Update app.py
Browse files
app.py
CHANGED
@@ -126,32 +126,29 @@ class SpeechAnalyzer:
|
|
126 |
|
127 |
def analyze_emotional_trajectory(self, text, window_size=5, ngram_size=3):
|
128 |
"""Enhanced emotional trajectory analysis using n-grams and relative scoring"""
|
129 |
-
|
130 |
-
words = text.split()
|
131 |
-
ngrams = [' '.join(words[i:i+ngram_size]) for i in range(0, len(words)-ngram_size+1)]
|
132 |
-
|
133 |
-
# Get raw sentiment scores
|
134 |
sentiment_scores = []
|
135 |
-
for ngram in ngrams:
|
136 |
-
result = self.sentiment_pipeline(ngram)[0]
|
137 |
-
# Use confidence score directly without binary transformation
|
138 |
-
raw_score = result['score']
|
139 |
-
sentiment_scores.append(raw_score)
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
148 |
|
149 |
-
#
|
150 |
-
|
151 |
-
|
|
|
|
|
|
|
152 |
|
153 |
-
return
|
154 |
-
|
155 |
def detect_named_entities(self, text):
|
156 |
"""Detect named entities in the text"""
|
157 |
entities = self.ner_pipeline(text)
|
@@ -268,6 +265,8 @@ def main():
|
|
268 |
text = ' '.join([page.extract_text() for page in pdf_reader.pages])
|
269 |
|
270 |
# Create tabs for different analyses
|
|
|
|
|
271 |
tab1, tab2, tab3, tab4, tab5 = st.tabs([
|
272 |
"Moral Foundations",
|
273 |
"Emotional Analysis",
|
@@ -277,6 +276,8 @@ def main():
|
|
277 |
])
|
278 |
|
279 |
with tab1:
|
|
|
|
|
280 |
st.subheader("Moral Foundations Analysis")
|
281 |
moral_scores = analyzer.analyze_moral_foundations(text)
|
282 |
|
@@ -299,31 +300,49 @@ def main():
|
|
299 |
st.write(f"**{MORAL_FOUNDATIONS[foundation]}**: {score:.2%}")
|
300 |
|
301 |
with tab2:
|
|
|
|
|
302 |
st.subheader("Speech Trajectory Analysis")
|
303 |
col1, col2 = st.columns(2)
|
304 |
|
|
|
|
|
|
|
|
|
|
|
305 |
with col1:
|
306 |
st.write("### Emotional Flow")
|
307 |
-
|
308 |
-
sentiment_scores = analyzer.analyze_emotional_trajectory(text, window_size=5, ngram_size=3)
|
309 |
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
trajectory_fig = go.Figure(data=go.Scatter(
|
314 |
x=segment_labels,
|
315 |
y=sentiment_scores,
|
316 |
mode='lines+markers',
|
317 |
-
line=dict(
|
318 |
-
|
319 |
-
width=3
|
320 |
-
),
|
321 |
-
marker=dict(
|
322 |
-
size=8,
|
323 |
-
color='#1f77b4',
|
324 |
-
symbol='circle'
|
325 |
-
),
|
326 |
-
hovertemplate='Segment %{x}<br>Score: %{y:.2f}<extra></extra>'
|
327 |
))
|
328 |
|
329 |
trajectory_fig.update_layout(
|
@@ -345,8 +364,6 @@ def main():
|
|
345 |
|
346 |
with col2:
|
347 |
st.write("### Moral Foundations Flow")
|
348 |
-
# Process text in manageable chunks for moral analysis
|
349 |
-
segments = analyzer.split_text(text, max_length=512)
|
350 |
moral_trajectories = {foundation: [] for foundation in MORAL_FOUNDATIONS}
|
351 |
|
352 |
for segment in segments:
|
@@ -357,7 +374,7 @@ def main():
|
|
357 |
moral_fig = go.Figure()
|
358 |
for foundation, scores in moral_trajectories.items():
|
359 |
moral_fig.add_trace(go.Scatter(
|
360 |
-
x=
|
361 |
y=scores,
|
362 |
name=MORAL_FOUNDATIONS[foundation],
|
363 |
mode='lines+markers'
|
@@ -380,6 +397,8 @@ def main():
|
|
380 |
st.plotly_chart(moral_fig)
|
381 |
|
382 |
with tab3:
|
|
|
|
|
383 |
st.subheader("Linguistic Analysis")
|
384 |
readability = analyzer.calculate_readability(text)
|
385 |
|
@@ -425,6 +444,8 @@ def main():
|
|
425 |
)
|
426 |
|
427 |
with tab4:
|
|
|
|
|
428 |
st.subheader("Semantic Network")
|
429 |
semantic_graph = analyzer.create_semantic_network(text)
|
430 |
|
@@ -494,6 +515,8 @@ def main():
|
|
494 |
st.plotly_chart(network_fig, use_container_width=True)
|
495 |
|
496 |
with tab5:
|
|
|
|
|
497 |
st.subheader("Named Entity Recognition")
|
498 |
named_entities = analyzer.detect_named_entities(text)
|
499 |
|
|
|
126 |
|
127 |
def analyze_emotional_trajectory(self, text, window_size=5, ngram_size=3):
    """Enhanced emotional trajectory analysis using n-grams and relative scoring.

    Splits *text* into model-sized segments via ``self.split_text``, scores
    each segment as the mean sentiment-confidence of its word n-grams, then
    rescales the per-segment scores to [-1, 1] so relative emotional
    variation across the speech is visible.

    Args:
        text: Raw speech text to analyze.
        window_size: Unused in this implementation; kept for backward
            compatibility with existing callers.
        ngram_size: Number of words per n-gram fed to the sentiment model.

    Returns:
        list[float]: One score per segment, normalized to [-1, 1] when the
        segment scores differ; raw mean confidences when they are all equal.
        Empty list when the text yields no segments.
    """
    segments = self.split_text(text, max_length=512)

    sentiment_scores = []
    for segment in segments:
        words = segment.split()
        # Sliding word n-grams; a segment shorter than ngram_size yields none.
        ngrams = [' '.join(words[i:i + ngram_size])
                  for i in range(len(words) - ngram_size + 1)]

        segment_scores = []
        for ngram in ngrams:
            result = self.sentiment_pipeline(ngram)[0]
            segment_scores.append(result['score'])

        # Mean confidence for the segment; 0.0 when no n-grams could be formed.
        avg_score = np.mean(segment_scores) if segment_scores else 0.0
        sentiment_scores.append(avg_score)

    # Guard: with no segments (e.g. empty text), min()/max() below would
    # raise ValueError on an empty sequence.
    if not sentiment_scores:
        return []

    # Normalize scores to [-1, 1] to emphasize relative variation.
    min_score = min(sentiment_scores)
    max_score = max(sentiment_scores)
    score_range = max_score - min_score
    if score_range > 0:
        sentiment_scores = [(s - min_score) / score_range * 2 - 1
                            for s in sentiment_scores]

    return sentiment_scores
|
|
|
152 |
def detect_named_entities(self, text):
|
153 |
"""Detect named entities in the text"""
|
154 |
entities = self.ner_pipeline(text)
|
|
|
265 |
text = ' '.join([page.extract_text() for page in pdf_reader.pages])
|
266 |
|
267 |
# Create tabs for different analyses
|
268 |
+
progress_bar = st.progress(0)
|
269 |
+
status_text = st.empty()
|
270 |
tab1, tab2, tab3, tab4, tab5 = st.tabs([
|
271 |
"Moral Foundations",
|
272 |
"Emotional Analysis",
|
|
|
276 |
])
|
277 |
|
278 |
with tab1:
|
279 |
+
status_text.text('Analyzing Moral Foundations...')
|
280 |
+
progress_bar.progress(20)
|
281 |
st.subheader("Moral Foundations Analysis")
|
282 |
moral_scores = analyzer.analyze_moral_foundations(text)
|
283 |
|
|
|
300 |
st.write(f"**{MORAL_FOUNDATIONS[foundation]}**: {score:.2%}")
|
301 |
|
302 |
with tab2:
|
303 |
+
status_text.text('Processing Emotional Trajectory...')
|
304 |
+
progress_bar.progress(40)
|
305 |
st.subheader("Speech Trajectory Analysis")
|
306 |
col1, col2 = st.columns(2)
|
307 |
|
308 |
+
# First, create consistent segments for both analyses
|
309 |
+
segments = analyzer.split_text(text, max_length=512)
|
310 |
+
num_segments = len(segments)
|
311 |
+
segment_labels = [f"{i+1}" for i in range(num_segments)]
|
312 |
+
|
313 |
with col1:
|
314 |
st.write("### Emotional Flow")
|
315 |
+
sentiment_scores = []
|
|
|
316 |
|
317 |
+
for segment in segments:
|
318 |
+
# Get words for n-gram analysis within each segment
|
319 |
+
words = segment.split()
|
320 |
+
ngram_size = 3
|
321 |
+
ngrams = [' '.join(words[i:i+ngram_size]) for i in range(0, len(words)-ngram_size+1)]
|
322 |
+
|
323 |
+
# Calculate segment score from n-grams
|
324 |
+
segment_scores = []
|
325 |
+
for ngram in ngrams:
|
326 |
+
result = analyzer.sentiment_pipeline(ngram)[0]
|
327 |
+
segment_scores.append(result['score'])
|
328 |
+
|
329 |
+
# Use average score for the segment
|
330 |
+
avg_score = np.mean(segment_scores) if segment_scores else 0
|
331 |
+
sentiment_scores.append(avg_score)
|
332 |
+
|
333 |
+
# Normalize scores to show relative variations
|
334 |
+
min_score = min(sentiment_scores)
|
335 |
+
max_score = max(sentiment_scores)
|
336 |
+
score_range = max_score - min_score
|
337 |
+
if score_range > 0:
|
338 |
+
sentiment_scores = [(s - min_score) / score_range * 2 - 1 for s in sentiment_scores]
|
339 |
|
340 |
trajectory_fig = go.Figure(data=go.Scatter(
|
341 |
x=segment_labels,
|
342 |
y=sentiment_scores,
|
343 |
mode='lines+markers',
|
344 |
+
line=dict(color='#1f77b4', width=3),
|
345 |
+
marker=dict(size=8, color='#1f77b4', symbol='circle')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
))
|
347 |
|
348 |
trajectory_fig.update_layout(
|
|
|
364 |
|
365 |
with col2:
|
366 |
st.write("### Moral Foundations Flow")
|
|
|
|
|
367 |
moral_trajectories = {foundation: [] for foundation in MORAL_FOUNDATIONS}
|
368 |
|
369 |
for segment in segments:
|
|
|
374 |
moral_fig = go.Figure()
|
375 |
for foundation, scores in moral_trajectories.items():
|
376 |
moral_fig.add_trace(go.Scatter(
|
377 |
+
x=segment_labels,
|
378 |
y=scores,
|
379 |
name=MORAL_FOUNDATIONS[foundation],
|
380 |
mode='lines+markers'
|
|
|
397 |
st.plotly_chart(moral_fig)
|
398 |
|
399 |
with tab3:
|
400 |
+
status_text.text('Analyzing Linguistic Features...')
|
401 |
+
progress_bar.progress(60)
|
402 |
st.subheader("Linguistic Analysis")
|
403 |
readability = analyzer.calculate_readability(text)
|
404 |
|
|
|
444 |
)
|
445 |
|
446 |
with tab4:
|
447 |
+
status_text.text('Building Semantic Network...')
|
448 |
+
progress_bar.progress(80)
|
449 |
st.subheader("Semantic Network")
|
450 |
semantic_graph = analyzer.create_semantic_network(text)
|
451 |
|
|
|
515 |
st.plotly_chart(network_fig, use_container_width=True)
|
516 |
|
517 |
with tab5:
|
518 |
+
status_text.text('Extracting Named Entities...')
|
519 |
+
progress_bar.progress(100)
|
520 |
st.subheader("Named Entity Recognition")
|
521 |
named_entities = analyzer.detect_named_entities(text)
|
522 |
|