import streamlit as st
import pandas as pd
import numpy as np
import torch
import networkx as nx
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.signal import savgol_filter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from wordcloud import WordCloud
import re
import spacy
# st.set_page_config must be the first Streamlit call in the script
st.set_page_config(page_title="Political Speech Analysis", page_icon="🗣️", layout="wide")
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
pipeline,
AutoModelForTokenClassification,
RobertaTokenizer,
RobertaForSequenceClassification
)
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from textstat import flesch_reading_ease, flesch_kincaid_grade
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('punkt_tab', quiet=True)
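# nltk.download is idempotent: each call is a quiet no-op once the resource is
# installed (punkt_tab is the tokenizer variant newer NLTK releases look for).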
try:
    nlp = spacy.load('en_core_web_lg')
except OSError:
    st.error("Please install spaCy and the en_core_web_lg model:\n"
             "pip install spacy\n"
             "python -m spacy download en_core_web_lg")
    st.stop()
MORAL_FOUNDATIONS = {
'care': 'Care/Harm',
'fairness': 'Fairness/Cheating',
'loyalty': 'Loyalty/Betrayal',
'authority': 'Authority/Subversion',
'sanctity': 'Sanctity/Degradation'
}
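# Surface-level lexical cues for each rhetorical device. These are rough
# heuristics: matching is whole-word (see detect_rhetorical_devices), but
# common words such as 'like' and 'as' will still over-trigger.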
RHETORICAL_DEVICES = {
'analogy': ['like', 'as', 'similar to'],
'repetition': ['repetitive', 'recurring'],
'metaphor': ['as if', 'like', 'represents'],
'hyperbole': ['always', 'never', 'absolute'],
'rhetorical_question': ['?']
}
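# Instantiating SpeechAnalyzer pulls four models from the Hugging Face Hub on
# first run (moral foundations, sentiment, NER, emotion), which can take a few
# minutes and over a gigabyte of disk; later runs use the local cache.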
class SpeechAnalyzer:
def __init__(self):
self.moral_model_path = "MMADS/MoralFoundationsClassifier"
self.moral_tokenizer = RobertaTokenizer.from_pretrained(self.moral_model_path)
self.moral_model = RobertaForSequenceClassification.from_pretrained(self.moral_model_path)
self.label_names = ['care', 'fairness', 'loyalty', 'authority', 'sanctity']
        # The default sentiment model (distilbert-base-uncased-finetuned-sst-2-english)
        # emits POSITIVE/NEGATIVE labels, which analyze_emotional_trajectory relies on
        self.sentiment_pipeline = pipeline("sentiment-analysis")
self.ner_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
        # Note: without an aggregation_strategy, the NER pipeline returns one
        # entry per subword token rather than per merged entity span
        self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
self.emotion_classifier = pipeline("text-classification",
model="j-hartmann/emotion-english-distilroberta-base")
    def split_text(self, text, max_length=256, overlap=50):
        """Split long text into overlapping word-level segments"""
        words = text.split()
        segments = []
        current_segment = []
        for word in words:
            if len(current_segment) + 1 > max_length:
                segments.append(' '.join(current_segment))
                # Carry the last `overlap` words forward for context
                current_segment = current_segment[-overlap:] + [word]
            else:
                current_segment.append(word)
        if current_segment:
            segments.append(' '.join(current_segment))
        return segments
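    # Worked example (hypothetical parameters): split_text(text, max_length=5,
    # overlap=2) on the words w1..w12 yields ['w1 w2 w3 w4 w5', 'w4 w5 w6 w7 w8',
    # 'w7 w8 w9 w10 w11', 'w10 w11 w12'] -- each segment after the first opens
    # with the last two words of its predecessor.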
def analyze_moral_foundations(self, text):
"""Analyze moral foundations using the RoBERTa-based classifier"""
segments = self.split_text(text)
foundation_scores = {
'care': [], 'fairness': [], 'loyalty': [],
'authority': [], 'sanctity': []
}
for segment in segments:
inputs = self.moral_tokenizer(segment, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
outputs = self.moral_model(**inputs)
probabilities = torch.softmax(outputs.logits, dim=1)
for idx, label in enumerate(self.label_names):
foundation = label.lower()
if foundation in foundation_scores:
foundation_scores[foundation].append(probabilities[0][idx].item())
aggregated_scores = {
foundation: np.mean(scores) for foundation, scores in foundation_scores.items()
}
return aggregated_scores
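    # analyze_moral_foundations returns per-foundation softmax probabilities
    # averaged over segments; assuming the model's label set matches
    # label_names, the five scores sum to roughly 1.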
    def analyze_emotional_trajectory(self, text):
        """Track sentiment and basic emotions across speech segments"""
        segments = self.split_text(text, max_length=512)
sentiment_scores = []
basic_emotions = []
for segment in segments:
sentiment_result = self.sentiment_pipeline(segment, truncation=True, max_length=512)
            score = sentiment_result[0]['score']
            # Map model confidence onto a [0, 1] scale: 0.5 is neutral,
            # above 0.5 positive, below 0.5 negative
            if sentiment_result[0]['label'] == 'POSITIVE':
                score = 0.5 + (score * 0.5)
            else:
                score = 0.5 - (score * 0.5)
            sentiment_scores.append(score)
emotion_result = self.emotion_classifier(segment, truncation=True, max_length=512)
emotion = emotion_result[0]['label']
basic_emotions.append(emotion)
return sentiment_scores, basic_emotions
    def detect_named_entities(self, text):
        """Detect named entities in the text"""
        # Run NER per segment so long documents stay within the model's
        # 512-token limit; overlap may duplicate entities near boundaries
        entities = []
        for segment in self.split_text(text):
            entities.extend(self.ner_pipeline(segment))
        return entities
def extract_key_phrases(self, text, top_n=10):
"""Extract key phrases using TF-IDF"""
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1,2))
tfidf_matrix = vectorizer.fit_transform([text])
feature_names = vectorizer.get_feature_names_out()
sorted_idx = tfidf_matrix.toarray()[0].argsort()[::-1]
top_phrases = [feature_names[i] for i in sorted_idx[:top_n]]
return top_phrases
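    # Note: with a single-document corpus, IDF is constant, so the TF-IDF
    # ranking in extract_key_phrases reduces to normalized term frequency.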
def calculate_readability(self, text):
"""Calculate readability metrics"""
return {
'Flesch Reading Ease': flesch_reading_ease(text),
'Flesch-Kincaid Grade Level': flesch_kincaid_grade(text)
}
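    # Flesch Reading Ease (above) runs roughly 0-100, higher meaning easier
    # to read; Flesch-Kincaid Grade Level approximates the US school grade
    # needed to follow the text.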
    def detect_rhetorical_devices(self, text):
        """Count surface markers for common rhetorical devices"""
        devices_found = {}
        lowered = text.lower()
        for device, markers in RHETORICAL_DEVICES.items():
            count = 0
            for marker in markers:
                if marker == '?':
                    count += lowered.count(marker)
                else:
                    # Whole-word match so 'as' is not counted inside 'was'
                    count += len(re.findall(r'\b' + re.escape(marker) + r'\b', lowered))
            if count > 0:
                devices_found[device] = count
        return devices_found
    def create_semantic_network(self, text, top_n=20, window_size=10, chunk_size=10000):
        """Create semantic network graph with weighted edges"""
        # Process the text in character chunks so spaCy never has to hold a
        # very long document in memory at once
        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
all_nouns = []
noun_freq = nltk.FreqDist()
for chunk in chunks:
doc = nlp(chunk)
chunk_nouns = [token.text.lower() for token in doc if token.pos_ == 'NOUN']
all_nouns.extend(chunk_nouns)
noun_freq.update(chunk_nouns)
top_nouns = [noun for noun, freq in noun_freq.most_common(top_n)]
G = nx.Graph()
cooc_matrix = np.zeros((len(top_nouns), len(top_nouns)))
noun_to_idx = {noun: idx for idx, noun in enumerate(top_nouns)}
        for chunk in chunks:
            doc = nlp(chunk)
            words = [token.text.lower() for token in doc]
            for i in range(len(words)):
                window_words = words[max(0, i-window_size):min(len(words), i+window_size)]
                # Count each unordered pair of co-occurring top nouns once per window
                present = [noun for noun in top_nouns if noun in window_words]
                for a in range(len(present)):
                    for b in range(a + 1, len(present)):
                        idx1, idx2 = noun_to_idx[present[a]], noun_to_idx[present[b]]
                        cooc_matrix[idx1][idx2] += 1
                        cooc_matrix[idx2][idx1] += 1
for noun in top_nouns:
G.add_node(noun, size=noun_freq[noun])
max_weight = np.max(cooc_matrix)
if max_weight > 0: # Prevent division by zero
for i in range(len(top_nouns)):
for j in range(i+1, len(top_nouns)):
weight = cooc_matrix[i][j]
if weight > 0:
G.add_edge(top_nouns[i], top_nouns[j],
weight=weight,
width=3 * (weight/max_weight))
pos = nx.spring_layout(G, k=1, iterations=50)
for node in G.nodes():
G.nodes[node]['pos'] = pos[node]
return G
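# In create_semantic_network, spring_layout's k sets the preferred node spacing
# and iterations the number of force-directed refinement steps; positions are
# stored on each node as 'pos' so rendering code can reuse the layout.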
@st.cache_data
def process_all_analyses(text, _analyzer):
segments = _analyzer.split_text(text, max_length=512)
num_segments = len(segments)
segment_labels = [f"{i+1}" for i in range(num_segments)]
sentiment_scores, basic_emotions = _analyzer.analyze_emotional_trajectory(text)
    moral_trajectories = {foundation: [] for foundation in _analyzer.label_names}
for segment in segments:
moral_scores = _analyzer.analyze_moral_foundations(segment)
for foundation in moral_trajectories.keys():
moral_trajectories[foundation].append(moral_scores[foundation])
return segments, segment_labels, sentiment_scores, basic_emotions, moral_trajectories
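# st.cache_data memoizes on a hash of the arguments; the leading underscore in
# _analyzer tells Streamlit to skip hashing that parameter (model objects are
# not hashable), so the cache key is effectively the text alone.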
def main():
st.title("🗣️ Political Text Analysis Toolkit")
analyzer = SpeechAnalyzer()
uploaded_file = st.file_uploader("Upload your document", type=['txt', 'docx', 'pdf'])
if uploaded_file is not None:
if uploaded_file.name.endswith('.txt'):
text = uploaded_file.getvalue().decode('utf-8')
elif uploaded_file.name.endswith('.docx'):
import docx
doc = docx.Document(uploaded_file)
text = '\n'.join([paragraph.text for paragraph in doc.paragraphs])
        elif uploaded_file.name.endswith('.pdf'):
            import PyPDF2
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            # extract_text() can return None for image-only pages
            text = ' '.join([(page.extract_text() or '') for page in pdf_reader.pages])
progress_bar = st.progress(0)
status_text = st.empty()
tab1, tab2, tab3, tab4, tab5 = st.tabs([
"Moral Foundations",
"Emotional Analysis",
"Linguistic Insights",
"Semantic Network",
"Advanced NLP"
])
with tab1:
status_text.text('Analyzing Moral Foundations...')
progress_bar.progress(20)
st.subheader("Moral Foundations Analysis")
moral_scores = analyzer.analyze_moral_foundations(text)
moral_df = pd.DataFrame.from_dict(moral_scores, orient='index', columns=['Score'])
moral_df.index.name = 'Moral Foundation'
moral_df = moral_df.reset_index()
fig = px.bar(
moral_df,
x='Moral Foundation',
y='Score',
title='Moral Foundations Breakdown',
color='Moral Foundation'
)
st.plotly_chart(fig)
for foundation, score in moral_scores.items():
st.write(f"**{MORAL_FOUNDATIONS[foundation]}**: {score:.2%}")
with tab2:
status_text.text('Processing Emotional Trajectory...')
progress_bar.progress(40)
st.subheader("Speech Trajectory Analysis")
segments, segment_labels, sentiment_scores, basic_emotions, moral_trajectories = process_all_analyses(text, analyzer)
unified_fig = go.Figure()
viz_options = st.multiselect(
"Select analyses to display:",
["Sentiment Flow", "Moral Foundations Flow", "Basic Emotions Flow"],
default=["Sentiment Flow"]
)
if "Sentiment Flow" in viz_options:
unified_fig.add_trace(go.Scatter(
x=segment_labels,
y=sentiment_scores,
name='Sentiment',
mode='lines+markers',
line=dict(color='#1f77b4', width=3),
marker=dict(
size=8,
                        # Sentiment scores are on a [0, 1] scale with 0.5 neutral,
                        # so threshold around 0.5 rather than 0
                        color=['#ff4444' if score < 0.4 else '#44ff44' if score > 0.6 else '#888888' for score in sentiment_scores]
)
))
if "Moral Foundations Flow" in viz_options:
colors = px.colors.qualitative.Set3[:5]
for idx, (foundation, scores) in enumerate(moral_trajectories.items()):
unified_fig.add_trace(go.Scatter(
x=segment_labels,
y=scores,
name=MORAL_FOUNDATIONS[foundation],
mode='lines+markers',
line=dict(color=colors[idx], width=2),
marker=dict(size=6)
))
if "Basic Emotions Flow" in viz_options:
emotions_df = pd.DataFrame({
'Segment': segment_labels,
'Emotion': basic_emotions
})
emotion_colors = {
'joy': '#FFD700', # Gold
'sadness': '#4169E1', # Royal Blue
'anger': '#FF4500', # Red Orange
'fear': '#800080', # Purple
'disgust': '#006400', # Dark Green
'surprise': '#FFA500' # Orange
}
unified_fig.add_trace(go.Bar(
x=segment_labels,
y=[1] * len(basic_emotions),
name=f'Emotions Found: {", ".join(sorted(set(basic_emotions)))}',
marker=dict(
color=[emotion_colors.get(e.lower(), '#808080') for e in basic_emotions],
line=dict(width=1, color='#000000')
),
opacity=0.8,
hovertemplate="Segment %{x}
Emotion: %{text}