kambris committed
Commit ea6bffd · verified · 1 Parent(s): 8550d55

Update app.py

Files changed (1):
  1. app.py +26 -16
app.py CHANGED
@@ -17,10 +17,12 @@ st.set_page_config(page_title="Advanced Political Speech Analysis", page_icon="
 
 # Advanced NLP Libraries
 from transformers import (
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    AutoModelForSequenceClassification,
     pipeline,
-    AutoModelForTokenClassification
+    AutoModelForTokenClassification,
+    RobertaTokenizer,
+    RobertaForSequenceClassification
 )
 import nltk
 from nltk.corpus import stopwords
@@ -58,12 +60,17 @@ RHETORICAL_DEVICES = {
 
 class SpeechAnalyzer:
     def __init__(self):
-        # Load models
-        self.moralbert_tokenizer = AutoTokenizer.from_pretrained("minyoungchang/moralbert")
-        self.moralbert_model = AutoModelForSequenceClassification.from_pretrained("minyoungchang/moralbert")
-        self.sentiment_pipeline = pipeline("sentiment-analysis")
+        # Load MoralFoundations model
+        self.moral_model_path = "MMADS/MoralFoundationsClassifier"
+        self.moral_tokenizer = RobertaTokenizer.from_pretrained(self.moral_model_path)
+        self.moral_model = RobertaForSequenceClassification.from_pretrained(self.moral_model_path)
+
+        # Load label names
+        with open(f"{self.moral_model_path}/label_names.json", 'r') as f:
+            self.label_names = json.load(f)
 
-        # Named Entity Recognition
+        # Other pipelines remain the same
+        self.sentiment_pipeline = pipeline("sentiment-analysis")
         self.ner_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
         self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
         self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
@@ -90,29 +97,32 @@ class SpeechAnalyzer:
         return segments
 
     def analyze_moral_foundations(self, text):
-        """Analyze moral foundations in text"""
+        """Analyze moral foundations using the RoBERTa-based classifier"""
        segments = self.split_text(text)
 
        foundation_scores = {
            'care': [], 'fairness': [], 'loyalty': [],
            'authority': [], 'sanctity': []
        }
-
+
        for segment in segments:
-            inputs = self.moralbert_tokenizer(segment, return_tensors="pt", truncation=True, max_length=512)
+            inputs = self.moral_tokenizer(segment, return_tensors="pt", truncation=True, max_length=512)
 
            with torch.no_grad():
-                outputs = self.moralbert_model(**inputs)
+                outputs = self.moral_model(**inputs)
 
            probabilities = torch.softmax(outputs.logits, dim=1)
 
-            for foundation in foundation_scores.keys():
-                foundation_scores[foundation].append(probabilities[0][1].item())
-
+            for idx, label in enumerate(self.label_names):
+                foundation = label.lower()
+                if foundation in foundation_scores:
+                    foundation_scores[foundation].append(probabilities[0][idx].item())
+
+        # Average the scores across segments
        aggregated_scores = {
            foundation: np.mean(scores) for foundation, scores in foundation_scores.items()
        }
-
+
        return aggregated_scores
 
    def analyze_emotional_trajectory(self, text, window_size=5):
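
For reference, a minimal standalone sketch of the new moral-foundations path outside the Streamlit app. The repo ID MMADS/MoralFoundationsClassifier, the RoBERTa classes, the softmax scoring, and the label-name matching come from the diff above; the score_segment helper, the sample sentences, and the use of huggingface_hub.hf_hub_download are illustrative assumptions. The committed code opens label_names.json under the repo ID as a local path, which only works when that repository is already checked out on disk; the sketch fetches the file from the Hub instead.

    # Minimal sketch, assuming label_names.json is hosted in the Hub repo
    # rather than sitting in a local checkout of it.
    import json

    import numpy as np
    import torch
    from huggingface_hub import hf_hub_download
    from transformers import RobertaForSequenceClassification, RobertaTokenizer

    MODEL_ID = "MMADS/MoralFoundationsClassifier"

    tokenizer = RobertaTokenizer.from_pretrained(MODEL_ID)
    model = RobertaForSequenceClassification.from_pretrained(MODEL_ID)

    # Download the label list instead of treating MODEL_ID as a directory.
    with open(hf_hub_download(repo_id=MODEL_ID, filename="label_names.json")) as f:
        label_names = json.load(f)

    def score_segment(segment):
        """Return a {foundation: probability} dict for one text segment."""
        inputs = tokenizer(segment, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            probs = torch.softmax(model(**inputs).logits, dim=1)[0]
        return {label.lower(): probs[idx].item() for idx, label in enumerate(label_names)}

    # Hypothetical segments; the app would get these from SpeechAnalyzer.split_text().
    segments = ["We must protect the most vulnerable among us.",
                "Our institutions deserve respect and obedience."]
    scores = [score_segment(s) for s in segments]

    # Average each foundation across segments, mirroring analyze_moral_foundations().
    aggregated = {name: float(np.mean([s[name] for s in scores])) for name in scores[0]}
    print(aggregated)

Pulling the label file with hf_hub_download keeps the loader working whether or not the model repository has been cloned locally, at the cost of one extra network call on first run.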