hylee committed on
Commit
337b02e
·
1 Parent(s): f776d9e

make adjustments

Browse files
Files changed (1) hide show
  1. handler.py +34 -6
handler.py CHANGED
@@ -66,7 +66,7 @@ class Utterance:
66
  'text': self.text,
67
  'role': self.role,
68
  'timestamp': self.timestamp,
69
- 'moments': {'reasoning': self.reasoning, 'questioning': self.question, 'uptake': self.uptake},
70
  'unitMeasure': self.unit_measure,
71
  'aggregateUnitMeasure': self.aggregate_unit_measure,
72
  'wordCount': self.word_count
@@ -112,44 +112,70 @@ class Transcript:
112
  if ((uptake_speaker is None)):
113
  return None
114
  teacher_words = 0
 
115
  student_words = 0
 
116
  for utt in self.utterances:
117
  if (utt.speaker == uptake_speaker):
118
  utt.role = 'teacher'
119
  teacher_words += utt.get_num_words()
 
120
  else:
121
  utt.role = 'student'
122
  student_words += utt.get_num_words()
 
123
  teacher_percentage = round(
124
  (teacher_words / (teacher_words + student_words)) * 100)
125
  student_percentage = 100 - teacher_percentage
126
- return {'talk_distribution': {'teacher': teacher_percentage, 'student': student_percentage}}, {'talk_length': {'teacher': teacher_words, 'student': student_words}}
 
 
127
 
128
  def get_word_cloud_dicts(self):
129
  teacher_dict = {}
130
  student_dict = {}
 
131
  for utt in self.utterances:
 
132
  words = (utt.get_clean_text(remove_punct=True)).split(' ')
133
  for word in words:
134
  if utt.role == 'teacher':
135
  if word not in teacher_dict:
136
  teacher_dict[word] = 0
137
  teacher_dict[word] += 1
 
 
 
 
138
  else:
139
  if word not in student_dict:
140
  student_dict[word] = 0
141
  student_dict[word] += 1
142
  dict_list = []
 
143
  for word in teacher_dict.keys():
144
  dict_list.append(
145
  {'text': word, 'value': teacher_dict[word], 'category': 'teacher'})
 
146
  for word in student_dict.keys():
147
  dict_list.append(
148
  {'text': word, 'value': student_dict[word], 'category': 'student'})
149
- return dict_list
 
 
150
 
151
  def get_talk_timeline(self):
152
  return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
 
 
 
 
 
 
 
 
 
 
153
 
154
  def to_dict(self):
155
  return {
@@ -325,9 +351,11 @@ class EndpointHandler():
325
  question_model = QuestionModel(
326
  self.device, self.tokenizer, self.input_builder)
327
  question_model.run_inference(transcript)
328
- transcript.update_utterance_roles
 
329
  talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
330
  talk_timeline = transcript.get_talk_timeline()
331
- word_cloud = transcript.get_word_cloud_dicts()
 
332
 
333
- return talk_dist, talk_len, talk_timeline, word_cloud
 
66
  'text': self.text,
67
  'role': self.role,
68
  'timestamp': self.timestamp,
69
+ 'moments': {'reasoning': True if self.reasoning else False, 'questioning': True if self.question else False, 'uptake': True if self.uptake else False},
70
  'unitMeasure': self.unit_measure,
71
  'aggregateUnitMeasure': self.aggregate_unit_measure,
72
  'wordCount': self.word_count
 
112
  if ((uptake_speaker is None)):
113
  return None
114
  teacher_words = 0
115
+ teacher_utt_count = 0
116
  student_words = 0
117
+ student_utt_count = 0
118
  for utt in self.utterances:
119
  if (utt.speaker == uptake_speaker):
120
  utt.role = 'teacher'
121
  teacher_words += utt.get_num_words()
122
+ teacher_utt_count += 1
123
  else:
124
  utt.role = 'student'
125
  student_words += utt.get_num_words()
126
+ student_utt_count += 1
127
  teacher_percentage = round(
128
  (teacher_words / (teacher_words + student_words)) * 100)
129
  student_percentage = 100 - teacher_percentage
130
+ avg_teacher_length = teacher_words / teacher_utt_count
131
+ avg_student_length = student_words / student_utt_count
132
+ return {'talk_distribution': {'teacher': teacher_percentage, 'student': student_percentage}}, {'talk_length': {'teacher': avg_teacher_length, 'student': avg_student_length}}
133
 
134
  def get_word_cloud_dicts(self):
135
  teacher_dict = {}
136
  student_dict = {}
137
+ uptake_teacher_dict = {}
138
  for utt in self.utterances:
139
+
140
  words = (utt.get_clean_text(remove_punct=True)).split(' ')
141
  for word in words:
142
  if utt.role == 'teacher':
143
  if word not in teacher_dict:
144
  teacher_dict[word] = 0
145
  teacher_dict[word] += 1
146
+ if utt.uptake == 1:
147
+ if word not in uptake_teacher_dict:
148
+ uptake_teacher_dict[word] = 0
149
+ uptake_teacher_dict[word] += 1
150
  else:
151
  if word not in student_dict:
152
  student_dict[word] = 0
153
  student_dict[word] += 1
154
  dict_list = []
155
+ uptake_dict_list = []
156
  for word in teacher_dict.keys():
157
  dict_list.append(
158
  {'text': word, 'value': teacher_dict[word], 'category': 'teacher'})
159
+ uptake_dict_list.append({'text': word, 'value': uptake_teacher_dict[word], 'category': 'teacher'})
160
  for word in student_dict.keys():
161
  dict_list.append(
162
  {'text': word, 'value': student_dict[word], 'category': 'student'})
163
+ sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
164
+ sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
165
+ return {'common_top_words': sorted_dict_list[:50]}, {'uptake_top_words':sorted_uptake_dict_list[:50]}
166
 
167
  def get_talk_timeline(self):
168
  return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
169
+
170
def calculate_aggregate_word_count(self):
    """Fall back to word counts when any utterance lacks a unit measure.

    If at least one utterance has ``unit_measure`` set to ``None``, every
    utterance is overwritten: ``unit_measure`` becomes its own word count
    (from ``get_num_words()``) and ``aggregate_unit_measure`` becomes the
    running total of word counts up to and including that utterance.
    When no utterance has a ``None`` unit measure, nothing is modified.

    Returns:
        None. Utterances are updated in place.
    """
    # Short-circuits on the first missing value instead of building a list.
    if not any(utt.unit_measure is None for utt in self.utterances):
        return

    running_total = 0
    for utt in self.utterances:
        # Count once and reuse, so both fields come from the same value.
        num_words = utt.get_num_words()
        running_total += num_words
        utt.unit_measure = num_words
        utt.aggregate_unit_measure = running_total
178
+
179
 
180
  def to_dict(self):
181
  return {
 
351
  question_model = QuestionModel(
352
  self.device, self.tokenizer, self.input_builder)
353
  question_model.run_inference(transcript)
354
+ transcript.update_utterance_roles()
355
+ transcript.calculate_aggregate_word_count()
356
  talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
357
  talk_timeline = transcript.get_talk_timeline()
358
+ talk_moments = {"talk_moments": talk_timeline}
359
+ word_cloud, uptake_word_cloud = transcript.get_word_cloud_dicts()
360
 
361
+ return talk_dist, talk_len, talk_moments, word_cloud, uptake_word_cloud