hylee
committed on
Commit
·
337b02e
1
Parent(s):
f776d9e
make adjustments
Browse files- handler.py +34 -6
handler.py
CHANGED
@@ -66,7 +66,7 @@ class Utterance:
|
|
66 |
'text': self.text,
|
67 |
'role': self.role,
|
68 |
'timestamp': self.timestamp,
|
69 |
-
'moments': {'reasoning': self.reasoning, 'questioning': self.question, 'uptake': self.uptake},
|
70 |
'unitMeasure': self.unit_measure,
|
71 |
'aggregateUnitMeasure': self.aggregate_unit_measure,
|
72 |
'wordCount': self.word_count
|
@@ -112,44 +112,70 @@ class Transcript:
|
|
112 |
if ((uptake_speaker is None)):
|
113 |
return None
|
114 |
teacher_words = 0
|
|
|
115 |
student_words = 0
|
|
|
116 |
for utt in self.utterances:
|
117 |
if (utt.speaker == uptake_speaker):
|
118 |
utt.role = 'teacher'
|
119 |
teacher_words += utt.get_num_words()
|
|
|
120 |
else:
|
121 |
utt.role = 'student'
|
122 |
student_words += utt.get_num_words()
|
|
|
123 |
teacher_percentage = round(
|
124 |
(teacher_words / (teacher_words + student_words)) * 100)
|
125 |
student_percentage = 100 - teacher_percentage
|
126 |
-
|
|
|
|
|
127 |
|
128 |
def get_word_cloud_dicts(self):
    """Count word occurrences per speaker role for a word cloud.

    Every utterance in ``self.utterances`` is cleaned with
    ``get_clean_text(remove_punct=True)`` and split on single spaces.
    Words from utterances whose ``role`` is ``'teacher'`` are tallied as
    teacher words; words from every other utterance are tallied as
    student words.

    Returns:
        A list of ``{'text': word, 'value': count, 'category': role}``
        dicts: all teacher words (in first-seen order) followed by all
        student words (in first-seen order).
    """
    teacher_counts = {}
    student_counts = {}
    for utt in self.utterances:
        # Any role other than 'teacher' is counted as student talk.
        counts = teacher_counts if utt.role == 'teacher' else student_counts
        for word in utt.get_clean_text(remove_punct=True).split(' '):
            # split(' ') yields '' for empty text or consecutive spaces;
            # those are not words and must not show up in the cloud.
            if not word:
                continue
            counts[word] = counts.get(word, 0) + 1

    dict_list = [
        {'text': word, 'value': count, 'category': 'teacher'}
        for word, count in teacher_counts.items()
    ]
    dict_list.extend(
        {'text': word, 'value': count, 'category': 'student'}
        for word, count in student_counts.items()
    )
    # The tallies were previously built and then discarded; hand them back.
    return dict_list
|
149 |
-
|
|
|
|
|
150 |
|
151 |
def get_talk_timeline(self):
    """Return one timeline entry per utterance, in transcript order.

    Returns:
        A list holding the result of ``to_talk_timeline_dict()`` for each
        item of ``self.utterances``; empty when there are no utterances.
    """
    return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
def to_dict(self):
|
155 |
return {
|
@@ -325,9 +351,11 @@ class EndpointHandler():
|
|
325 |
question_model = QuestionModel(
|
326 |
self.device, self.tokenizer, self.input_builder)
|
327 |
question_model.run_inference(transcript)
|
328 |
-
transcript.update_utterance_roles
|
|
|
329 |
talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
|
330 |
talk_timeline = transcript.get_talk_timeline()
|
331 |
-
|
|
|
332 |
|
333 |
-
return talk_dist, talk_len,
|
|
|
66 |
'text': self.text,
|
67 |
'role': self.role,
|
68 |
'timestamp': self.timestamp,
|
69 |
+
'moments': {'reasoning': True if self.reasoning else False, 'questioning': True if self.question else False, 'uptake': True if self.uptake else False},
|
70 |
'unitMeasure': self.unit_measure,
|
71 |
'aggregateUnitMeasure': self.aggregate_unit_measure,
|
72 |
'wordCount': self.word_count
|
|
|
112 |
if ((uptake_speaker is None)):
|
113 |
return None
|
114 |
teacher_words = 0
|
115 |
+
teacher_utt_count = 0
|
116 |
student_words = 0
|
117 |
+
student_utt_count = 0
|
118 |
for utt in self.utterances:
|
119 |
if (utt.speaker == uptake_speaker):
|
120 |
utt.role = 'teacher'
|
121 |
teacher_words += utt.get_num_words()
|
122 |
+
teacher_utt_count += 1
|
123 |
else:
|
124 |
utt.role = 'student'
|
125 |
student_words += utt.get_num_words()
|
126 |
+
student_utt_count += 1
|
127 |
teacher_percentage = round(
|
128 |
(teacher_words / (teacher_words + student_words)) * 100)
|
129 |
student_percentage = 100 - teacher_percentage
|
130 |
+
avg_teacher_length = teacher_words / teacher_utt_count
|
131 |
+
avg_student_length = student_words / student_utt_count
|
132 |
+
return {'talk_distribution': {'teacher': teacher_percentage, 'student': student_percentage}}, {'talk_length': {'teacher': avg_teacher_length, 'student': avg_student_length}}
|
133 |
|
134 |
def get_word_cloud_dicts(self):
    """Build the top-word lists used for the word-cloud views.

    Every utterance in ``self.utterances`` is cleaned with
    ``get_clean_text(remove_punct=True)`` and split on single spaces.
    Words from utterances whose ``role`` is ``'teacher'`` are tallied as
    teacher words, and additionally as uptake words when the utterance's
    ``uptake`` equals 1. Words from every other utterance are tallied as
    student words.

    Returns:
        A 2-tuple ``(common, uptake)``:

        * ``common`` is ``{'common_top_words': [...]}`` with up to 50
          ``{'text', 'value', 'category'}`` entries covering teacher and
          student words, sorted by descending count.
        * ``uptake`` is ``{'uptake_top_words': [...]}`` with up to 50
          entries (category ``'teacher'``) for words from teacher uptake
          utterances, sorted by descending count.
    """
    teacher_counts = {}
    student_counts = {}
    uptake_teacher_counts = {}
    for utt in self.utterances:
        is_teacher = utt.role == 'teacher'
        is_uptake = is_teacher and utt.uptake == 1
        for word in utt.get_clean_text(remove_punct=True).split(' '):
            # split(' ') yields '' for empty text or consecutive spaces;
            # those are not words and must not show up in the cloud.
            if not word:
                continue
            if is_teacher:
                teacher_counts[word] = teacher_counts.get(word, 0) + 1
                if is_uptake:
                    uptake_teacher_counts[word] = (
                        uptake_teacher_counts.get(word, 0) + 1)
            else:
                student_counts[word] = student_counts.get(word, 0) + 1

    dict_list = [
        {'text': word, 'value': count, 'category': 'teacher'}
        for word, count in teacher_counts.items()
    ]
    dict_list.extend(
        {'text': word, 'value': count, 'category': 'student'}
        for word, count in student_counts.items()
    )
    # Iterate the uptake tally itself: indexing it with every teacher word
    # raised KeyError for words that never occurred in an uptake utterance.
    uptake_dict_list = [
        {'text': word, 'value': count, 'category': 'teacher'}
        for word, count in uptake_teacher_counts.items()
    ]

    # sorted() is stable, so ties keep first-seen order (teacher before student).
    sorted_dict_list = sorted(
        dict_list, key=lambda entry: entry['value'], reverse=True)
    sorted_uptake_dict_list = sorted(
        uptake_dict_list, key=lambda entry: entry['value'], reverse=True)
    return ({'common_top_words': sorted_dict_list[:50]},
            {'uptake_top_words': sorted_uptake_dict_list[:50]})
|
166 |
|
167 |
def get_talk_timeline(self):
    """Return one timeline entry per utterance, in transcript order.

    Returns:
        A list holding the result of ``to_talk_timeline_dict()`` for each
        item of ``self.utterances``; empty when there are no utterances.
    """
    return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
|
169 |
+
|
170 |
+
def calculate_aggregate_word_count(self):
    """Fall back to word counts when any utterance lacks a unit measure.

    If at least one utterance in ``self.utterances`` has ``unit_measure``
    set to ``None``, every utterance is updated in place:
    ``unit_measure`` becomes its ``get_num_words()`` value and
    ``aggregate_unit_measure`` becomes the running total of word counts up
    to and including that utterance. If no utterance has a ``None``
    ``unit_measure``, nothing is modified.

    Returns:
        None. The utterances are mutated in place.
    """
    # Short-circuits on the first missing value instead of building a list.
    if all(utt.unit_measure is not None for utt in self.utterances):
        return

    running_total = 0
    for utt in self.utterances:
        num_words = utt.get_num_words()  # computed once per utterance
        running_total += num_words
        utt.unit_measure = num_words
        utt.aggregate_unit_measure = running_total
|
178 |
+
|
179 |
|
180 |
def to_dict(self):
|
181 |
return {
|
|
|
351 |
question_model = QuestionModel(
|
352 |
self.device, self.tokenizer, self.input_builder)
|
353 |
question_model.run_inference(transcript)
|
354 |
+
transcript.update_utterance_roles()
|
355 |
+
transcript.calculate_aggregate_word_count()
|
356 |
talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
|
357 |
talk_timeline = transcript.get_talk_timeline()
|
358 |
+
talk_moments = {"talk_moments": talk_timeline}
|
359 |
+
word_cloud, uptake_word_cloud = transcript.get_word_cloud_dicts()
|
360 |
|
361 |
+
return talk_dist, talk_len, talk_moments, word_cloud, uptake_word_cloud
|