Add teacherTopWords and studentTopWords to the handler's returned dictionary
Browse files
- handler.py +36 -19
handler.py
CHANGED
@@ -174,17 +174,19 @@ class Transcript:
|
|
174 |
student_dict[word] += 1
|
175 |
dict_list = []
|
176 |
uptake_dict_list = []
|
|
|
|
|
177 |
for word in uptake_teacher_dict.keys():
|
178 |
uptake_dict_list.append({'text': word, 'value': uptake_teacher_dict[word], 'category': 'teacher'})
|
179 |
for word in teacher_dict.keys():
|
180 |
-
|
181 |
-
{'text': word, 'value': teacher_dict[word], 'category': '
|
182 |
for word in student_dict.keys():
|
183 |
-
|
184 |
-
{'text': word, 'value': student_dict[word], 'category': '
|
185 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
186 |
sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
|
187 |
-
return sorted_dict_list[:50], sorted_uptake_dict_list[:50]
|
188 |
|
189 |
def get_talk_timeline(self):
|
190 |
return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
|
@@ -377,7 +379,8 @@ def load_math_terms():
|
|
377 |
def run_math_density(transcript):
|
378 |
math_terms, math_terms_dict = load_math_terms()
|
379 |
sorted_terms = sorted(math_terms, key=len, reverse=True)
|
380 |
-
|
|
|
381 |
for i, utt in enumerate(transcript.utterances):
|
382 |
text = utt.get_clean_text(remove_punct=False)
|
383 |
num_matches = 0
|
@@ -388,21 +391,31 @@ def run_math_density(transcript):
|
|
388 |
# Filter out matches that share positions with longer terms
|
389 |
matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
|
390 |
if len(matches) > 0:
|
391 |
-
if
|
392 |
-
|
393 |
-
|
394 |
-
|
|
|
|
|
|
|
|
|
|
|
395 |
# Update matched positions
|
396 |
matched_positions.update((match.start(), match.end()) for match in matches)
|
397 |
num_matches += len(matches)
|
398 |
utt.num_math_terms = num_matches
|
399 |
utt.math_terms = match_list
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
|
|
|
|
|
|
|
|
|
|
406 |
|
407 |
class EndpointHandler():
|
408 |
def __init__(self, path="."):
|
@@ -457,18 +470,22 @@ class EndpointHandler():
|
|
457 |
focusing_question_model.run_inference(transcript, uptake_speaker=uptake_speaker)
|
458 |
del focusing_question_model
|
459 |
|
460 |
-
math_cloud = run_math_density(transcript)
|
461 |
transcript.update_utterance_roles(uptake_speaker)
|
|
|
462 |
transcript.calculate_aggregate_word_count()
|
463 |
-
return_dict = {'talkDistribution': None, 'talkLength': None, 'talkMoments': None, 'commonTopWords': None, 'uptakeTopWords': None, 'mathTopWords': None}
|
464 |
talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
|
465 |
return_dict['talkDistribution'] = talk_dist
|
466 |
return_dict['talkLength'] = talk_len
|
467 |
talk_moments = transcript.get_talk_timeline()
|
468 |
return_dict['talkMoments'] = talk_moments
|
469 |
-
word_cloud, uptake_word_cloud = transcript.get_word_clouds()
|
470 |
return_dict['commonTopWords'] = word_cloud
|
471 |
return_dict['uptakeTopWords'] = uptake_word_cloud
|
472 |
return_dict['mathTopWords'] = math_cloud
|
|
|
|
|
|
|
|
|
473 |
|
474 |
return return_dict
|
|
|
174 |
student_dict[word] += 1
|
175 |
dict_list = []
|
176 |
uptake_dict_list = []
|
177 |
+
teacher_dict_list = []
|
178 |
+
student_dict_list = []
|
179 |
for word in uptake_teacher_dict.keys():
|
180 |
uptake_dict_list.append({'text': word, 'value': uptake_teacher_dict[word], 'category': 'teacher'})
|
181 |
for word in teacher_dict.keys():
|
182 |
+
teacher_dict_list.append(
|
183 |
+
{'text': word, 'value': teacher_dict[word], 'category': 'general'})
|
184 |
for word in student_dict.keys():
|
185 |
+
student_dict_list.append(
|
186 |
+
{'text': word, 'value': student_dict[word], 'category': 'general'})
|
187 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
188 |
sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
|
189 |
+
return sorted_dict_list[:50], sorted_uptake_dict_list[:50], teacher_dict_list, student_dict_list
|
190 |
|
191 |
def get_talk_timeline(self):
|
192 |
return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
|
|
|
379 |
def run_math_density(transcript):
|
380 |
math_terms, math_terms_dict = load_math_terms()
|
381 |
sorted_terms = sorted(math_terms, key=len, reverse=True)
|
382 |
+
teacher_math_word_cloud = {}
|
383 |
+
student_math_word_cloud = {}
|
384 |
for i, utt in enumerate(transcript.utterances):
|
385 |
text = utt.get_clean_text(remove_punct=False)
|
386 |
num_matches = 0
|
|
|
391 |
# Filter out matches that share positions with longer terms
|
392 |
matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
|
393 |
if len(matches) > 0:
|
394 |
+
if utt.role == "teacher":
|
395 |
+
if math_terms_dict[term] not in teacher_math_word_cloud:
|
396 |
+
teacher_math_word_cloud[math_terms_dict[term]] = 0
|
397 |
+
teacher_math_word_cloud[math_terms_dict[term]] += len(matches)
|
398 |
+
else:
|
399 |
+
if math_terms_dict[term] not in student_math_word_cloud:
|
400 |
+
student_math_word_cloud[math_terms_dict[term]] = 0
|
401 |
+
student_math_word_cloud[math_terms_dict[term]] += len(matches)
|
402 |
+
match_list.append(math_terms_dict[term])
|
403 |
# Update matched positions
|
404 |
matched_positions.update((match.start(), match.end()) for match in matches)
|
405 |
num_matches += len(matches)
|
406 |
utt.num_math_terms = num_matches
|
407 |
utt.math_terms = match_list
|
408 |
+
teacher_dict_list = []
|
409 |
+
student_dict_list = []
|
410 |
+
for word in teacher_math_word_cloud.keys():
|
411 |
+
teacher_dict_list.append(
|
412 |
+
{'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
|
413 |
+
for word in student_math_word_cloud.keys():
|
414 |
+
student_dict_list.append(
|
415 |
+
{'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
|
416 |
+
# sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
417 |
+
# return sorted_dict_list[:50]
|
418 |
+
return teacher_dict_list, student_dict_list
|
419 |
|
420 |
class EndpointHandler():
|
421 |
def __init__(self, path="."):
|
|
|
470 |
focusing_question_model.run_inference(transcript, uptake_speaker=uptake_speaker)
|
471 |
del focusing_question_model
|
472 |
|
|
|
473 |
transcript.update_utterance_roles(uptake_speaker)
|
474 |
+
teacher_math_cloud, student_math_cloud = run_math_density(transcript)
|
475 |
transcript.calculate_aggregate_word_count()
|
476 |
+
return_dict = {'talkDistribution': None, 'talkLength': None, 'talkMoments': None, 'commonTopWords': None, 'uptakeTopWords': None, 'mathTopWords': None, 'studentTopWords': None, 'teacherTopWords': None}
|
477 |
talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
|
478 |
return_dict['talkDistribution'] = talk_dist
|
479 |
return_dict['talkLength'] = talk_len
|
480 |
talk_moments = transcript.get_talk_timeline()
|
481 |
return_dict['talkMoments'] = talk_moments
|
482 |
+
word_cloud, uptake_word_cloud, teacher_general_cloud, student_general_cloud = transcript.get_word_clouds()
|
483 |
return_dict['commonTopWords'] = word_cloud
|
484 |
return_dict['uptakeTopWords'] = uptake_word_cloud
|
485 |
return_dict['mathTopWords'] = math_cloud
|
486 |
+
teacher_cloud = teacher_math_cloud + teacher_general_cloud
|
487 |
+
student_cloud = student_math_cloud + student_general_cloud
|
488 |
+
return_dict['teacherTopWords'] = teacher_cloud
|
489 |
+
return_dict['studentTopWords'] = student_cloud
|
490 |
|
491 |
return return_dict
|