hylee719 committed on
Commit
c83470a
·
verified ·
1 Parent(s): 7290b72

add teacherTopWords and studentTopWords

Browse files
Files changed (1) hide show
  1. handler.py +36 -19
handler.py CHANGED
@@ -174,17 +174,19 @@ class Transcript:
174
  student_dict[word] += 1
175
  dict_list = []
176
  uptake_dict_list = []
 
 
177
  for word in uptake_teacher_dict.keys():
178
  uptake_dict_list.append({'text': word, 'value': uptake_teacher_dict[word], 'category': 'teacher'})
179
  for word in teacher_dict.keys():
180
- dict_list.append(
181
- {'text': word, 'value': teacher_dict[word], 'category': 'teacher'})
182
  for word in student_dict.keys():
183
- dict_list.append(
184
- {'text': word, 'value': student_dict[word], 'category': 'student'})
185
  sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
186
  sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
187
- return sorted_dict_list[:50], sorted_uptake_dict_list[:50]
188
 
189
  def get_talk_timeline(self):
190
  return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
@@ -377,7 +379,8 @@ def load_math_terms():
377
  def run_math_density(transcript):
378
  math_terms, math_terms_dict = load_math_terms()
379
  sorted_terms = sorted(math_terms, key=len, reverse=True)
380
- math_word_cloud = {}
 
381
  for i, utt in enumerate(transcript.utterances):
382
  text = utt.get_clean_text(remove_punct=False)
383
  num_matches = 0
@@ -388,21 +391,31 @@ def run_math_density(transcript):
388
  # Filter out matches that share positions with longer terms
389
  matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
390
  if len(matches) > 0:
391
- if math_terms_dict[term] not in math_word_cloud:
392
- math_word_cloud[math_terms_dict[term]] = 0
393
- math_word_cloud[math_terms_dict[term]] += len(matches)
394
- match_list.append(math_terms_dict[term])
 
 
 
 
 
395
  # Update matched positions
396
  matched_positions.update((match.start(), match.end()) for match in matches)
397
  num_matches += len(matches)
398
  utt.num_math_terms = num_matches
399
  utt.math_terms = match_list
400
- dict_list = []
401
- for word in math_word_cloud.keys():
402
- dict_list.append(
403
- {'text': word, 'value': math_word_cloud[word]})
404
- sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
405
- return sorted_dict_list[:50]
 
 
 
 
 
406
 
407
  class EndpointHandler():
408
  def __init__(self, path="."):
@@ -457,18 +470,22 @@ class EndpointHandler():
457
  focusing_question_model.run_inference(transcript, uptake_speaker=uptake_speaker)
458
  del focusing_question_model
459
 
460
- math_cloud = run_math_density(transcript)
461
  transcript.update_utterance_roles(uptake_speaker)
 
462
  transcript.calculate_aggregate_word_count()
463
- return_dict = {'talkDistribution': None, 'talkLength': None, 'talkMoments': None, 'commonTopWords': None, 'uptakeTopWords': None, 'mathTopWords': None}
464
  talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
465
  return_dict['talkDistribution'] = talk_dist
466
  return_dict['talkLength'] = talk_len
467
  talk_moments = transcript.get_talk_timeline()
468
  return_dict['talkMoments'] = talk_moments
469
- word_cloud, uptake_word_cloud = transcript.get_word_clouds()
470
  return_dict['commonTopWords'] = word_cloud
471
  return_dict['uptakeTopWords'] = uptake_word_cloud
472
  return_dict['mathTopWords'] = math_cloud
 
 
 
 
473
 
474
  return return_dict
 
174
  student_dict[word] += 1
175
  dict_list = []
176
  uptake_dict_list = []
177
+ teacher_dict_list = []
178
+ student_dict_list = []
179
  for word in uptake_teacher_dict.keys():
180
  uptake_dict_list.append({'text': word, 'value': uptake_teacher_dict[word], 'category': 'teacher'})
181
  for word in teacher_dict.keys():
182
+ teacher_dict_list.append(
183
+ {'text': word, 'value': teacher_dict[word], 'category': 'general'})
184
  for word in student_dict.keys():
185
+ student_dict_list.append(
186
+ {'text': word, 'value': student_dict[word], 'category': 'general'})
187
  sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
188
  sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
189
+ return sorted_dict_list[:50], sorted_uptake_dict_list[:50], teacher_dict_list, student_dict_list
190
 
191
  def get_talk_timeline(self):
192
  return [utterance.to_talk_timeline_dict() for utterance in self.utterances]
 
379
  def run_math_density(transcript):
380
  math_terms, math_terms_dict = load_math_terms()
381
  sorted_terms = sorted(math_terms, key=len, reverse=True)
382
+ teacher_math_word_cloud = {}
383
+ student_math_word_cloud = {}
384
  for i, utt in enumerate(transcript.utterances):
385
  text = utt.get_clean_text(remove_punct=False)
386
  num_matches = 0
 
391
  # Filter out matches that share positions with longer terms
392
  matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
393
  if len(matches) > 0:
394
+ if utt.role == "teacher":
395
+ if math_terms_dict[term] not in teacher_math_word_cloud:
396
+ teacher_math_word_cloud[math_terms_dict[term]] = 0
397
+ teacher_math_word_cloud[math_terms_dict[term]] += len(matches)
398
+ else:
399
+ if math_terms_dict[term] not in student_math_word_cloud:
400
+ student_math_word_cloud[math_terms_dict[term]] = 0
401
+ student_math_word_cloud[math_terms_dict[term]] += len(matches)
402
+ match_list.append(math_terms_dict[term])
403
  # Update matched positions
404
  matched_positions.update((match.start(), match.end()) for match in matches)
405
  num_matches += len(matches)
406
  utt.num_math_terms = num_matches
407
  utt.math_terms = match_list
408
+ teacher_dict_list = []
409
+ student_dict_list = []
410
+ for word in teacher_math_word_cloud.keys():
411
+ teacher_dict_list.append(
412
+ {'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
413
+ for word in student_math_word_cloud.keys():
414
+ student_dict_list.append(
415
+ {'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
416
+ # sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
417
+ # return sorted_dict_list[:50]
418
+ return teacher_dict_list, student_dict_list
419
 
420
  class EndpointHandler():
421
  def __init__(self, path="."):
 
470
  focusing_question_model.run_inference(transcript, uptake_speaker=uptake_speaker)
471
  del focusing_question_model
472
 
 
473
  transcript.update_utterance_roles(uptake_speaker)
474
+ teacher_math_cloud, student_math_cloud = run_math_density(transcript)
475
  transcript.calculate_aggregate_word_count()
476
+ return_dict = {'talkDistribution': None, 'talkLength': None, 'talkMoments': None, 'commonTopWords': None, 'uptakeTopWords': None, 'mathTopWords': None, 'studentTopWords': None, 'teacherTopWords': None}
477
  talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
478
  return_dict['talkDistribution'] = talk_dist
479
  return_dict['talkLength'] = talk_len
480
  talk_moments = transcript.get_talk_timeline()
481
  return_dict['talkMoments'] = talk_moments
482
+ word_cloud, uptake_word_cloud, teacher_general_cloud, student_general_cloud = transcript.get_word_clouds()
483
  return_dict['commonTopWords'] = word_cloud
484
  return_dict['uptakeTopWords'] = uptake_word_cloud
485
  return_dict['mathTopWords'] = math_cloud
486
+ teacher_cloud = teacher_math_cloud + teacher_general_cloud
487
+ student_cloud = student_math_cloud + student_general_cloud
488
+ return_dict['teacherTopWords'] = teacher_cloud
489
+ return_dict['studentTopWords'] = student_cloud
490
 
491
  return return_dict