hylee719
/

transcript-analysis-testing

Model card Files Files and versions

xet

Community

hylee committed on Nov 5, 2023

Commit

337b02e

1 Parent(s): f776d9e

make adjustments

Browse files

Files changed (1) hide show

handler.py +34 -6

handler.py CHANGED Viewed

@@ -66,7 +66,7 @@ class Utterance:
             'text': self.text,
             'role': self.role,
             'timestamp': self.timestamp,
-            'moments': {'reasoning': self.reasoning, 'questioning': self.question, 'uptake': self.uptake},
             'unitMeasure': self.unit_measure,
             'aggregateUnitMeasure': self.aggregate_unit_measure,
             'wordCount': self.word_count
@@ -112,44 +112,70 @@ class Transcript:
         if ((uptake_speaker is None)):
             return None
         teacher_words = 0
         student_words = 0
         for utt in self.utterances:
             if (utt.speaker == uptake_speaker):
                 utt.role = 'teacher'
                 teacher_words += utt.get_num_words()
             else:
                 utt.role = 'student'
                 student_words += utt.get_num_words()
         teacher_percentage = round(
             (teacher_words / (teacher_words + student_words)) * 100)
         student_percentage = 100 - teacher_percentage
-        return {'talk_distribution': {'teacher': teacher_percentage, 'student': student_percentage}}, {'talk_length': {'teacher': teacher_words, 'student': student_words}}
     def get_word_cloud_dicts(self):
         """Tally word frequencies per role and return them as word-cloud entries.

         Each utterance's cleaned text (punctuation removed) is split on single
         spaces. Words from utterances whose role is 'teacher' go into the
         teacher tally; words from every other utterance go into the student
         tally.

         Returns:
             A list of ``{'text', 'value', 'category'}`` dicts: all teacher
             words first, then all student words, each group in first-seen
             order.
         """
         tallies = {'teacher': {}, 'student': {}}
         for utterance in self.utterances:
             # Any role other than 'teacher' is counted as student talk.
             category = 'teacher' if utterance.role == 'teacher' else 'student'
             tally = tallies[category]
             tokens = utterance.get_clean_text(remove_punct=True).split(' ')
             for token in tokens:
                 tally[token] = tally.get(token, 0) + 1

         teacher_entries = [
             {'text': token, 'value': count, 'category': 'teacher'}
             for token, count in tallies['teacher'].items()
         ]
         student_entries = [
             {'text': token, 'value': count, 'category': 'student'}
             for token, count in tallies['student'].items()
         ]
         return teacher_entries + student_entries
     def get_talk_timeline(self):
         """Return each utterance's ``to_talk_timeline_dict()`` result, in order."""
         timeline = []
         for utt in self.utterances:
             timeline.append(utt.to_talk_timeline_dict())
         return timeline
     def to_dict(self):
         return {
@@ -325,9 +351,11 @@ class EndpointHandler():
         question_model = QuestionModel(
             self.device, self.tokenizer, self.input_builder)
         question_model.run_inference(transcript)
-        transcript.update_utterance_roles
         talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
         talk_timeline = transcript.get_talk_timeline()
-        word_cloud = transcript.get_word_cloud_dicts()
-        return talk_dist, talk_len, talk_timeline, word_cloud

             'text': self.text,
             'role': self.role,
             'timestamp': self.timestamp,
+            'moments': {'reasoning': True if self.reasoning else False, 'questioning': True if self.question else False, 'uptake': True if self.uptake else False},
             'unitMeasure': self.unit_measure,
             'aggregateUnitMeasure': self.aggregate_unit_measure,
             'wordCount': self.word_count
         if ((uptake_speaker is None)):
             return None
         teacher_words = 0
+        teacher_utt_count = 0
         student_words = 0
+        student_utt_count = 0
         for utt in self.utterances:
             if (utt.speaker == uptake_speaker):
                 utt.role = 'teacher'
                 teacher_words += utt.get_num_words()
+                teacher_utt_count += 1
             else:
                 utt.role = 'student'
                 student_words += utt.get_num_words()
+                student_utt_count += 1
         teacher_percentage = round(
             (teacher_words / (teacher_words + student_words)) * 100)
         student_percentage = 100 - teacher_percentage
+        avg_teacher_length = teacher_words / teacher_utt_count
+        avg_student_length = student_words / student_utt_count
+        return {'talk_distribution': {'teacher': teacher_percentage, 'student': student_percentage}}, {'talk_length': {'teacher': avg_teacher_length, 'student': avg_student_length}}
     def get_word_cloud_dicts(self):
         """Build the top-word lists for the overall and uptake word clouds.

         Each utterance's cleaned text (punctuation removed) is split on single
         spaces. Words are tallied per role: utterances whose role is 'teacher'
         feed the teacher tally, every other utterance feeds the student tally.
         Words from teacher utterances with ``uptake == 1`` are additionally
         tallied separately.

         Returns:
             A 2-tuple of dicts:
             ``{'common_top_words': [...]}`` holds the 50 most frequent
             teacher/student entries, and ``{'uptake_top_words': [...]}`` holds
             the 50 most frequent words from teacher uptake utterances. Every
             entry is a ``{'text', 'value', 'category'}`` dict, sorted by
             descending ``value``.
         """
         teacher_dict = {}
         student_dict = {}
         uptake_teacher_dict = {}
         for utt in self.utterances:
             words = utt.get_clean_text(remove_punct=True).split(' ')
             is_teacher = utt.role == 'teacher'
             # `uptake` is only consulted for teacher utterances.
             is_uptake = is_teacher and utt.uptake == 1
             for word in words:
                 if is_teacher:
                     teacher_dict[word] = teacher_dict.get(word, 0) + 1
                     if is_uptake:
                         uptake_teacher_dict[word] = (
                             uptake_teacher_dict.get(word, 0) + 1)
                 else:
                     student_dict[word] = student_dict.get(word, 0) + 1

         dict_list = [
             {'text': word, 'value': count, 'category': 'teacher'}
             for word, count in teacher_dict.items()
         ]
         dict_list += [
             {'text': word, 'value': count, 'category': 'student'}
             for word, count in student_dict.items()
         ]
         # Walk teacher_dict so entries keep teacher first-seen order, but skip
         # words that never occurred in an uptake utterance: indexing
         # uptake_teacher_dict with them would raise KeyError.
         uptake_dict_list = [
             {'text': word, 'value': uptake_teacher_dict[word],
              'category': 'teacher'}
             for word in teacher_dict
             if word in uptake_teacher_dict
         ]

         sorted_dict_list = sorted(
             dict_list, key=lambda x: x['value'], reverse=True)
         sorted_uptake_dict_list = sorted(
             uptake_dict_list, key=lambda x: x['value'], reverse=True)
         return ({'common_top_words': sorted_dict_list[:50]},
                 {'uptake_top_words': sorted_uptake_dict_list[:50]})
     def get_talk_timeline(self):
         """Collect ``to_talk_timeline_dict()`` from every utterance, in order."""
         entries = []
         for current in self.utterances:
             entries.append(current.to_talk_timeline_dict())
         return entries
+    def calculate_aggregate_word_count(self):
+        unit_measures = [utt.unit_measure for utt in self.utterances]
+        if None in unit_measures:
+            aggregate_word_count = 0
+            for utt in self.utterances:
+                aggregate_word_count += utt.get_num_words()
+                utt.unit_measure = utt.get_num_words()
+                utt.aggregate_unit_measure = aggregate_word_count
     def to_dict(self):
         return {
         question_model = QuestionModel(
             self.device, self.tokenizer, self.input_builder)
         question_model.run_inference(transcript)
+        transcript.update_utterance_roles()
+        transcript.calculate_aggregate_word_count()
         talk_dist, talk_len = transcript.get_talk_distribution_and_length(uptake_speaker)
         talk_timeline = transcript.get_talk_timeline()
+        talk_moments = {"talk_moments": talk_timeline}
+        word_cloud, uptake_word_cloud = transcript.get_word_cloud_dicts()
+        return talk_dist, talk_len, talk_moments, word_cloud, uptake_word_cloud