hylee719
/

transcript-analysis-testing

Model card Files Files and versions Community

hylee719 committed on Feb 26, 2024

Commit

1ab274a

verified ·

1 Parent(s): 754c981

small

Browse files

Files changed (1) hide show

handler.py +6 -3

handler.py CHANGED Viewed

@@ -43,6 +43,7 @@ class Utterance:
         self.aggregate_unit_measure = endtime
         self.num_math_terms = None
         self.math_terms = None
         # moments
         self.uptake = None
@@ -87,7 +88,7 @@ class Utterance:
             'wordCount': self.word_count,
             'numMathTerms': self.num_math_terms,
             'mathTerms': self.math_terms,
-            'mathTermsRaw': self.math_terms_raw
         }
     def __repr__(self):
@@ -385,7 +386,7 @@ def run_math_density(transcript):
     teacher_math_word_cloud = {}
     student_math_word_cloud = {}
     for i, utt in enumerate(transcript.utterances):
-        text = utt.get_clean_text(remove_punct=False)
         num_matches = 0
         matched_positions = set()
         match_list = []
@@ -393,7 +394,7 @@ def run_math_density(transcript):
             matches = list(re.finditer(term, text, re.IGNORECASE))
             # Filter out matches that share positions with longer terms
             matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
-            matched_text = [match.group(0) for match in matches]
             if len(matches) > 0:
                 if utt.role == "teacher":
                     if math_terms_dict[term] not in teacher_math_word_cloud:
@@ -407,8 +408,10 @@ def run_math_density(transcript):
             # Update matched positions
             matched_positions.update((match.start(), match.end()) for match in matches)
             num_matches += len(matches)
         utt.num_math_terms = num_matches
         utt.math_terms = match_list
         utt.math_terms_raw = [text[start:end] for start, end in matched_positions]
     teacher_dict_list = []
     student_dict_list = []

         self.aggregate_unit_measure = endtime
         self.num_math_terms = None
         self.math_terms = None
+        self.math_terms_raw = None
         # moments
         self.uptake = None
             'wordCount': self.word_count,
             'numMathTerms': self.num_math_terms,
             'mathTerms': self.math_terms,
+            "mathTermsRaw": self.math_terms_raw,
         }
     def __repr__(self):
     teacher_math_word_cloud = {}
     student_math_word_cloud = {}
     for i, utt in enumerate(transcript.utterances):
+        text = utt.get_clean_text(remove_punct=True)
         num_matches = 0
         matched_positions = set()
         match_list = []
             matches = list(re.finditer(term, text, re.IGNORECASE))
             # Filter out matches that share positions with longer terms
             matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
+            # matched_text = [match.group(0) for match in matches]
             if len(matches) > 0:
                 if utt.role == "teacher":
                     if math_terms_dict[term] not in teacher_math_word_cloud:
             # Update matched positions
             matched_positions.update((match.start(), match.end()) for match in matches)
             num_matches += len(matches)
+            # print("match group list: ", [match.group(0) for match in matches])
         utt.num_math_terms = num_matches
         utt.math_terms = match_list
+        # utt.math_match_positions = list(matched_positions)
         utt.math_terms_raw = [text[start:end] for start, end in matched_positions]
     teacher_dict_list = []
     student_dict_list = []