hylee719 committed on
Commit
1ab274a
·
verified ·
1 Parent(s): 754c981
Files changed (1) hide show
  1. handler.py +6 -3
handler.py CHANGED
@@ -43,6 +43,7 @@ class Utterance:
43
  self.aggregate_unit_measure = endtime
44
  self.num_math_terms = None
45
  self.math_terms = None
 
46
 
47
  # moments
48
  self.uptake = None
@@ -87,7 +88,7 @@ class Utterance:
87
  'wordCount': self.word_count,
88
  'numMathTerms': self.num_math_terms,
89
  'mathTerms': self.math_terms,
90
- 'mathTermsRaw': self.math_terms_raw
91
  }
92
 
93
  def __repr__(self):
@@ -385,7 +386,7 @@ def run_math_density(transcript):
385
  teacher_math_word_cloud = {}
386
  student_math_word_cloud = {}
387
  for i, utt in enumerate(transcript.utterances):
388
- text = utt.get_clean_text(remove_punct=False)
389
  num_matches = 0
390
  matched_positions = set()
391
  match_list = []
@@ -393,7 +394,7 @@ def run_math_density(transcript):
393
  matches = list(re.finditer(term, text, re.IGNORECASE))
394
  # Filter out matches that share positions with longer terms
395
  matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
396
- matched_text = [match.group(0) for match in matches]
397
  if len(matches) > 0:
398
  if utt.role == "teacher":
399
  if math_terms_dict[term] not in teacher_math_word_cloud:
@@ -407,8 +408,10 @@ def run_math_density(transcript):
407
  # Update matched positions
408
  matched_positions.update((match.start(), match.end()) for match in matches)
409
  num_matches += len(matches)
 
410
  utt.num_math_terms = num_matches
411
  utt.math_terms = match_list
 
412
  utt.math_terms_raw = [text[start:end] for start, end in matched_positions]
413
  teacher_dict_list = []
414
  student_dict_list = []
 
43
  self.aggregate_unit_measure = endtime
44
  self.num_math_terms = None
45
  self.math_terms = None
46
+ self.math_terms_raw = None
47
 
48
  # moments
49
  self.uptake = None
 
88
  'wordCount': self.word_count,
89
  'numMathTerms': self.num_math_terms,
90
  'mathTerms': self.math_terms,
91
+ "mathTermsRaw": self.math_terms_raw,
92
  }
93
 
94
  def __repr__(self):
 
386
  teacher_math_word_cloud = {}
387
  student_math_word_cloud = {}
388
  for i, utt in enumerate(transcript.utterances):
389
+ text = utt.get_clean_text(remove_punct=True)
390
  num_matches = 0
391
  matched_positions = set()
392
  match_list = []
 
394
  matches = list(re.finditer(term, text, re.IGNORECASE))
395
  # Filter out matches that share positions with longer terms
396
  matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
397
+ # matched_text = [match.group(0) for match in matches]
398
  if len(matches) > 0:
399
  if utt.role == "teacher":
400
  if math_terms_dict[term] not in teacher_math_word_cloud:
 
408
  # Update matched positions
409
  matched_positions.update((match.start(), match.end()) for match in matches)
410
  num_matches += len(matches)
411
+ # print("match group list: ", [match.group(0) for match in matches])
412
  utt.num_math_terms = num_matches
413
  utt.math_terms = match_list
414
+ # utt.math_match_positions = list(matched_positions)
415
  utt.math_terms_raw = [text[start:end] for start, end in matched_positions]
416
  teacher_dict_list = []
417
  student_dict_list = []