hylee committed on
Commit
e162c68
·
1 Parent(s): e5a4b0f

allow for plurals for all math words

Browse files
Files changed (1) hide show
  1. handler.py +11 -9
handler.py CHANGED
@@ -361,12 +361,14 @@ def load_math_terms():
361
  math_terms = []
362
  math_terms_dict = {}
363
  for term in MATH_WORDS:
364
- if term in MATH_PREFIXES:
365
- math_terms_dict[f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)"] = term
366
- math_terms.append(f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)")
367
- else:
368
- math_terms_dict[f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)"] = term
369
- math_terms.append(f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)")
 
 
370
  return math_terms, math_terms_dict
371
 
372
  def run_math_density(transcript):
@@ -374,9 +376,9 @@ def run_math_density(transcript):
374
  for i, utt in enumerate(transcript.utterances):
375
  found_math_terms = set()
376
  text = utt.get_clean_text(remove_punct=False)
377
- logging.set_verbosity_info()
378
- logger = logging.get_logger("transformers")
379
- logger.info(f"clean text in math density: {text}%")
380
  num_math_terms = 0
381
  for term in math_terms:
382
  count = len(re.findall(term, text))
 
361
  math_terms = []
362
  math_terms_dict = {}
363
  for term in MATH_WORDS:
364
+ math_terms.dict[f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)"] = term
365
+ math_terms.append(f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)")
366
+ # if term in MATH_PREFIXES:
367
+ # math_terms_dict[f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)"] = term
368
+ # math_terms.append(f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)")
369
+ # else:
370
+ # math_terms_dict[f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)"] = term
371
+ # math_terms.append(f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)")
372
  return math_terms, math_terms_dict
373
 
374
  def run_math_density(transcript):
 
376
  for i, utt in enumerate(transcript.utterances):
377
  found_math_terms = set()
378
  text = utt.get_clean_text(remove_punct=False)
379
+ # logging.set_verbosity_info()
380
+ # logger = logging.get_logger("transformers")
381
+ # logger.info(f"clean text in math density: {text}")
382
  num_math_terms = 0
383
  for term in math_terms:
384
  count = len(re.findall(term, text))