hylee
committed on
Commit
·
e162c68
1
Parent(s):
e5a4b0f
allow for plurals for all math words
Browse files- handler.py +11 -9
handler.py
CHANGED
@@ -361,12 +361,14 @@ def load_math_terms():
|
|
361 |
math_terms = []
|
362 |
math_terms_dict = {}
|
363 |
for term in MATH_WORDS:
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
|
|
|
|
370 |
return math_terms, math_terms_dict
|
371 |
|
372 |
def run_math_density(transcript):
|
@@ -374,9 +376,9 @@ def run_math_density(transcript):
|
|
374 |
for i, utt in enumerate(transcript.utterances):
|
375 |
found_math_terms = set()
|
376 |
text = utt.get_clean_text(remove_punct=False)
|
377 |
-
logging.set_verbosity_info()
|
378 |
-
logger = logging.get_logger("transformers")
|
379 |
-
logger.info(f"clean text in math density: {text}")
|
380 |
num_math_terms = 0
|
381 |
for term in math_terms:
|
382 |
count = len(re.findall(term, text))
|
|
|
361 |
math_terms = []
|
362 |
math_terms_dict = {}
|
363 |
for term in MATH_WORDS:
|
364 |
+
math_terms_dict[f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)"] = term
|
365 |
+
math_terms.append(f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)")
|
366 |
+
# if term in MATH_PREFIXES:
|
367 |
+
# math_terms_dict[f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)"] = term
|
368 |
+
# math_terms.append(f"(^|[^a-zA-Z]){term}(s|es)?([^a-zA-Z]|$)")
|
369 |
+
# else:
|
370 |
+
# math_terms_dict[f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)"] = term
|
371 |
+
# math_terms.append(f"(^|[^a-zA-Z]){term}([^a-zA-Z]|$)")
|
372 |
return math_terms, math_terms_dict
|
373 |
|
374 |
def run_math_density(transcript):
|
|
|
376 |
for i, utt in enumerate(transcript.utterances):
|
377 |
found_math_terms = set()
|
378 |
text = utt.get_clean_text(remove_punct=False)
|
379 |
+
# logging.set_verbosity_info()
|
380 |
+
# logger = logging.get_logger("transformers")
|
381 |
+
# logger.info(f"clean text in math density: {text}")
|
382 |
num_math_terms = 0
|
383 |
for term in math_terms:
|
384 |
count = len(re.findall(term, text))
|