add math terms
Browse files- handler.py +32 -2
handler.py
CHANGED
@@ -2,10 +2,11 @@ from typing import Dict, List, Any
|
|
2 |
from scipy.special import softmax
|
3 |
import numpy as np
|
4 |
import weakref
|
|
|
5 |
|
6 |
from utils import clean_str, clean_str_nopunct
|
7 |
import torch
|
8 |
-
from utils import MultiHeadModel, BertInputBuilder, get_num_words
|
9 |
|
10 |
import transformers
|
11 |
from transformers import BertTokenizer, BertForSequenceClassification
|
@@ -29,6 +30,8 @@ class Utterance:
|
|
29 |
self.endtime = endtime
|
30 |
self.transcript = weakref.ref(transcript) if transcript else None
|
31 |
self.props = kwargs
|
|
|
|
|
32 |
|
33 |
self.uptake = None
|
34 |
self.reasoning = None
|
@@ -53,7 +56,9 @@ class Utterance:
|
|
53 |
'uptake': self.uptake,
|
54 |
'reasoning': self.reasoning,
|
55 |
'question': self.question,
|
56 |
-
'
|
|
|
|
|
57 |
**self.props
|
58 |
}
|
59 |
|
@@ -252,6 +257,29 @@ class FocusingQuestionModel:
|
|
252 |
return output
|
253 |
|
254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
class EndpointHandler():
|
256 |
def __init__(self, path="."):
|
257 |
print("Loading models...")
|
@@ -304,4 +332,6 @@ class EndpointHandler():
|
|
304 |
self.device, self.tokenizer, self.input_builder)
|
305 |
focusing_question_model.run_inference(transcript, uptake_speaker=uptake_speaker)
|
306 |
|
|
|
|
|
307 |
return transcript.to_dict()
|
|
|
2 |
from scipy.special import softmax
|
3 |
import numpy as np
|
4 |
import weakref
|
5 |
+
import re
|
6 |
|
7 |
from utils import clean_str, clean_str_nopunct
|
8 |
import torch
|
9 |
+
from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS
|
10 |
|
11 |
import transformers
|
12 |
from transformers import BertTokenizer, BertForSequenceClassification
|
|
|
30 |
self.endtime = endtime
|
31 |
self.transcript = weakref.ref(transcript) if transcript else None
|
32 |
self.props = kwargs
|
33 |
+
self.num_math_terms = None
|
34 |
+
self.math_terms = None
|
35 |
|
36 |
self.uptake = None
|
37 |
self.reasoning = None
|
|
|
56 |
'uptake': self.uptake,
|
57 |
'reasoning': self.reasoning,
|
58 |
'question': self.question,
|
59 |
+
'focusingQuestion': self.focusing_question,
|
60 |
+
'numMathTerms': self.num_math_terms,
|
61 |
+
'mathTerms': self.math_terms,
|
62 |
**self.props
|
63 |
}
|
64 |
|
|
|
257 |
return output
|
258 |
|
259 |
|
260 |
+
def load_math_terms(math_words=None, math_prefixes=None):
    """Build one regular-expression pattern string per math vocabulary term.

    Each pattern matches its term only when the term is not embedded in a
    longer alphabetic word. Terms that also appear among the prefixes accept
    an optional plural suffix ("s" or "es").

    The letter boundaries are zero-width lookarounds, so a match never
    consumes the neighbouring character. With consuming boundary groups, two
    occurrences separated by a single character (e.g. "sum sum") compete for
    that separator and ``re.findall`` reports only the first of them.

    Args:
        math_words: Iterable of terms to build patterns for. Defaults to the
            module-level ``MATH_WORDS``.
        math_prefixes: Collection of terms that may carry a plural suffix.
            Defaults to the module-level ``MATH_PREFIXES``.

    Returns:
        A list of pattern strings, one per term, in the same order as
        ``math_words``. Terms are interpolated verbatim (they are not
        regex-escaped).
    """
    if math_words is None:
        math_words = MATH_WORDS
    if math_prefixes is None:
        math_prefixes = MATH_PREFIXES
    # Build the lookup once so membership tests are O(1) per term.
    pluralizable = frozenset(math_prefixes)
    return [
        f"(?<![a-zA-Z]){term}(?:es|s)?(?![a-zA-Z])"
        if term in pluralizable
        else f"(?<![a-zA-Z]){term}(?![a-zA-Z])"
        for term in math_words
    ]
|
268 |
+
|
269 |
+
def run_math_density(transcript):
    """Annotate every utterance of ``transcript`` with its math-term usage.

    For each utterance this sets ``num_math_terms`` to the total number of
    pattern matches found in its cleaned text (punctuation kept), and
    ``math_terms`` to the distinct vocabulary terms that matched.

    Args:
        transcript: Object exposing ``utterances``; each utterance must
            provide ``get_clean_text(remove_punct=...)``.

    Returns:
        None. The utterances are updated in place.
    """
    # load_math_terms() yields one pattern per MATH_WORDS entry, in order, so
    # zipping the two recovers the readable term behind each pattern.
    # Compiling here keeps that work out of the per-utterance loop.
    term_patterns = [
        (term, re.compile(pattern))
        for term, pattern in zip(MATH_WORDS, load_math_terms())
    ]
    for utt in transcript.utterances:
        text = utt.get_clean_text(remove_punct=False)
        total = 0
        matched = {}  # dict keys give de-duplication with a stable order
        for term, regex in term_patterns:
            count = len(regex.findall(text))
            if count > 0:
                matched[term] = None
                total += count
        utt.num_math_terms = total
        # Report the vocabulary terms themselves, not the regex strings that
        # were used to find them.
        utt.math_terms = list(matched)
|
282 |
+
|
283 |
class EndpointHandler():
|
284 |
def __init__(self, path="."):
|
285 |
print("Loading models...")
|
|
|
332 |
self.device, self.tokenizer, self.input_builder)
|
333 |
focusing_question_model.run_inference(transcript, uptake_speaker=uptake_speaker)
|
334 |
|
335 |
+
run_math_density(transcript)
|
336 |
+
|
337 |
return transcript.to_dict()
|