hylee
committed on
Commit
·
d0d0944
1
Parent(s):
813a1db
add CPU checks
Browse files- handler.py +16 -1
handler.py
CHANGED
@@ -9,6 +9,8 @@ from utils import MultiHeadModel, BertInputBuilder, get_num_words
|
|
9 |
|
10 |
import transformers
|
11 |
from transformers import BertTokenizer, BertForSequenceClassification
|
|
|
|
|
12 |
|
13 |
|
14 |
transformers.logging.set_verbosity_debug()
|
@@ -241,21 +243,34 @@ class EndpointHandler():
|
|
241 |
transcript.add_utterance(Utterance(**utt))
|
242 |
|
243 |
print("Running inference on %d examples..." % transcript.length())
|
244 |
-
|
|
|
245 |
# Uptake
|
246 |
uptake_model = UptakeModel(
|
247 |
self.device, self.tokenizer, self.input_builder)
|
248 |
uptake_model.run_inference(transcript, min_prev_words=params['uptake_min_num_words'],
|
249 |
uptake_speaker=params.pop("uptake_speaker", None))
|
|
|
|
|
250 |
del uptake_model
|
|
|
|
|
251 |
# Reasoning
|
252 |
reasoning_model = ReasoningModel(
|
253 |
self.device, self.tokenizer, self.input_builder)
|
254 |
reasoning_model.run_inference(transcript)
|
|
|
|
|
255 |
del reasoning_model
|
|
|
|
|
256 |
# Question
|
257 |
question_model = QuestionModel(
|
258 |
self.device, self.tokenizer, self.input_builder)
|
259 |
question_model.run_inference(transcript)
|
|
|
|
|
260 |
del question_model
|
|
|
|
|
261 |
return transcript.to_dict()
|
|
|
9 |
|
10 |
import transformers
|
11 |
from transformers import BertTokenizer, BertForSequenceClassification
|
12 |
+
import psutil
|
13 |
+
import time
|
14 |
|
15 |
|
16 |
transformers.logging.set_verbosity_debug()
|
|
|
243 |
transcript.add_utterance(Utterance(**utt))
|
244 |
|
245 |
print("Running inference on %d examples..." % transcript.length())
|
246 |
+
cpu_percent = psutil.cpu_percent()
|
247 |
+
print(f"CPU Usage before models loaded: {cpu_percent}%")
|
248 |
# Uptake
|
249 |
uptake_model = UptakeModel(
|
250 |
self.device, self.tokenizer, self.input_builder)
|
251 |
uptake_model.run_inference(transcript, min_prev_words=params['uptake_min_num_words'],
|
252 |
uptake_speaker=params.pop("uptake_speaker", None))
|
253 |
+
cpu_percent = psutil.cpu_percent()
|
254 |
+
print(f"CPU Usage after model 1 loaded: {cpu_percent}%")
|
255 |
del uptake_model
|
256 |
+
cpu_percent = psutil.cpu_percent()
|
257 |
+
print(f"CPU Usage after model 1 deleted: {cpu_percent}%")
|
258 |
# Reasoning
|
259 |
reasoning_model = ReasoningModel(
|
260 |
self.device, self.tokenizer, self.input_builder)
|
261 |
reasoning_model.run_inference(transcript)
|
262 |
+
cpu_percent = psutil.cpu_percent()
|
263 |
+
print(f"CPU Usage after model 2 loaded: {cpu_percent}%")
|
264 |
del reasoning_model
|
265 |
+
cpu_percent = psutil.cpu_percent()
|
266 |
+
print(f"CPU Usage after model 2 deleted: {cpu_percent}%")
|
267 |
# Question
|
268 |
question_model = QuestionModel(
|
269 |
self.device, self.tokenizer, self.input_builder)
|
270 |
question_model.run_inference(transcript)
|
271 |
+
cpu_percent = psutil.cpu_percent()
|
272 |
+
print(f"CPU Usage after model 3 loaded: {cpu_percent}%")
|
273 |
del question_model
|
274 |
+
cpu_percent = psutil.cpu_percent()
|
275 |
+
print(f"CPU Usage after model 3 deleted: {cpu_percent}%")
|
276 |
return transcript.to_dict()
|