hylee
committed on
Commit
·
7841567
1
Parent(s):
1f758f7
add virtual mem logging
Browse files- handler.py +32 -4
handler.py
CHANGED
@@ -246,36 +246,64 @@ class EndpointHandler():
|
|
246 |
logging.set_verbosity_info()
|
247 |
logger = logging.get_logger("transformers")
|
248 |
logger.info(f"CPU Usage before models loaded: {cpu_percent}%")
|
|
|
|
|
|
|
|
|
249 |
# Uptake
|
250 |
uptake_model = UptakeModel(
|
251 |
self.device, self.tokenizer, self.input_builder)
|
252 |
uptake_model.run_inference(transcript, min_prev_words=params['uptake_min_num_words'],
|
253 |
uptake_speaker=params.pop("uptake_speaker", None))
|
254 |
cpu_percent = psutil.cpu_percent()
|
|
|
|
|
|
|
|
|
255 |
logger.info(f"CPU Usage after model 1 loaded: {cpu_percent}%")
|
256 |
del uptake_model
|
257 |
cpu_percent = psutil.cpu_percent()
|
|
|
|
|
|
|
|
|
258 |
logger.info(f"CPU Usage after model 1 deleted: {cpu_percent}%")
|
259 |
# Reasoning
|
260 |
reasoning_model = ReasoningModel(
|
261 |
self.device, self.tokenizer, self.input_builder)
|
262 |
reasoning_model.run_inference(transcript)
|
263 |
cpu_percent = psutil.cpu_percent()
|
|
|
|
|
|
|
|
|
264 |
logger.info(f"CPU Usage after model 2 loaded: {cpu_percent}%")
|
265 |
-
print(f"CPU Usage after model 2 loaded: {cpu_percent}%")
|
266 |
del reasoning_model
|
267 |
cpu_percent = psutil.cpu_percent()
|
|
|
|
|
|
|
|
|
268 |
logger.info(f"CPU Usage after model 2 deleted: {cpu_percent}%")
|
269 |
-
print(f"CPU Usage after model 2 deleted: {cpu_percent}%")
|
270 |
# Question
|
271 |
question_model = QuestionModel(
|
272 |
self.device, self.tokenizer, self.input_builder)
|
273 |
question_model.run_inference(transcript)
|
274 |
cpu_percent = psutil.cpu_percent()
|
275 |
logger.info(f"CPU Usage after model 3 loaded: {cpu_percent}%")
|
276 |
-
|
|
|
|
|
|
|
|
|
277 |
del question_model
|
278 |
cpu_percent = psutil.cpu_percent()
|
279 |
logger.info(f"CPU Usage after model 3 deleted: {cpu_percent}%")
|
280 |
-
|
|
|
|
|
|
|
|
|
281 |
return transcript.to_dict()
|
|
|
246 |
logging.set_verbosity_info()
|
247 |
logger = logging.get_logger("transformers")
|
248 |
logger.info(f"CPU Usage before models loaded: {cpu_percent}%")
|
249 |
+
mem_info = psutil.virtual_memory()
|
250 |
+
used_mem = mem_info.used / (1024 ** 3) # Convert to gigabytes
|
251 |
+
total_mem = mem_info.total / (1024 ** 3) # Convert to gigabytes
|
252 |
+
logger.info(f"Used Memory before models loaded: {used_mem:.2f} GB, Total RAM: {total_mem:.2f} GB")
|
253 |
# Uptake
|
254 |
uptake_model = UptakeModel(
|
255 |
self.device, self.tokenizer, self.input_builder)
|
256 |
uptake_model.run_inference(transcript, min_prev_words=params['uptake_min_num_words'],
|
257 |
uptake_speaker=params.pop("uptake_speaker", None))
|
258 |
cpu_percent = psutil.cpu_percent()
|
259 |
+
mem_info = psutil.virtual_memory()
|
260 |
+
used_mem = mem_info.used / (1024 ** 3) # Convert to gigabytes
|
261 |
+
total_mem = mem_info.total / (1024 ** 3) # Convert to gigabytes
|
262 |
+
logger.info(f"Used Memory after model 1 loaded: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
|
263 |
logger.info(f"CPU Usage after model 1 loaded: {cpu_percent}%")
|
264 |
del uptake_model
|
265 |
cpu_percent = psutil.cpu_percent()
|
266 |
+
mem_info = psutil.virtual_memory()
|
267 |
+
used_mem = mem_info.used / (1024 ** 3) # Convert to gigabytes
|
268 |
+
total_mem = mem_info.total / (1024 ** 3) # Convert to gigabytes
|
269 |
+
logger.info(f"Used Memory after model 1 deleted: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
|
270 |
logger.info(f"CPU Usage after model 1 deleted: {cpu_percent}%")
|
271 |
# Reasoning
|
272 |
reasoning_model = ReasoningModel(
|
273 |
self.device, self.tokenizer, self.input_builder)
|
274 |
reasoning_model.run_inference(transcript)
|
275 |
cpu_percent = psutil.cpu_percent()
|
276 |
+
mem_info = psutil.virtual_memory()
|
277 |
+
used_mem = mem_info.used / (1024 ** 3) # Convert to gigabytes
|
278 |
+
total_mem = mem_info.total / (1024 ** 3) # Convert to gigabytes
|
279 |
+
logger.info(f"Used Memory after model 2 loaded: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
|
280 |
logger.info(f"CPU Usage after model 2 loaded: {cpu_percent}%")
|
281 |
+
# print(f"CPU Usage after model 2 loaded: {cpu_percent}%")
|
282 |
del reasoning_model
|
283 |
cpu_percent = psutil.cpu_percent()
|
284 |
+
mem_info = psutil.virtual_memory()
|
285 |
+
used_mem = mem_info.used / (1024 ** 3) # Convert to gigabytes
|
286 |
+
total_mem = mem_info.total / (1024 ** 3) # Convert to gigabytes
|
287 |
+
logger.info(f"Used Memory after model 2 deleted: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
|
288 |
logger.info(f"CPU Usage after model 2 deleted: {cpu_percent}%")
|
289 |
+
# print(f"CPU Usage after model 2 deleted: {cpu_percent}%")
|
290 |
# Question
|
291 |
question_model = QuestionModel(
|
292 |
self.device, self.tokenizer, self.input_builder)
|
293 |
question_model.run_inference(transcript)
|
294 |
cpu_percent = psutil.cpu_percent()
|
295 |
logger.info(f"CPU Usage after model 3 loaded: {cpu_percent}%")
|
296 |
+
mem_info = psutil.virtual_memory()
|
297 |
+
used_mem = mem_info.used / (1024 ** 3) # Convert to gigabytes
|
298 |
+
total_mem = mem_info.total / (1024 ** 3) # Convert to gigabytes
|
299 |
+
logger.info(f"Used Memory after model 3 loaded: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
|
300 |
+
# print(f"CPU Usage after model 3 loaded: {cpu_percent}%")
|
301 |
del question_model
|
302 |
cpu_percent = psutil.cpu_percent()
|
303 |
logger.info(f"CPU Usage after model 3 deleted: {cpu_percent}%")
|
304 |
+
mem_info = psutil.virtual_memory()
|
305 |
+
used_mem = mem_info.used / (1024 ** 3) # Convert to gigabytes
|
306 |
+
total_mem = mem_info.total / (1024 ** 3) # Convert to gigabytes
|
307 |
+
logger.info(f"Used Memory after model 3 deleted: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
|
308 |
+
# print(f"CPU Usage after model 3 deleted: {cpu_percent}%")
|
309 |
return transcript.to_dict()
|