Spaces:
Runtime error
Runtime error
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
import gc | |
import psutil | |
import threading | |
import torch | |
def byte2gb(x: float) -> int:
    """Convert a size in bytes to whole gibibytes (units of 2**30 bytes).

    Args:
        x: Size in bytes. May be negative, e.g. the difference between two
            memory readings.

    Returns:
        ``x / 2**30`` truncated toward zero.
    """
    # True division followed by int() truncates toward zero, so a small
    # negative delta is reported as 0 rather than -1 (as floor division would).
    return int(x / 2**30)
# Context manager that tracks the peak memory usage of the current process.
class MemoryTrace:
    """Context manager that records CUDA and CPU memory usage around a block.

    On entry it runs the garbage collector, empties the CUDA cache, resets the
    CUDA peak counters, snapshots the allocated CUDA memory and the process
    resident set size (RSS), and starts a background thread that keeps
    sampling RSS to find its peak. On exit it stops that thread and stores the
    results as attributes on the instance.

    Attributes set on entry (whole GB, see ``byte2gb``):
        begin: CUDA memory allocated at entry.
        cpu_begin: process RSS at entry.

    Attributes set on exit (whole GB unless noted):
        end: CUDA memory allocated at exit.
        peak: ``torch.cuda.max_memory_allocated()`` at exit.
        used: growth of allocated CUDA memory between entry and exit.
        peaked: peak allocated CUDA memory relative to the entry value.
        max_reserved: ``torch.cuda.max_memory_reserved()`` at exit.
        peak_active_gb: ``"active_bytes.all.peak"`` from the CUDA memory stats.
        cuda_malloc_retires: ``"num_alloc_retries"`` counter (a count, not GB).
        m_cuda_ooms: ``"num_ooms"`` counter (a count, not GB).
        cpu_end: process RSS at exit, in bytes (left unconverted, as before).
        cpu_peak: highest RSS sampled by the monitor thread, in bytes.
        cpu_used: growth of RSS between entry and exit.
        cpu_peaked: peak RSS relative to the entry value.
    """

    def __enter__(self) -> "MemoryTrace":
        """Take the baseline readings and start the RSS peak monitor."""
        gc.collect()
        torch.cuda.empty_cache()
        # Reset the peak gauges so the values read on exit cover only this
        # block. reset_max_memory_allocated() is deprecated and simply
        # forwards to this call.
        torch.cuda.reset_peak_memory_stats()

        # Keep the raw byte counts: deltas have to be taken in bytes and
        # converted once, otherwise byte2gb() is applied to a value that is
        # already in GB and the result collapses to 0.
        self._begin_bytes = torch.cuda.memory_allocated()
        self.begin = byte2gb(self._begin_bytes)

        self.process = psutil.Process()
        self._cpu_begin_bytes = self.cpu_mem_used()
        self.cpu_begin = byte2gb(self._cpu_begin_bytes)

        self.peak_monitoring = True
        self._peak_monitor_thread = threading.Thread(
            target=self.peak_monitor_func, daemon=True
        )
        self._peak_monitor_thread.start()
        return self

    def cpu_mem_used(self) -> int:
        """Return the resident set size of the current process, in bytes."""
        return self.process.memory_info().rss

    def peak_monitor_func(self) -> None:
        """Sample RSS in a tight loop, keeping the maximum in ``cpu_peak``.

        Runs until ``peak_monitoring`` is false. The flag is checked after
        sampling, so at least one sample is always taken.
        """
        self.cpu_peak = -1
        while True:
            self.cpu_peak = max(self.cpu_mem_used(), self.cpu_peak)
            # Deliberately no sleep here: even a ~1 ms pause between samples
            # can miss a short-lived peak.
            if not self.peak_monitoring:
                break

    def __exit__(self, *exc) -> None:
        """Stop the monitor and compute the usage figures.

        Returns None, so an exception raised inside the ``with`` body is not
        suppressed.
        """
        self.peak_monitoring = False
        # Wait for the sampler to finish so that cpu_peak is guaranteed to
        # exist and to include its final sample before it is read below.
        self._peak_monitor_thread.join()

        gc.collect()
        torch.cuda.empty_cache()

        end_bytes = torch.cuda.memory_allocated()
        peak_bytes = torch.cuda.max_memory_allocated()
        self.end = byte2gb(end_bytes)
        self.peak = byte2gb(peak_bytes)
        # Subtract in bytes, then convert once.
        self.used = byte2gb(end_bytes - self._begin_bytes)
        self.peaked = byte2gb(peak_bytes - self._begin_bytes)
        self.max_reserved = byte2gb(torch.cuda.max_memory_reserved())

        cuda_info = torch.cuda.memory_stats()
        # .get() with a 0 default, matching the two counter lookups below.
        self.peak_active_gb = byte2gb(cuda_info.get("active_bytes.all.peak", 0))
        self.cuda_malloc_retires = cuda_info.get("num_alloc_retries", 0)
        self.m_cuda_ooms = cuda_info.get("num_ooms", 0)

        # cpu_end stays in bytes, as it always was; the deltas are taken
        # against the byte baseline rather than the GB-rounded cpu_begin.
        self.cpu_end = self.cpu_mem_used()
        self.cpu_used = byte2gb(self.cpu_end - self._cpu_begin_bytes)
        self.cpu_peaked = byte2gb(self.cpu_peak - self._cpu_begin_bytes)