Spaces:
Runtime error
Runtime error
File size: 2,200 Bytes
b85aa9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import GPUtil # pip install gputil
def get_gpu_ids_with_sufficient_memory(memory_requirement_GB):
    '''
    Returns the smallest set of GPU IDs whose combined free memory covers `memory_requirement_GB`.

    GPUs are picked greedily, most free memory first, so the result uses as few devices as
    possible. You will need to use all returned GPU IDs to get the desired amount of memory.
    The returned IDs are ordered by free memory (descending); GPUs with equal free memory
    keep the order in which GPUtil enumerated them.

    Args:
        memory_requirement_GB: Required free memory in GB. It is multiplied by 1024 and
            compared against `GPU.memoryFree` as reported by GPUtil.

    Returns:
        A list of GPU IDs.
        If the requirement is 0 (or negative), every available GPU ID is returned.
        If the requirement exceeds the total free memory of all GPUs, an empty list is returned.
    '''
    memory_requirement_MB = float(memory_requirement_GB * 1024)
    # sorted() is stable even with reverse=True, so ties keep GPUtil's enumeration order.
    GPUs = sorted(GPUtil.getGPUs(), key=lambda gpu: gpu.memoryFree, reverse=True)

    # Nothing specific was requested: hand back every GPU. Returning [] here would be
    # indistinguishable from the "not enough memory" result below.
    if memory_requirement_MB <= 0:
        return [gpu.id for gpu in GPUs]

    total_free_MB = sum(gpu.memoryFree for gpu in GPUs)
    if memory_requirement_MB > total_free_MB:
        return []

    GPU_IDs = []
    remaining_MB = memory_requirement_MB
    for gpu in GPUs:
        if remaining_MB <= 0:
            break
        GPU_IDs.append(gpu.id)
        remaining_MB -= gpu.memoryFree
    return GPU_IDs
def get_device_with_most_free_memory():
    '''
    Returns the GPU ID of the GPU with the most free memory.

    If several GPUs are tied for the most free memory, the first one enumerated by
    GPUtil is returned.

    Raises:
        IndexError: if GPUtil does not report any GPU.
    '''
    GPUs = GPUtil.getGPUs()
    if not GPUs:
        # Same exception type as indexing an empty list, but with an actionable message.
        raise IndexError("No GPUs were detected by GPUtil")
    # max() returns the first maximal element, so no full sort is needed.
    return max(GPUs, key=lambda gpu: gpu.memoryFree).id
def get_free_memory_dict(leave_extra_memory_unused_GiB: float = 2, leave_extra_memory_unused_gpu0_GiB: float = 3):
    '''
    Returns a dictionary of GPU IDs and their usable free memory, formatted as '<int>MiB' strings.
    Compatible with huggingface Accelerate formatting: `max_memory=get_free_memory_dict()`

    Accelerate seems to use more memory than we give it, so by default we report less
    than what is actually free.

    Args:
        leave_extra_memory_unused_GiB: Amount (in GiB) subtracted from the free memory of
            every GPU. Values <= 0 disable this reserve.
        leave_extra_memory_unused_gpu0_GiB: Additional amount (in GiB) subtracted from
            GPU 0 only, on top of the general reserve (so with the defaults GPU 0 reports
            5 GiB less than it has free). Values <= 0 disable this reserve.

    Returns:
        A dict mapping GPU ID to a string such as '24753MiB'. A GPU whose free memory is
        smaller than its reserve is reported as '0MiB' rather than a negative amount.

    Example output:
        {0: '24753MiB', 1: '26223MiB', 2: '25603MiB', 3: '9044MiB'}
    '''
    memory_map = {}
    for gpu in GPUtil.getGPUs():
        usable_MiB = int(round(gpu.memoryFree))
        if leave_extra_memory_unused_GiB > 0:
            usable_MiB -= leave_extra_memory_unused_GiB * 1024
        if gpu.id == 0 and leave_extra_memory_unused_gpu0_GiB > 0:
            usable_MiB -= leave_extra_memory_unused_gpu0_GiB * 1024
        # Clamp at zero: a negative budget is not a meaningful memory limit.
        usable_MiB = max(0, int(round(usable_MiB)))
        # format to Accelerate's liking
        memory_map[gpu.id] = f"{usable_MiB}MiB"
    return memory_map
|