# NOTE(review): the lines "Spaces:" / "Running" / "Running" were Hugging Face
# Spaces page-scrape residue, not code; preserved here as a comment.
import pandas as pd

from app.devices import Device
from app.models import Architecture, Estimate, Metadata, Tokenizer
from app.utils import abbreviate_number, human_readable_size
def get_model_info_df(
    metadata: Metadata, architecture: Architecture, tokenizer: Tokenizer
) -> pd.DataFrame:
    """Build a one-row DataFrame summarizing a model for display.

    Args:
        metadata: Parsed model metadata (type, name, file size, parameter
            count, bits per weight).
        architecture: Architecture info (maximum context length, vocabulary
            length).
        tokenizer: Tokenizer info (model name, token table size).

    Returns:
        A single-row ``pd.DataFrame`` whose columns are human-readable
        display strings/values (sizes formatted via ``human_readable_size``,
        parameter counts via ``abbreviate_number``).
    """
    return pd.DataFrame(
        [
            {
                "Type": metadata.type_,
                "Name": metadata.name,
                "Architecture": metadata.architecture,
                "File Size": human_readable_size(metadata.file_size),
                "Parameters": abbreviate_number(metadata.parameters),
                # Rounded for display; underlying value keeps full precision.
                "Bits Per Weight": round(metadata.bits_per_weight, 2),
                "Maximum Context Length": architecture.maximum_context_length,
                "Vocabulary Length": architecture.vocabulary_length,
                "Tokenizer Model": tokenizer.model,
                "Tokens Size": human_readable_size(tokenizer.tokens_size),
            }
        ]
    )
def get_estimate_df(estimate: Estimate) -> pd.DataFrame:
    """Build a one-row DataFrame of CPU/RAM figures for an estimate.

    NOTE(review): only ``estimate.items[0]`` is reported — presumably the
    estimate for the currently selected configuration; confirm with callers.

    Args:
        estimate: Resource estimate containing at least one item.

    Returns:
        A single-row ``pd.DataFrame`` with throughput, offload, and RAM
        columns (memory sizes formatted via ``human_readable_size``).
    """
    # Hoist the repeated items[0] lookup instead of re-indexing per field.
    item = estimate.items[0]
    return pd.DataFrame(
        [
            {
                "Max Token per Sec.": round(item.maximum_tokens_per_second, 2),
                "Context Size": estimate.context_size,
                "Offload Layers": item.offload_layers,
                "Full Offloaded": item.full_offloaded,
                "CPU Handle Layers": item.ram.handle_layers,
                "CPU UMA": human_readable_size(item.ram.uma),
                "CPU NONUMA": human_readable_size(item.ram.nonuma),
            }
        ]
    )
def get_gpus_df(
    estimate: Estimate, gpu_name: str, selected_device: Device
) -> pd.DataFrame:
    """Build a per-GPU DataFrame of VRAM figures for an estimate.

    One row is produced per entry in ``estimate.items[0].vrams``; the
    ``gpu_name`` and ``selected_device`` columns are repeated on every row.

    Args:
        estimate: Resource estimate containing at least one item.
        gpu_name: Display name of the GPU.
        selected_device: Device whose ``memory_size`` is shown per row.

    Returns:
        A ``pd.DataFrame`` with one row per VRAM entry (memory figures
        formatted via ``human_readable_size``). Empty if ``vrams`` is empty.
    """
    # Hoist the items[0].vrams lookup out of the comprehension.
    vrams = estimate.items[0].vrams
    return pd.DataFrame(
        [
            {
                "GPU": gpu_name,
                "GPU Memory Size": selected_device.memory_size,
                "Handle Layers": gpu.handle_layers,
                "UMA": human_readable_size(gpu.uma),
                "NONUMA": human_readable_size(gpu.nonuma),
            }
            for gpu in vrams
        ]
    )