# Source: gguf-parser-web / app/tables.py
# Author: phate334 — "[add] gpu info" (commit c7f3542)
import pandas as pd
from app.devices import Device
from app.models import Architecture, Estimate, Metadata, Tokenizer
from app.utils import abbreviate_number, human_readable_size
def get_model_info_df(
    metadata: Metadata, architecture: Architecture, tokenizer: Tokenizer
):
    """Build a one-row DataFrame summarising the parsed GGUF model.

    Combines file-level metadata, architecture limits, and tokenizer info
    into a single display row; sizes and parameter counts are rendered
    human-readable via the app.utils helpers.
    """
    row = {
        "Type": metadata.type_,
        "Name": metadata.name,
        "Architecture": metadata.architecture,
        "File Size": human_readable_size(metadata.file_size),
        "Parameters": abbreviate_number(metadata.parameters),
        "Bits Per Weight": round(metadata.bits_per_weight, 2),
        "Maximum Context Length": architecture.maximum_context_length,
        "Vocabulary Length": architecture.vocabulary_length,
        "Tokenizer Model": tokenizer.model,
        "Tokens Size": human_readable_size(tokenizer.tokens_size),
    }
    return pd.DataFrame([row])
def get_estimate_df(estimate: Estimate):
    """Build a one-row DataFrame for the first estimate item's CPU/RAM usage.

    Only ``estimate.items[0]`` is summarised; UMA/NONUMA byte counts are
    rendered human-readable.
    """
    # Hoist the repeated items[0] / ram lookups into locals for readability.
    item = estimate.items[0]
    ram = item.ram
    summary = {
        "Max Token per Sec.": round(item.maximum_tokens_per_second, 2),
        "Context Size": estimate.context_size,
        "Offload Layers": item.offload_layers,
        "Full Offloaded": item.full_offloaded,
        "CPU Handle Layers": ram.handle_layers,
        "CPU UMA": human_readable_size(ram.uma),
        "CPU NONUMA": human_readable_size(ram.nonuma),
    }
    return pd.DataFrame([summary])
def get_gpus_df(estimate: Estimate, gpu_name: str, selected_device: Device):
    """Build a DataFrame with one row per VRAM entry of the first estimate item.

    Each row repeats the selected GPU's name and memory size alongside that
    entry's handled layers and humanized UMA/NONUMA usage.
    """
    rows = []
    for vram in estimate.items[0].vrams:
        rows.append(
            {
                "GPU": gpu_name,
                # NOTE(review): memory_size is shown raw while UMA/NONUMA are
                # humanized — presumably already display-formatted; verify.
                "GPU Memory Size": selected_device.memory_size,
                "Handle Layers": vram.handle_layers,
                "UMA": human_readable_size(vram.uma),
                "NONUMA": human_readable_size(vram.nonuma),
            }
        )
    return pd.DataFrame(rows)