import json
from typing import Any

import gradio as gr
import pandas as pd
import requests
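# Effective bits per weight for each llama.cpp GGUF quant type, including the
# overhead of block scales and metadata; used to estimate weight memory.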
quants = {
"Q2_K": 3.35,
"Q3_K_S": 3.5,
"Q3_K_M": 3.91,
"Q3_K_L": 4.27,
"Q4_0": 4.55,
"Q4_K_S": 4.58,
"Q4_K_M": 4.85,
"Q5_0": 5.54,
"Q5_K_S": 5.54,
"Q5_K_M": 5.69,
"Q6_K": 6.59,
"Q8_0": 8.5,
}
def calc_model_size(parameters: int, quant: float) -> float:
    # weight bytes = parameter count * bits per weight / 8
    return parameters * quant // 8
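# Sanity check (illustrative, assumes a ~7.24B-parameter model such as Mistral-7B):
# 7.24e9 * 4.58 / 8 ≈ 4.14e9 bytes ≈ 4.14 GB of weights at Q4_K_S.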
def get_model_config(hf_model: str) -> dict[str, Any]:
    config = requests.get(
        f"https://huggingface.co/{hf_model}/raw/main/config.json"
    ).json()
    model_size = 0
    try:
        # safetensors models record the total weight size in the index metadata
        model_size = requests.get(
            f"https://huggingface.co/{hf_model}/raw/main/model.safetensors.index.json"
        ).json()["metadata"]["total_size"]
    except (requests.RequestException, KeyError, ValueError):
        try:
            # fall back to the PyTorch weight index for older repos
            model_size = requests.get(
                f"https://huggingface.co/{hf_model}/raw/main/pytorch_model.bin.index.json"
            ).json()["metadata"]["total_size"]
        except (requests.RequestException, KeyError, ValueError):
            # last resort: scrape the parameter count embedded in the model page
            model_page = requests.get(f"https://huggingface.co/{hf_model}").text
            param_props_idx = model_page.find('data-target="ModelSafetensorsParams"')
            param_props_start = model_page.rfind("<div", 0, param_props_idx)
            model_size = (
                json.loads(
                    [
                        prop
                        for prop in model_page[param_props_start:param_props_idx].split(" ")
                        if prop.startswith("data-props=")
                    ][0]
                    .split("=", 1)[1]
                    .replace('"', "")
                    .replace("&quot;", '"')
                )["safetensors"]["total"]
                * 2  # parameter count -> bytes, assuming fp16
            )
    # assume fp16 weights: 2 bytes per parameter
    config["parameters"] = model_size / 2
    return config
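# Note: both weight index files fetched above share the same JSON shape, e.g.
#   {"metadata": {"total_size": <total checkpoint bytes>}, "weight_map": {...}}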
def calc_input_buffer_size(model_config, context: int) -> float:
    # rough estimate of llama.cpp's input buffers, in bytes
    return 4096 + 2048 * model_config["hidden_size"] + context * 4 + context * 2048
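# e.g. hidden_size 4096 at 8192 tokens:
# 4096 + 2048 * 4096 + 8192 * 4 + 8192 * 2048 = 25,202,688 bytes ≈ 0.025 GB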
def calc_compute_buffer_size(model_config, context: int) -> float:
    # rough estimate of llama.cpp's compute (scratch) buffer, in bytes
    return (
        (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
    )
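# e.g. 32 attention heads at 8192 tokens:
# (8192 / 1024 * 2 + 0.75) * 32 * 1024 * 1024 = 562,036,736 bytes ≈ 0.56 GB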
def calc_context_size(model_config, context: int) -> float:
    # KV cache size in bytes: two tensors (K and V) of fp16 elements (2 bytes each),
    # shrunk by the grouped-query attention ratio n_gqa
    n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
    n_embd_gqa = model_config["hidden_size"] / n_gqa
    n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
    return 2 * n_elements * 2
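# Worked example (Mistral-7B: hidden_size 4096, 32 heads, 8 KV heads, 32 layers)
# at 8192 tokens: n_embd_gqa = 4096 / (32 / 8) = 1024, so the KV cache is
# 2 * (1024 * 32 * 8192) * 2 bytes ≈ 1.07 GB.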
def calc(model_base, context, quant_size):
    model_config = get_model_config(model_base)
    try:
        # a raw bits-per-weight value was entered (e.g. "4.5" for exl2)
        quant_bpw = float(quant_size)
    except ValueError:
        # otherwise look up the effective bpw of the named GGUF quant
        quant_bpw = quants[quant_size]
model_size = round(
calc_model_size(model_config["parameters"], quant_bpw) / 1000 / 1000 / 1000, 2
)
context_size = round(
(
calc_input_buffer_size(model_config, context)
+ calc_context_size(model_config, context)
+ calc_compute_buffer_size(model_config, context)
)
/ 1000
/ 1000
/ 1000,
2,
)
return model_size, context_size, round(model_size + context_size, 2)
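# Illustrative usage (actual numbers depend on the model's live config.json):
#   calc("mistralai/Mistral-7B-v0.1", 8192, "Q4_K_S") -> (model_gb, context_gb, total_gb)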
title = "GGUF VRAM Calculator"
with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
default_model = "mistralai/Mistral-7B-v0.1"
default_quant = "Q4_K_S"
default_context = 8192
default_size = calc(default_model, default_context, default_quant)
default_model_size = default_size[0]
default_context_size = default_size[1]
    gr.Markdown(
        f"# {app.title}\nThis is meant only as a guide and will not be 100% accurate. It also does not account for anything that might be running in the background on your system, or for CUDA system memory fallback on Windows."
    )
model = gr.Textbox(
value=default_model,
label="Enter Unquantized HF Model Name (e.g. mistralai/Mistral-7B-v0.1)",
)
context = gr.Number(
minimum=1, value=default_context, label="Desired Context Size (Tokens)"
)
quant = gr.Dropdown(
choices=list(quants.keys()),
value=default_quant,
allow_custom_value=True,
label="Enter GGUF Quant (e.g. Q4_K_S) or the specific BPW for other quantization schemes such as exl2 (e.g. 4.5)",
)
btn = gr.Button(value="Submit", variant="primary")
btn.click(
calc,
inputs=[
model,
context,
quant,
],
outputs=[
gr.Number(
label="Model Size (GB)",
value=default_size[0],
),
gr.Number(
label="Context Size (GB)",
value=default_size[1],
),
gr.Number(
label="Total Size (GB)",
value=default_size[2],
),
],
)
app.launch()