Spaces:

optimum-intel
/

benchmark-openvino

Running

App Files Files Community

benchmark-openvino / config_store.py

IlyasMoutawwakil HF staff

fix

449a109 16 days ago

raw

history blame

3.5 kB

	import gradio as gr


	def get_process_config():
	    """Build the gradio input components for the ``process.*`` options.

	    Returns:
	        A dict keyed by the dotted option name. Each value is a freshly
	        created gradio component whose label equals its key.
	    """
	    fields = {}

	    # Components are created in the same order as the keys are inserted.
	    # NOTE(review): creation order presumably drives UI layout -- confirm
	    # against the caller before reordering.
	    numactl_key = "process.numactl"
	    fields[numactl_key] = gr.Checkbox(
	        label=numactl_key,
	        info="Runs the model with numactl",
	        value=True,
	    )

	    kwargs_key = "process.numactl_kwargs"
	    fields[kwargs_key] = gr.Textbox(
	        label=kwargs_key,
	        info="Additional python dict of kwargs to pass to numactl",
	        # The default is the textual form of a python dict literal.
	        value="{'cpunodebind': 0, 'membind': 0}",
	    )

	    return fields


	def get_inference_config():
	    """Build the gradio input components for the ``inference.*`` options.

	    Returns:
	        A dict keyed by the dotted option name. Values are, in order, three
	        sliders, two checkboxes and two text boxes; every component's label
	        equals its key.
	    """
	    # (key, info) pairs; all three sliders share the same numeric settings.
	    slider_specs = (
	        ("inference.warmup_runs", "Number of warmup runs"),
	        ("inference.duration", "Minimum duration of the benchmark in seconds"),
	        (
	            "inference.iterations",
	            "Minimum number of iterations of the benchmark",
	        ),
	    )
	    # (key, info) pairs; both checkboxes start checked.
	    checkbox_specs = (
	        ("inference.latency", "Measures the latency of the model"),
	        ("inference.memory", "Measures the peak memory consumption"),
	    )

	    fields = {}

	    # Components are instantiated in the same order as the keys are added:
	    # sliders first, then checkboxes, then text boxes.
	    for key, info in slider_specs:
	        fields[key] = gr.Slider(
	            label=key,
	            info=info,
	            minimum=0,
	            maximum=10,
	            value=10,
	            step=1,
	        )

	    for key, info in checkbox_specs:
	        fields[key] = gr.Checkbox(label=key, info=info, value=True)

	    # Text boxes default to the textual form of a python dict literal.
	    shapes_key = "inference.input_shapes"
	    fields[shapes_key] = gr.Textbox(
	        label=shapes_key,
	        info="Input shapes to use for the benchmark",
	        value="{'batch_size': 2, 'sequence_length': 16}",
	    )

	    generate_key = "inference.generate_kwargs"
	    fields[generate_key] = gr.Textbox(
	        label=generate_key,
	        info="Additional python dict of kwargs to pass to the generate function",
	        value="{'max_new_tokens': 32, 'min_new_tokens': 32}",
	    )

	    return fields


	def get_pytorch_config():
	    """Build the gradio input component for the ``pytorch.*`` options.

	    Returns:
	        A single-entry dict mapping ``"pytorch.torch_dtype"`` to a dropdown
	        that offers four dtype names and preselects ``"float32"``.
	    """
	    dtype_key = "pytorch.torch_dtype"
	    dtype_dropdown = gr.Dropdown(
	        label=dtype_key,
	        info="The dtype to use for the model",
	        choices=["bfloat16", "float16", "float32", "auto"],
	        value="float32",
	    )
	    return {dtype_key: dtype_dropdown}


	def get_openvino_config():
	    """Build the gradio input components for the ``openvino.*`` options.

	    Returns:
	        A dict keyed by the dotted option name. It holds five checkboxes
	        (all initially unchecked) and one text box; every component's label
	        equals its key.
	    """

	    def unchecked(key, info):
	        # Every checkbox in this group starts unchecked.
	        return gr.Checkbox(label=key, info=info, value=False)

	    fields = {}

	    # Components are instantiated in the same order as the keys are added;
	    # the text box sits between the second and third checkbox.
	    fields["openvino.half"] = unchecked(
	        "openvino.half",
	        "Converts model to half precision",
	    )
	    fields["openvino.reshape"] = unchecked(
	        "openvino.reshape",
	        "Reshapes the model to the input shape",
	    )
	    fields["openvino.reshape_kwargs"] = gr.Textbox(
	        label="openvino.reshape_kwargs",
	        info="Additional python dict of kwargs to pass to the reshape function",
	        # The default is the textual form of a python dict literal.
	        value="{'batch_size': 2, 'sequence_length': 16}",
	    )
	    fields["openvino.compile"] = unchecked(
	        "openvino.compile",
	        "Compiles model for the current device",
	    )
	    fields["openvino.load_in_8bit"] = unchecked(
	        "openvino.load_in_8bit",
	        "Loads model in 8 bits precision",
	    )
	    fields["openvino.load_in_4bit"] = unchecked(
	        "openvino.load_in_4bit",
	        "Loads model in 4 bits precision",
	    )

	    return fields