Intel
/

whisper-tiny-onnx-int4-inc

Automatic Speech Recognition

Intel® Neural Compressor

neural-compressor

Inference Endpoints

Model card Files and versions Community

yuwenz committed on Oct 8, 2023

Commit

4aed7f6

•

1 Parent(s): 778760c

Update README.md

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -74,7 +74,7 @@ for model in model_list:
                                         "group_size": 32}}},
         op_name_dict={'/proj_out/MatMul': FP32},) # fallback last matmul in decoder to FP32
     q_model = quantization.fit(
-        os.path.join("/path/to/whisper-tiny", model), # FP32 model path
         config,
         calib_dataloader=dataloader)
     q_model.save(os.path.join("/path/to/whisper-tiny-onnx-int4", model)) # INT4 model path

                                         "group_size": 32}}},
         op_name_dict={'/proj_out/MatMul': FP32},) # fallback last matmul in decoder to FP32
     q_model = quantization.fit(
+        os.path.join("/path/to/whisper-tiny-with-past", model), # FP32 model path
         config,
         calib_dataloader=dataloader)
     q_model.save(os.path.join("/path/to/whisper-tiny-onnx-int4", model)) # INT4 model path