Commit bcc35f0 by songxxzp
Parent(s): fe0674f

Add assertion when loading cpu and cuda kernel fails

Files changed: quantization.py (+3 -3)
quantization.py CHANGED

@@ -441,10 +441,10 @@ def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=F
     try:
         load_cpu_kernel(**kwargs)
     except:
-        print("Cannot load cpu kernel, don't use quantized model on cpu.")
         if kernels is None:  # CUDA kernels failed
-            print("Cannot load cuda kernel, quantization failed")
-
+            print("Cannot load cpu or cuda kernel, quantization failed:")
+            assert kernels is not None
+        print("Cannot load cpu kernel, don't use quantized model on cpu.")

     current_device = model.device
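To make the effect of the change concrete, here is a minimal, self-contained sketch of the new failure path. It assumes `kernels` is the module-level handle set by the CUDA kernel loader and that `load_cpu_kernel` raises when the CPU extension cannot be built; the stub `load_cpu_kernel` and the `try_load_kernels` wrapper are illustrative stand-ins, not the repo's actual module layout.

kernels = None  # stand-in: pretend the CUDA kernel load already failed


def load_cpu_kernel(**kwargs):
    # stand-in: simulate a CPU kernel build/load failure
    raise RuntimeError("cannot compile the CPU kernel")


def try_load_kernels(**kwargs):
    # Mirrors the patched except-block from quantization.py.
    try:
        load_cpu_kernel(**kwargs)
    except:
        if kernels is None:  # CUDA kernels failed as well
            print("Cannot load cpu or cuda kernel, quantization failed:")
            # New in this commit: fail fast instead of continuing
            # without any usable kernel.
            assert kernels is not None
        print("Cannot load cpu kernel, don't use quantized model on cpu.")


try:
    try_load_kernels()
except AssertionError:
    print("quantization aborts with AssertionError when neither kernel loads")

Running the sketch prints the combined message and then aborts with AssertionError, which matches the commit title: a hard failure rather than a silent fall-through when neither the CUDA nor the CPU kernel is available.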