Remove assert in load_cpu_kernel
Browse files
- quantization.py +2 -4
quantization.py
CHANGED
@@ -442,7 +442,6 @@ class QuantizedEmbedding(Embedding): # TODO: backward, check empty_init
|
|
442 |
def load_cpu_kernel(**kwargs):
|
443 |
global cpu_kernels
|
444 |
cpu_kernels = CPUKernel(**kwargs)
|
445 |
-
assert cpu_kernels.load
|
446 |
|
447 |
|
448 |
def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=False, **kwargs):
|
@@ -453,9 +452,8 @@ def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=F
|
|
453 |
dense_h_to_4h_quantization_cache = None
|
454 |
dense_4h_to_h_quantization_cache = None
|
455 |
|
456 |
-
|
457 |
-
|
458 |
-
except:
|
459 |
if kernels is None: # CUDA kernels failed
|
460 |
print("Cannot load cpu or cuda kernel, quantization failed:")
|
461 |
assert kernels is not None
|
|
|
442 |
def load_cpu_kernel(**kwargs):
|
443 |
global cpu_kernels
|
444 |
cpu_kernels = CPUKernel(**kwargs)
|
|
|
445 |
|
446 |
|
447 |
def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=False, **kwargs):
|
|
|
452 |
dense_h_to_4h_quantization_cache = None
|
453 |
dense_4h_to_h_quantization_cache = None
|
454 |
|
455 |
+
load_cpu_kernel(**kwargs)
|
456 |
+
if not cpu_kernels.load:
|
|
|
457 |
if kernels is None: # CUDA kernels failed
|
458 |
print("Cannot load cpu or cuda kernel, quantization failed:")
|
459 |
assert kernels is not None
|