We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 2617c55 + 5fcc1c4 commit 010b5bc (Copy full SHA for 010b5bc)
vllm/model_executor/layers/quant.py
@@ -5,11 +5,10 @@
5
6
7
try:
8
- import awq_inference_engine # with CUDA kernels
+ import awq_inference_engine
9
+ KERNELS_INSTALLED = True
10
except ImportError as ex:
- raise ImportError(
11
- "Unable to import awq_inference_engine: run setup.py"
12
- " to install AWQ CUDA kernels")
+ KERNELS_INSTALLED = False
13
14
15
class ScaledActivation(nn.Module):
@@ -34,6 +33,11 @@ def __init__(
34
33
):
35
super().__init__()
36
+ if not KERNELS_INSTALLED:
37
+ raise ImportError(
38
+ "Unable to import awq_ext: run setup.py"
39
+ " to install AWQ CUDA kernels")
40
+
41
if w_bit not in [4]:
42
raise NotImplementedError("Only 4-bit are supported for now.")
43
0 commit comments