Merge pull request vllm-project#9 from ri938/organise

ri938 · web-flow · commit 010b5bc4b3cc · 2023-08-24T11:52:44.000+01:00
Don't error if the user doesn't have the kernels installed
diff --git a/vllm/model_executor/layers/quant.py b/vllm/model_executor/layers/quant.py
@@ -5,11 +5,10 @@
 
 
 try:
-    import awq_inference_engine  # with CUDA kernels
+    import awq_inference_engine
+    KERNELS_INSTALLED = True
 except ImportError as ex:
-    raise ImportError(
-        "Unable to import awq_inference_engine: run setup.py"
-        " to install AWQ CUDA kernels")
+    KERNELS_INSTALLED = False
 
 
 class ScaledActivation(nn.Module):
@@ -34,6 +33,11 @@ def __init__(
         ):
         super().__init__()
 
+        if not KERNELS_INSTALLED:
+            raise ImportError(
+                "Unable to import awq_ext: run setup.py"
+                " to install AWQ CUDA kernels")
+
         if w_bit not in [4]:
             raise NotImplementedError("Only 4-bit are supported for now.")