|
10 | 10 | USE_CASES,
|
11 | 11 | OV_MODEL_CLASSES_MAPPING,
|
12 | 12 | PT_MODEL_CLASSES_MAPPING,
|
13 |
| - PA_ATTENTION_BACKEND, |
14 |
| - SDPA_ATTENTION_BACKEND |
| 13 | + PA_ATTENTION_BACKEND |
15 | 14 | )
|
16 | 15 | import librosa
|
17 | 16 |
|
@@ -181,19 +180,14 @@ def analyze_args(args):
|
181 | 180 | model_args['config'] = config
|
182 | 181 | if model_framework == 'ov':
|
183 | 182 | set_default_param_for_ov_config(model_args['config'])
|
184 |
| - if 'ATTENTION_BACKEND' not in model_args['config'] and use_case in ['text_gen', 'vlm'] and args.device != "NPU": |
| 183 | + if 'ATTENTION_BACKEND' not in model_args['config'] and use_case in ['text_gen', 'vlm'] and args.device != "NPU" and not optimum: |
185 | 184 | model_args['config']['ATTENTION_BACKEND'] = PA_ATTENTION_BACKEND
|
186 |
| - if model_args['config'].get('ATTENTION_BACKEND', '') == PA_ATTENTION_BACKEND and args.device == "NPU": |
187 |
| - model_args['config']['ATTENTION_BACKEND'] = SDPA_ATTENTION_BACKEND |
188 |
| - log.warning("Continuous Batching, Speculative decoding and Prompt Lookup decoding is not supported for NPU device") |
189 | 185 | log.info(f"OV Config={model_args['config']}")
|
190 | 186 | elif model_framework == 'pt':
|
191 | 187 | log.info(f"PT Config={model_args['config']}")
|
192 | 188 | model_args['model_type'] = get_model_type(model_name, use_case, model_framework)
|
193 | 189 | model_args['model_name'] = model_name
|
194 | 190 |
|
195 |
| - if model_args['config'].get('ATTENTION_BACKEND', '') == PA_ATTENTION_BACKEND and optimum: |
196 |
| - raise RuntimeError("Continuous batching mode supported only via OpenVINO GenAI") |
197 | 191 | cb_config = None
|
198 | 192 | if args.cb_config:
|
199 | 193 | cb_config = get_config(args.cb_config)
|
|
0 commit comments