Skip to content

Commit 7df8022

Browse files
committed
Avoid setting ATTENTION_BACKEND for optimum
1 parent 38ddeee commit 7df8022

File tree

2 files changed

+3
-9
lines changed

2 files changed

+3
-9
lines changed

tools/llm_bench/benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ def get_argprser():
152152
"--use_cb",
153153
action="store_true",
154154
help='Deprecated, will be removed soon! Continues batching mode is used by default. '
155-
'To switch to SPDA mode, please, create .json file, set up ATTENTION_BACKEND="SDPA" in file and put it to --load_config.'
155+
'To switch to SPDA mode, please, set up {"ATTENTION_BACKEND": "SDPA"} in --load_config.'
156156
)
157157
parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continuous Batching Scheduler settings or dict")
158158
parser.add_argument("--draft_model", required=False, default=None,

tools/llm_bench/llm_bench_utils/model_utils.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
USE_CASES,
1111
OV_MODEL_CLASSES_MAPPING,
1212
PT_MODEL_CLASSES_MAPPING,
13-
PA_ATTENTION_BACKEND,
14-
SDPA_ATTENTION_BACKEND
13+
PA_ATTENTION_BACKEND
1514
)
1615
import librosa
1716

@@ -181,19 +180,14 @@ def analyze_args(args):
181180
model_args['config'] = config
182181
if model_framework == 'ov':
183182
set_default_param_for_ov_config(model_args['config'])
184-
if 'ATTENTION_BACKEND' not in model_args['config'] and use_case in ['text_gen', 'vlm'] and args.device != "NPU":
183+
if 'ATTENTION_BACKEND' not in model_args['config'] and use_case in ['text_gen', 'vlm'] and args.device != "NPU" and not optimum:
185184
model_args['config']['ATTENTION_BACKEND'] = PA_ATTENTION_BACKEND
186-
if model_args['config'].get('ATTENTION_BACKEND', '') == PA_ATTENTION_BACKEND and args.device == "NPU":
187-
model_args['config']['ATTENTION_BACKEND'] = SDPA_ATTENTION_BACKEND
188-
log.warning("Continuous Batching, Speculative decoding and Prompt Lookup decoding is not supported for NPU device")
189185
log.info(f"OV Config={model_args['config']}")
190186
elif model_framework == 'pt':
191187
log.info(f"PT Config={model_args['config']}")
192188
model_args['model_type'] = get_model_type(model_name, use_case, model_framework)
193189
model_args['model_name'] = model_name
194190

195-
if model_args['config'].get('ATTENTION_BACKEND', '') == PA_ATTENTION_BACKEND and optimum:
196-
raise RuntimeError("Continuous batching mode supported only via OpenVINO GenAI")
197191
cb_config = None
198192
if args.cb_config:
199193
cb_config = get_config(args.cb_config)

0 commit comments

Comments
 (0)