1 file changed: +2 -2 lines changed
transformer_engine/pytorch/attention/dot_product_attention

@@ -434,8 +434,8 @@ def get_attention_backend(
     #            | FP8             | non-paged/paged | sm90 | thd           | >= 1
     # Unfused    | FP32/FP16/BF16  | non-paged/paged | all  | bshd,sbhd,thd | >= 1
     if inference_params is not None:
-        if device_compute_capability == (8, 9) and cudnn_version <= (9, 12, 0):
-            logger.debug("Disabling FusedAttention for KV caching for sm89 and cuDNN <= 9.12")
+        if device_compute_capability == (8, 9) and cudnn_version <= (9, 13, 0):
+            logger.debug("Disabling FusedAttention for KV caching for sm89 and cuDNN <= 9.13")
             use_fused_attention = False
         if context_parallel:
             logger.debug("Disabling all backends for KV caching with context parallelism")