File tree Expand file tree Collapse file tree 1 file changed +12
-10
lines changed
transformer_engine/common/recipe Expand file tree Collapse file tree 1 file changed +12
-10
lines changed Original file line number Diff line number Diff line change @@ -197,16 +197,18 @@ kernel_bulk(
197
197
const auto last_amax = ((amax_reduction_buffer != nullptr )
198
198
&& (amax_reduction_buffer[offset_in_buffer+count] != 0 .0f )) ?
199
199
amax_reduction_buffer[offset_in_buffer+count] : amax_history[0 ];
200
- for (size_t off = 0 ; off < length; off += bsize) {
201
- const size_t i = off + tid;
202
- float a = 0 ;
203
- if (i < length) {
204
- a = (i < length - 1 ) ? amax_history[(i+1 )*stride] : last_amax;
205
- amax = fmaxf (amax, a);
206
- }
207
- __syncthreads (); // Inplace roll
208
- if (i < length) {
209
- amax_history[i*stride] = (i > 0 ) ? a : 0 ;
200
+ if (last_amax != 0 .0f ) {
201
+ for (size_t off = 0 ; off < length; off += bsize) {
202
+ const size_t i = off + tid;
203
+ float a = 0 ;
204
+ if (i < length) {
205
+ a = (i < length - 1 ) ? amax_history[(i+1 )*stride] : last_amax;
206
+ amax = fmaxf (amax, a);
207
+ }
208
+ __syncthreads (); // Inplace roll
209
+ if (i < length) {
210
+ amax_history[i*stride] = (i > 0 ) ? a : 0 ;
211
+ }
210
212
}
211
213
}
212
214
You can’t perform that action at this time.
0 commit comments