@@ -95,6 +95,12 @@ struct ymm_vector<float> {
95
95
{
96
96
return _mm512_mask_i64gather_ps (src, mask, index, base, scale);
97
97
}
98
// Masked gather overload that accepts a 256-bit index vector (__m256i).
// Forwards src, mask, index and base, plus the compile-time `scale`
// template argument, unchanged to _mm256_mmask_i32gather_ps and returns
// that intrinsic's result.
// NOTE(review): the method is named mask_i64gather but delegates to a
// 32-bit-index gather intrinsic; presumably `index` carries 32-bit
// offsets in this overload -- confirm against the callers.
+ template <int scale>
99
+ static reg_t
100
+ mask_i64gather (reg_t src, opmask_t mask, __m256i index, void const *base)
101
+ {
102
+ return _mm256_mmask_i32gather_ps (src, mask, index, base, scale);
103
+ }
98
104
static reg_t i64gather (type_t *arr, arrsize_t *ind)
99
105
{
100
106
return set (arr[ind[7 ]],
@@ -247,6 +253,12 @@ struct ymm_vector<uint32_t> {
247
253
{
248
254
return _mm512_mask_i64gather_epi32 (src, mask, index, base, scale);
249
255
}
256
// Masked gather overload that accepts a 256-bit index vector (__m256i).
// Forwards src, mask, index and base, plus the compile-time `scale`
// template argument, unchanged to _mm256_mmask_i32gather_epi32 and
// returns that intrinsic's result.
// NOTE(review): the method is named mask_i64gather but delegates to a
// 32-bit-index gather intrinsic; presumably `index` carries 32-bit
// offsets in this overload -- confirm against the callers.
+ template <int scale>
257
+ static reg_t
258
+ mask_i64gather (reg_t src, opmask_t mask, __m256i index, void const *base)
259
+ {
260
+ return _mm256_mmask_i32gather_epi32 (src, mask, index, base, scale);
261
+ }
250
262
static reg_t i64gather (type_t *arr, arrsize_t *ind)
251
263
{
252
264
return set (arr[ind[7 ]],
@@ -393,6 +405,12 @@ struct ymm_vector<int32_t> {
393
405
{
394
406
return _mm512_mask_i64gather_epi32 (src, mask, index, base, scale);
395
407
}
408
// Masked gather overload that accepts a 256-bit index vector (__m256i).
// Forwards src, mask, index and base, plus the compile-time `scale`
// template argument, unchanged to _mm256_mmask_i32gather_epi32 and
// returns that intrinsic's result.
// NOTE(review): the method is named mask_i64gather but delegates to a
// 32-bit-index gather intrinsic; presumably `index` carries 32-bit
// offsets in this overload -- confirm against the callers.
+ template <int scale>
409
+ static reg_t
410
+ mask_i64gather (reg_t src, opmask_t mask, __m256i index, void const *base)
411
+ {
412
+ return _mm256_mmask_i32gather_epi32 (src, mask, index, base, scale);
413
+ }
396
414
static reg_t i64gather (type_t *arr, arrsize_t *ind)
397
415
{
398
416
return set (arr[ind[7 ]],
@@ -548,6 +566,12 @@ struct zmm_vector<int64_t> {
548
566
{
549
567
return _mm512_mask_i64gather_epi64 (src, mask, index, base, scale);
550
568
}
569
// Masked gather overload that accepts a 256-bit index vector (__m256i)
// instead of the index type used by the sibling overload directly above.
// Forwards src, mask, index and base, plus the compile-time `scale`
// template argument, unchanged to _mm512_mask_i32gather_epi64 and
// returns that intrinsic's result.
// NOTE(review): the method is named mask_i64gather but delegates to a
// 32-bit-index gather intrinsic; presumably `index` carries 32-bit
// offsets in this overload -- confirm against the callers.
+ template <int scale>
570
+ static reg_t
571
+ mask_i64gather (reg_t src, opmask_t mask, __m256i index, void const *base)
572
+ {
573
+ return _mm512_mask_i32gather_epi64 (src, mask, index, base, scale);
574
+ }
551
575
static reg_t i64gather (type_t *arr, arrsize_t *ind)
552
576
{
553
577
return set (arr[ind[7 ]],
@@ -688,6 +712,12 @@ struct zmm_vector<uint64_t> {
688
712
{
689
713
return _mm512_mask_i64gather_epi64 (src, mask, index, base, scale);
690
714
}
715
// Masked gather overload that accepts a 256-bit index vector (__m256i)
// instead of the index type used by the sibling overload directly above.
// Forwards src, mask, index and base, plus the compile-time `scale`
// template argument, unchanged to _mm512_mask_i32gather_epi64 and
// returns that intrinsic's result.
// NOTE(review): the method is named mask_i64gather but delegates to a
// 32-bit-index gather intrinsic; presumably `index` carries 32-bit
// offsets in this overload -- confirm against the callers.
+ template <int scale>
716
+ static reg_t
717
+ mask_i64gather (reg_t src, opmask_t mask, __m256i index, void const *base)
718
+ {
719
+ return _mm512_mask_i32gather_epi64 (src, mask, index, base, scale);
720
+ }
691
721
static reg_t i64gather (type_t *arr, arrsize_t *ind)
692
722
{
693
723
return set (arr[ind[7 ]],
@@ -864,6 +894,12 @@ struct zmm_vector<double> {
864
894
{
865
895
return _mm512_mask_i64gather_pd (src, mask, index, base, scale);
866
896
}
897
// Masked gather overload that accepts a 256-bit index vector (__m256i)
// instead of the index type used by the sibling overload directly above.
// Forwards src, mask, index and base, plus the compile-time `scale`
// template argument, unchanged to _mm512_mask_i32gather_pd and returns
// that intrinsic's result.
// NOTE(review): the method is named mask_i64gather but delegates to a
// 32-bit-index gather intrinsic; presumably `index` carries 32-bit
// offsets in this overload -- confirm against the callers.
+ template <int scale>
898
+ static reg_t
899
+ mask_i64gather (reg_t src, opmask_t mask, __m256i index, void const *base)
900
+ {
901
+ return _mm512_mask_i32gather_pd (src, mask, index, base, scale);
902
+ }
867
903
static reg_t i64gather (type_t *arr, arrsize_t *ind)
868
904
{
869
905
return set (arr[ind[7 ]],
0 commit comments