@@ -2184,6 +2184,50 @@ defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f
2184
2184
2185
2185
} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
2186
2186
2187
// PrefetchLoc: SDNodeXForm over a target immediate (timm). It rewrites the
// immediate N into an i32 target constant that the prefetch patterns in this
// file pass as the last operand of the selected instruction.
// NOTE(review): the input is the $loc operand of a `prefetch` node; presumably
// that is the llvm.prefetch locality hint -- confirm against the intrinsic docs.
+ def PrefetchLoc: SDNodeXForm<timm, [{
2188
+ uint32_t V = N->getZExtValue();
2189
// Keep only the SCOPE_MASK bits of the input, subtract them from SCOPE_MASK
// (so a larger masked input produces a smaller result), then move the result
// left by SCOPE_SHIFT.
// NOTE(review): this assumes SCOPE_MASK is the unshifted field mask -- confirm
// against the AMDGPU::CPol definitions.
+ V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK)) << AMDGPU::CPol::SCOPE_SHIFT;
2190
// On subtargets that do not report hasSafeCUPrefetch(), raise the value to at
// least SCOPE_SE.
+ if (!Subtarget->hasSafeCUPrefetch())
2191
+ V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
2192
+ return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
2193
+ }]>;
2194
+
2195
// prefetch_flat: PatFrag wrapping the four-operand `prefetch` node
// ($ptr, $rw, $loc, $type). It matches only when the node's memory access is
// in AMDGPUAS::FLAT_ADDRESS. The SelectionDAG predicate reads the address
// space from the MemSDNode; GISelPredicateCode performs the same comparison
// on the first memory operand of MI.
+ def prefetch_flat : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2195
+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2196
+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; }]> {
2197
+ let GISelPredicateCode = [{
2198
+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
2199
+ }];
2200
+ }
2202
+
2203
// prefetch_global: PatFrag wrapping the four-operand `prefetch` node
// ($ptr, $rw, $loc, $type). It matches when the memory access is in
// AMDGPUAS::GLOBAL_ADDRESS, or when it is in AMDGPUAS::CONSTANT_ADDRESS and
// the subtarget does not report hasSafeSmemPrefetch(). The SelectionDAG
// predicate and GISelPredicateCode express the same condition; the latter
// reads the address space from the first memory operand of MI.
// NOTE(review): presumably constant-address prefetches are handled by a scalar
// (SMEM) path when hasSafeSmemPrefetch() is true -- that path is not visible
// here; confirm.
+ def prefetch_global : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
2203
+ (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
2204
+ [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2205
+ (cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2206
+ !Subtarget->hasSafeSmemPrefetch()); }]> {
2207
+ let GISelPredicateCode = [{
2208
+ return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
2209
+ ((*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
2210
+ !Subtarget->hasSafeSmemPrefetch());
2211
+ }];
2212
+ }
2214
+
2215
// FlatPrefetchPats: emits two selection patterns that lower a generic
// prefetch fragment to a FLAT pseudo instruction.
//   inst          - base name of the FLAT_Pseudo to select; the second pattern
//                   selects the record named inst#"_SADDR".
//   prefetch_kind - fragment to match (this file instantiates it with
//                   prefetch_flat and prefetch_global).
//   rw            - operator matched in the prefetch's $rw position; it also
//                   selects the AddedComplexity value below.
// Both patterns require the last prefetch operand to be i32imm_one and pass
// $loc through the PrefetchLoc transform.
// NOTE(review): the last operand is $type; presumably 1 means a data (not
// instruction) prefetch -- confirm against the llvm.prefetch documentation.
+ multiclass FlatPrefetchPats<string inst, SDPatternOperator prefetch_kind, SDPatternOperator rw> {
2215
// Form 1: 64-bit VGPR address plus an offset, matched via GlobalOffset.
+ def : GCNPat <
2216
+ (prefetch_kind (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2217
+ (!cast<FLAT_Pseudo>(inst) $vaddr, $offset, (i32 (PrefetchLoc $loc)))
2218
+ > {
2219
// AddedComplexity is 0 when rw is i32imm_zero and 25 otherwise.
+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 0, 25);
2220
+ }
2221
+
2222
// Form 2: 64-bit SGPR base plus a 32-bit VGPR offset plus an offset, matched
// via GlobalSAddr; selects the "_SADDR" variant of the instruction.
+ def : GCNPat <
2223
+ (prefetch_kind (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
2224
+ (!cast<FLAT_Pseudo>(inst#"_SADDR") $saddr, $voffset, $offset, (i32 (PrefetchLoc $loc)))
2225
+ > {
2226
// AddedComplexity is 11 when rw is i32imm_zero and 30 otherwise, i.e. always
// higher than Form 1 for the same rw.
// NOTE(review): presumably this makes the SADDR form win when both match --
// confirm against the instruction selector's pattern ordering rules.
+ let AddedComplexity = !if(!eq(rw, i32imm_zero), 11, 30);
2227
+ }
2228
+ }
2230
+
2187
2231
multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
2188
2232
def : GCNPat <
2189
2233
(intr (FlatOffset i64:$vaddr, i32:$offset), timm:$cpol),
@@ -2198,6 +2242,14 @@ multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
2198
2242
}
2199
2243
2200
2244
let SubtargetPredicate = HasVmemPrefInsts in {
2245
// Patterns for the generic prefetch node with rw = 0, instantiated for the
// flat and global fragments.
+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_zero>;
2246
+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_zero>;
2247
+
2248
+ // Patterns for forced vector prefetch with rw = 1.
2249
// The same two fragments, matched with i32imm_one in the rw position;
// FlatPrefetchPats assigns these the higher AddedComplexity values (25 / 30).
+ defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_one>;
2250
+ defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_one>;
2251
+
2252
+
2201
2253
// Patterns for target intrinsics
2202
2254
defm : FlatIntrPrefetchPats<"FLAT_PREFETCH_B8", int_amdgcn_flat_prefetch>;
2203
2255
defm : FlatIntrPrefetchPats<"GLOBAL_PREFETCH_B8", int_amdgcn_global_prefetch>;
0 commit comments