From 90a95a4af9b280b8ae6be87e96086b90087d28f9 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 12 Apr 2021 12:03:51 +0300 Subject: [PATCH 1/4] [SYCL][NVPTX] Refactor NVPTX target configuration Avoid using `-sycldevice` for configuring NVPTX target. --- clang/lib/Basic/Targets/NVPTX.cpp | 3 +-- clang/lib/Basic/Targets/NVPTX.h | 5 +++++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 5 ++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 1a1d39636fa2d..1081e0ad679c9 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -62,8 +62,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, .Default(32); } - // FIXME: Needed for compiling SYCL to PTX. - TLSSupported = Triple.getEnvironment() == llvm::Triple::SYCLDevice; + TLSSupported = false; VLASupported = false; AddrSpaceMap = &NVPTXAddrSpaceMap; GridValues = llvm::omp::NVPTXGpuGridValues; diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 2b4f0ec3eccae..200fb7208ebe7 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -171,6 +171,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { return CCCR_Warning; } + void adjust(LangOptions &Opts) override { + TargetInfo::adjust(Opts); + TLSSupported = TLSSupported || Opts.SYCLIsDevice; + } + bool hasExtIntType() const override { return true; } }; } // namespace targets diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index cb729548c9d16..5f823a0a4d9da 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -304,9 +304,8 @@ void NVPTXPassConfig::addIRPasses() { const NVPTXSubtarget &ST = *getTM().getSubtargetImpl(); addPass(createNVVMReflectPass(ST.getSmVersion())); - if (getTM().getTargetTriple().getOS() == Triple::CUDA && - 
getTM().getTargetTriple().getEnvironment() == Triple::SYCLDevice) { - addPass(createGlobalOffsetPass()); + addPass(createGlobalOffsetPass()); + if (getTM().getTargetTriple().getOS() == Triple::CUDA) { addPass(createLocalAccessorToSharedMemoryPass()); } From 5a89dd264388370f725422fcec82ad0c6e9bc263 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 12 Apr 2021 14:43:19 +0300 Subject: [PATCH 2/4] Skip modules w/o nvvm.annotations metadata in LocalAccessorToSharedMemory pass --- llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp b/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp index 00da015870099..2f72f3ba4707f 100644 --- a/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp +++ b/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp @@ -48,7 +48,9 @@ class LocalAccessorToSharedMemory : public ModulePass { // Access `nvvm.annotations` to determine which functions are kernel entry // points. auto NvvmMetadata = M.getNamedMetadata("nvvm.annotations"); - assert(NvvmMetadata && "IR compiled to PTX must have nvvm.annotations"); + if (!NvvmMetadata) + return false; + for (auto MetadataNode : NvvmMetadata->operands()) { if (MetadataNode->getNumOperands() != 3) continue; From d71c4b824f0d9178ab8a435cc04b9c228f2871f9 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 12 Apr 2021 15:38:09 +0300 Subject: [PATCH 3/4] Disable GlobalOffset pass for non-CUDA OS. 
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 5f823a0a4d9da..b657f6f4531fa 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -304,8 +304,10 @@ void NVPTXPassConfig::addIRPasses() { const NVPTXSubtarget &ST = *getTM().getSubtargetImpl(); addPass(createNVVMReflectPass(ST.getSmVersion())); - addPass(createGlobalOffsetPass()); + // FIXME: should the target triple check be done by the pass itself? + // See createNVPTXLowerArgsPass as an example if (getTM().getTargetTriple().getOS() == Triple::CUDA) { + addPass(createGlobalOffsetPass()); addPass(createLocalAccessorToSharedMemoryPass()); } From bc0a82d8d908306d1e60073a9c0912051d074a3e Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Tue, 13 Apr 2021 07:51:23 +0300 Subject: [PATCH 4/4] Added TODOs for issues to investigate. --- clang/lib/Basic/Targets/NVPTX.h | 1 + llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 200fb7208ebe7..eb158a17e0bca 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -173,6 +173,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { void adjust(LangOptions &Opts) override { TargetInfo::adjust(Opts); + // FIXME: Needed for compiling SYCL to PTX. 
TLSSupported = TLSSupported || Opts.SYCLIsDevice; } diff --git a/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp b/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp index 2f72f3ba4707f..fc8374f647f82 100644 --- a/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp +++ b/llvm/lib/Target/NVPTX/SYCL/LocalAccessorToSharedMemory.cpp @@ -38,7 +38,8 @@ class LocalAccessorToSharedMemory : public ModulePass { bool runOnModule(Module &M) override { // Invariant: This pass is only intended to operate on SYCL kernels being - // compiled to the `nvptx{,64}-nvidia-cuda-sycldevice` triple. + // compiled to the `nvptx{,64}-nvidia-cuda` triple. + // TODO: make sure that non-SYCL kernels are not impacted. if (skipModule(M)) return false;