Skip to content
Merged
7 changes: 6 additions & 1 deletion clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -3551,7 +3551,12 @@ def fsycl_esimd : Flag<["-"], "fsycl-explicit-simd">, Group<sycl_Group>, Flags<[
def fno_sycl_esimd : Flag<["-"], "fno-sycl-explicit-simd">, Group<sycl_Group>,
HelpText<"Disable SYCL explicit SIMD extension">, Flags<[NoArgumentUnused, CoreOption]>;
defm sycl_early_optimizations : OptOutFFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>;

// Driver flag pair controlling elimination of DPC++ dead kernel arguments.
// The driver queries the pair via Args.hasFlag(<positive>, <negative>, false)
// (see Driver.cpp and ToolChains/Clang.cpp), i.e. the optimization is off
// unless the positive form is in effect.
def fsycl_dead_args_optimization : Flag<["-"], "fsycl-dead-args-optimization">,
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Enables "
"elimination of DPC++ dead kernel arguments">;
// Negative counterpart of -fsycl-dead-args-optimization.
def fno_sycl_dead_args_optimization : Flag<["-"], "fno-sycl-dead-args-optimization">,
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Disables "
"elimination of DPC++ dead kernel arguments">;
//===----------------------------------------------------------------------===//
// CC1 Options
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3514,6 +3514,9 @@ class OffloadingActionBuilder final {
/// Flag to signal if the user requested device code split.
bool DeviceCodeSplit = false;

/// Flag to signal if the dead argument elimination (DAE) optimization is turned on.
bool EnableDAE = false;

/// The SYCL actions for the current input.
ActionList SYCLDeviceActions;

Expand Down Expand Up @@ -3951,7 +3954,7 @@ class OffloadingActionBuilder final {
ActionList WrapperInputs;
// post link is not optional - even if not splitting, always need to
// process specialization constants
bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit;
bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit || EnableDAE;
types::ID PostLinkOutType = isNVPTX || !MultiFileActionDeps
? types::TY_LLVM_BC
: types::TY_Tempfiletable;
Expand Down Expand Up @@ -4108,6 +4111,9 @@ class OffloadingActionBuilder final {
WrapDeviceOnlyBinary = Args.hasArg(options::OPT_fsycl_link_EQ);
auto *DeviceCodeSplitArg =
Args.getLastArg(options::OPT_fsycl_device_code_split_EQ);
EnableDAE =
Args.hasFlag(options::OPT_fsycl_dead_args_optimization,
options::OPT_fno_sycl_dead_args_optimization, false);
// -fsycl-device-code-split is an alias to
// -fsycl-device-code-split=per_source
DeviceCodeSplit = DeviceCodeSplitArg &&
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4123,6 +4123,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-sycl-opt");
}
// Turn on Dead Parameter Elimination Optimization with early optimizations
if (!RawTriple.isNVPTX() &&
Args.hasFlag(options::OPT_fsycl_dead_args_optimization,
options::OPT_fno_sycl_dead_args_optimization, false))
CmdArgs.push_back("-fenable-sycl-dae");

// Pass the triple of host when doing SYCL
auto AuxT = llvm::Triple(llvm::sys::getProcessTriple());
Expand Down Expand Up @@ -7807,6 +7812,11 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
// OPT_fsycl_device_code_split is not checked as it is an alias to
// -fsycl-device-code-split=per_source

// When Dead Parameter Elimination is enabled, pass -emit-param-info to the post-link step
if (!getToolChain().getTriple().isNVPTX() &&
TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization,
options::OPT_fno_sycl_dead_args_optimization, false))
addArgs(CmdArgs, TCArgs, {"-emit-param-info"});
if (JA.getType() == types::TY_LLVM_BC) {
// single file output requested - this means only perform necessary IR
// transformations (like specialization constant intrinsic lowering) and
Expand Down
8 changes: 8 additions & 0 deletions clang/test/Driver/sycl-device-optimizations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,11 @@
// RUN: %clang_cl -### -fsycl -fintelfpga %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-NO-SYCL-EARLY-OPTS %s
// CHECK-NO-SYCL-EARLY-OPTS: "-fno-sycl-early-optimizations"

/// Check that Dead Parameter Elimination Optimization is enabled
// RUN: %clang -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-DAE %s
// RUN: %clang_cl -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-DAE %s
// CHECK-DAE: clang{{.*}} "-fenable-sycl-dae"
// CHECK-DAE: sycl-post-link{{.*}} "-emit-param-info"
2 changes: 1 addition & 1 deletion sycl/test/basic_tests/sampler/sampler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out -L %opencl_libs_dir -lOpenCL
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/kernel_from_file/hw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// CUDA does not support SPIR-V.

//-fsycl-targets=%sycl_triple
// RUN: %clangxx -fsycl-device-only -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// The runtime assumes that the image passed with SYCL_USE_KERNEL_SPV has no
// eliminated arguments, so compile without early optimizations.
// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// RUN: %clangxx -include %t.h -g %s -o %t.out -lsycl -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning
// RUN: env SYCL_BE=%sycl_be SYCL_USE_KERNEL_SPV=%t.spv %t.out | FileCheck %s
// CHECK: Passed
Expand Down
4 changes: 2 additions & 2 deletions sycl/test/multi_ptr/multi_ptr.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t1.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/scheduler/HandleException.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
#include <CL/sycl.hpp>
#include <array>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/scheduler/HostAccDestruction.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -I %sycl_source_dir %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
//==---------------------- HostAccDestruction.cpp --------------------------==//
//
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/scheduler/ReleaseResourcesTest.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
Expand Down
4 changes: 2 additions & 2 deletions sycl/test/separate-compile/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
//
// >> ---- compile src1
// >> device compilation...
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict
// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict
// >> host compilation...
// RUN: %clangxx -include sycl_ihdr_a.h -g -c %s -o a.o -I %sycl_include -Wno-sycl-strict
//
// >> ---- compile src2
// >> device compilation...
// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict
// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict
// >> host compilation...
// RUN: %clangxx -DB_CPP=1 -include sycl_ihdr_b.h -g -c %s -o b.o -I %sycl_include -Wno-sycl-strict
//
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/usm/pfor_flatten.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// UNSUPPORTED: cuda
// CUDA does not support the unnamed lambda extension.
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda %s -o %t1.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda -fsycl-dead-args-optimization %s -o %t1.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
Expand Down