From ae02915f50602f4c7e3f2fb51a0b6fb6631f110c Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 22 Sep 2025 13:58:39 -0700 Subject: [PATCH 1/3] [SYCL RTC] Implement `--auto-pch` support --- clang/include/clang/Driver/Options.h | 1 + clang/include/clang/Driver/Options.td | 14 +++ .../clang/Frontend/PrecompiledPreamble.h | 4 +- clang/lib/Frontend/PrecompiledPreamble.cpp | 18 +-- clang/test/Driver/sycl-unsupported.cpp | 9 ++ .../lib/rtc/DeviceCompilation.cpp | 119 ++++++++++++++++-- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 51 ++++++++ .../KernelCompiler/auto_pch_compile_error.cpp | 57 +++++++++ .../KernelCompiler/empty_preamble.cpp | 47 +++++++ .../KernelCompiler/auto-pch.cpp | 95 ++++++++++++++ 10 files changed, 398 insertions(+), 17 deletions(-) create mode 100644 sycl/test-e2e/KernelCompiler/auto_pch_compile_error.cpp create mode 100644 sycl/test-e2e/KernelCompiler/empty_preamble.cpp create mode 100644 sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp diff --git a/clang/include/clang/Driver/Options.h b/clang/include/clang/Driver/Options.h index a7eaf3336339b..f15670e35d4b7 100644 --- a/clang/include/clang/Driver/Options.h +++ b/clang/include/clang/Driver/Options.h @@ -41,6 +41,7 @@ enum ClangVisibility { FlangOption = (1 << 4), FC1Option = (1 << 5), DXCOption = (1 << 6), + SYCLRTCOnlyOption = (1 << 7), }; enum ID { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index fa5581a10bdfe..c6f9771652418 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -107,6 +107,8 @@ def FC1Option : OptionVisibility; // are made available when the driver is running in DXC compatibility mode. 
def DXCOption : OptionVisibility; +def SYCLRTCOnlyOption : OptionVisibility; + ///////// // Docs @@ -195,6 +197,11 @@ def sycl_Group : OptionGroup<"">, Group, DocName<"SYCL options">, Visibility<[ClangOption, CLOption]>; +def sycl_rtc_only_Group : OptionGroup<", + Group, + DocName<"SYCL RTC specific options">, + Visibility<[SYCLRTCOnlyOption]>; + def cuda_Group : OptionGroup<"">, Group, DocName<"CUDA options">, Visibility<[ClangOption, CLOption]>; @@ -7511,6 +7518,13 @@ def fsyclbin : Flag<["-"], "fsyclbin">, Alias, AliasArgs<["executable"]>; } // let Group = sycl_Group +let Visibility = [SYCLRTCOnlyOption] in { + let Group = sycl_rtc_only_Group in { + def auto_pch : Flag<["--"], "auto-pch">, + HelpText<"Enable Auto-PCH for SYCL RTC Compilation">; + } // let Group = sycl_rtc_only_Group +} // let Visibility = [SYCLRTCOnlyOption] + // FIXME: -fsycl-explicit-simd is deprecated. remove it when support is dropped. def : Flag<["-"], "fsycl-explicit-simd">, Flags<[Deprecated]>, Group, diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h index 565395b4f3986..4b3935abea411 100644 --- a/clang/include/clang/Frontend/PrecompiledPreamble.h +++ b/clang/include/clang/Frontend/PrecompiledPreamble.h @@ -87,8 +87,8 @@ class PrecompiledPreamble { IntrusiveRefCntPtr Diagnostics, IntrusiveRefCntPtr VFS, std::shared_ptr PCHContainerOps, - bool StoreInMemory, StringRef StoragePath, - PreambleCallbacks &Callbacks); + bool StoreInMemory, StringRef StoragePath, PreambleCallbacks &Callbacks, + bool AllowASTWithErrors = true); PrecompiledPreamble(PrecompiledPreamble &&); PrecompiledPreamble &operator=(PrecompiledPreamble &&); diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp index 03f70b74dfb42..f53bf24adc76c 100644 --- a/clang/lib/Frontend/PrecompiledPreamble.cpp +++ b/clang/lib/Frontend/PrecompiledPreamble.cpp @@ -247,9 +247,10 @@ class TempPCHFile { class 
PrecompilePreambleAction : public ASTFrontendAction { public: PrecompilePreambleAction(std::shared_ptr Buffer, bool WritePCHFile, - PreambleCallbacks &Callbacks) + PreambleCallbacks &Callbacks, + bool AllowASTWithErrors = true) : Buffer(std::move(Buffer)), WritePCHFile(WritePCHFile), - Callbacks(Callbacks) {} + Callbacks(Callbacks), AllowASTWithErrors(AllowASTWithErrors) {} std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override; @@ -285,6 +286,7 @@ class PrecompilePreambleAction : public ASTFrontendAction { bool WritePCHFile; // otherwise the PCH is written into the PCHBuffer only. std::unique_ptr FileOS; // null if in-memory PreambleCallbacks &Callbacks; + bool AllowASTWithErrors; }; class PrecompilePreambleConsumer : public PCHGenerator { @@ -292,10 +294,11 @@ class PrecompilePreambleConsumer : public PCHGenerator { PrecompilePreambleConsumer(PrecompilePreambleAction &Action, Preprocessor &PP, ModuleCache &ModCache, StringRef isysroot, std::shared_ptr Buffer, - const CodeGenOptions &CodeGenOpts) + const CodeGenOptions &CodeGenOpts, + bool AllowASTWithErrors = true) : PCHGenerator(PP, ModCache, "", isysroot, std::move(Buffer), CodeGenOpts, ArrayRef>(), - /*AllowASTWithErrors=*/true), + AllowASTWithErrors), Action(Action) {} bool HandleTopLevelDecl(DeclGroupRef DG) override { @@ -337,7 +340,7 @@ PrecompilePreambleAction::CreateASTConsumer(CompilerInstance &CI, return std::make_unique( *this, CI.getPreprocessor(), CI.getModuleCache(), Sysroot, Buffer, - CI.getCodeGenOpts()); + CI.getCodeGenOpts(), AllowASTWithErrors); } template bool moveOnNoError(llvm::ErrorOr Val, T &Output) { @@ -415,7 +418,8 @@ llvm::ErrorOr PrecompiledPreamble::Build( IntrusiveRefCntPtr Diagnostics, IntrusiveRefCntPtr VFS, std::shared_ptr PCHContainerOps, bool StoreInMemory, - StringRef StoragePath, PreambleCallbacks &Callbacks) { + StringRef StoragePath, PreambleCallbacks &Callbacks, + bool AllowASTWithErrors) { assert(VFS && "VFS is null"); auto PreambleInvocation 
= std::make_shared(Invocation); @@ -512,7 +516,7 @@ llvm::ErrorOr PrecompiledPreamble::Build( auto Act = std::make_unique( std::move(Buffer), /*WritePCHFile=*/Storage->getKind() == PCHStorage::Kind::TempFile, - Callbacks); + Callbacks, AllowASTWithErrors); if (!Act->BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) return BuildPreambleError::BeginSourceFileFailed; diff --git a/clang/test/Driver/sycl-unsupported.cpp b/clang/test/Driver/sycl-unsupported.cpp index 311efbecf8b6b..7caf761e05f91 100644 --- a/clang/test/Driver/sycl-unsupported.cpp +++ b/clang/test/Driver/sycl-unsupported.cpp @@ -64,6 +64,15 @@ // UNSUPPORTED_OPT-NOT: clang{{.*}} "-fsycl-is-device"{{.*}} "[[OPT_CC1]]{{.*}}" // UNSUPPORTED_OPT: clang{{.*}} "-fsycl-is-host"{{.*}} "[[OPT_CC1]]{{.*}}" +// "--auto-pch" should only be enabled for SYCL RTC compilations, regular driver +// shouldn't know about it: +// +// RUN: not %clangxx -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH +// RUN: not %clangxx -fsycl-device-only -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH +// RUN: not %clangxx -fsycl -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH +// +// AUTO_PCH: error: unknown argument: '--auto-pch' + // FPGA support has been removed, usage of any FPGA specific options and any // options that have FPGA specific arguments should emit a specific error // diagnostic. 
diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index e0150188c71a2..6160196eefdac 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -78,6 +79,12 @@ class SYCLToolchain { } } + struct PrecompiledPreambles { + using key = std::pair; + std::mutex Mutex; + std::map> PreamblesMap; + }; + // Similar to FrontendActionFactory, but we don't take ownership of // `FrontendAction`, nor do we create copies of it as we only perform a single // `ToolInvocation`. @@ -140,9 +147,15 @@ class SYCLToolchain { } ArgStringList ASL; - for_each(DAL, [&DAL, &ASL](Arg *A) { A->render(DAL, ASL); }); - for_each(UserArgList, - [&UserArgList, &ASL](Arg *A) { A->render(UserArgList, ASL); }); + for (Arg *A : DAL) + A->render(DAL, ASL); + for (Arg *A : UserArgList) { + Option Group = A->getOption().getGroup(); + if (Group.isValid() && Group.getID() == OPT_sycl_rtc_only_Group) + continue; + + A->render(UserArgList, ASL); + } std::vector CommandLine; CommandLine.reserve(ASL.size() + 2); @@ -153,6 +166,79 @@ class SYCLToolchain { return CommandLine; } + class ActionWithPCHPreamble : public Action { + std::string CmdLineOpts; + + public: + ActionWithPCHPreamble(FrontendAction &FEAction, std::string &&CmdLineOpts) + : Action(FEAction), CmdLineOpts(std::move(CmdLineOpts)) {} + + bool runInvocation(std::shared_ptr Invocation, + FileManager *Files, + std::shared_ptr PCHContainerOps, + DiagnosticConsumer *DiagConsumer) override { + auto MainFilePath = Invocation->getFrontendOpts().Inputs[0].getFile(); + auto MainFileBuffer = Files->getBufferForFile(MainFilePath); + assert(MainFileBuffer && "Can't get memory buffer for in-memory source?"); + + PreambleBounds Bounds = ComputePreambleBounds( + Invocation->getLangOpts(), **MainFileBuffer, 100 /* MaxLines */); + + 
PrecompiledPreambles::key key{ + std::move(CmdLineOpts), + (*MainFileBuffer)->getBuffer().substr(0, Bounds.Size).str()}; + + std::shared_ptr Preamble; + { + PrecompiledPreambles &Preambles = SYCLToolchain::instance().Preambles; + std::lock_guard Lock{Preambles.Mutex}; + auto It = Preambles.PreamblesMap.try_emplace(key).first; + + if (!It->second) { // Null slot: new key, or an earlier build failed. + PreambleCallbacks Callbacks; + auto DiagIds = llvm::makeIntrusiveRefCnt(); + auto DiagOpts = Invocation->getDiagnosticOpts(); + auto Diags = llvm::makeIntrusiveRefCnt( + DiagIds, DiagOpts, DiagConsumer, false); + + static std::string StoragePath = + (SYCLToolchain::instance().getPrefix() + "/preambles").str(); + llvm::ErrorOr NewPreamble = + PrecompiledPreamble::Build( + *Invocation, MainFileBuffer->get(), Bounds, Diags, + Files->getVirtualFileSystemPtr(), PCHContainerOps, + /*StorePreamblesInMemory*/ true, StoragePath, Callbacks, + /*AllowASTWithErrors=*/false); + + if (!NewPreamble) + return false; + + It->second = std::make_shared( + std::move(NewPreamble.get())); + } + + Preamble = It->second; + } // End lock + + assert(Preamble); + assert(Preamble->CanReuse(*Invocation, **MainFileBuffer, Bounds, + Files->getVirtualFileSystem())); + + // FIXME: WHY release???? 
+ auto Buf = llvm::MemoryBuffer::getMemBufferCopy( + (*MainFileBuffer)->getBuffer(), MainFilePath) + .release(); + + auto VFS = Files->getVirtualFileSystemPtr(); + Preamble->AddImplicitPreamble(*Invocation, VFS, Buf); + auto NewFiles = makeIntrusiveRefCnt( + Files->getFileSystemOpts(), std::move(VFS)); + + return Action::runInvocation(std::move(Invocation), NewFiles.get(), + std::move(PCHContainerOps), DiagConsumer); + } + }; + public: static SYCLToolchain &instance() { static SYCLToolchain Instance; @@ -162,7 +248,8 @@ class SYCLToolchain { bool run(const InputArgList &UserArgList, BinaryFormat Format, const char *SourceFilePath, FrontendAction &FEAction, IntrusiveRefCntPtr FSOverlay = nullptr, - DiagnosticConsumer *DiagConsumer = nullptr) { + DiagnosticConsumer *DiagConsumer = nullptr, + bool UseAutoPCH = false) { std::vector CommandLine = createCommandLine(UserArgList, Format, SourceFilePath); @@ -175,9 +262,21 @@ class SYCLToolchain { auto Files = llvm::makeIntrusiveRefCnt( clang::FileSystemOptions{"." /* WorkingDir */}, FS); - Action A{FEAction}; - ToolInvocation TI{CommandLine, &A, Files.get(), - std::make_shared()}; + Action Normal{FEAction}; + + // User compilation options must be part of the key in the preambles map. We + // can either use "raw" user options or the "processed" from + // `createCommandLine` as long as we're consistent in what we're using. + // Current internal APIs pass `InputArgList` around instead of a single + // `std::string`, so it's easier to use `CommandLine`. Just make sure to + // drop `rtc_N.cpp` that is always different: + ActionWithPCHPreamble WithPreamble{FEAction, + join(drop_end(CommandLine, 1), " ")}; + ToolInvocation TI{CommandLine, + UseAutoPCH ? static_cast(&WithPreamble) + : &Normal, + Files.get(), std::make_shared()}; + TI.setDiagnosticConsumer(DiagConsumer ? 
DiagConsumer : &IgnoreDiag); return TI.run(); @@ -217,6 +316,8 @@ class SYCLToolchain { std::string ClangXXExe = (Prefix + "/bin/clang++").str(); llvm::IntrusiveRefCntPtr ToolchainFS = llvm::makeIntrusiveRefCnt(); + + PrecompiledPreambles Preambles; }; class ClangDiagnosticWrapper { @@ -348,9 +449,11 @@ Expected jit_compiler::compileDeviceCode( DiagnosticOptions DiagOpts; ClangDiagnosticWrapper Wrapper(BuildLog, &DiagOpts); + bool AutoPCH = UserArgList.hasArg(OPT_auto_pch); + if (SYCLToolchain::instance().run(UserArgList, Format, SourceFile.Path, ELOA, getInMemoryFS(SourceFile, IncludeFiles), - Wrapper.consumer())) { + Wrapper.consumer(), AutoPCH)) { return ELOA.takeModule(); } else { return createStringError(BuildLog); diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index d21c5ff4c4394..be501e9158cb1 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -1127,6 +1127,57 @@ build_options{{ Relax the requirement that parameter types for free-function kernels must be forward-declarable. +===== `--auto-pch` + +Enable auto-detection of the preamble and use it as a pre-compiled header to +speed up subsequent compilations of TUs matching the preamble/compilation +options. 
Example of code that can benefit from this: + +[source,c++] +---- +#include <sycl/sycl.hpp> + +// Auto-detected preamble ends before next line: +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast<float>(id); +} +---- + +Limitations: + +* Preamble detection is done at the Lexer level and can't handle code like: + +[source,c++] +---- +#if 1 +#include <sycl/sycl.hpp> +#else +// Auto-detected preamble ends in the middle of `#else` and would fail to compile. +void foo() {} +#endif +---- + +* Any change in either the preamble or the compilation options (including + `-DSOMETHING`!) results in the creation of a new pre-compiled header/preamble. + +* No support (including not reporting any errors) for `+__DATE__+`/`+__TIME__+` + macros inside the auto-detected preamble (including in transitively + included files). + +* Files used inside the preamble must not change between different compilations + (at least for the same auto-detected preamble). + +* Auto-generated pre-compiled headers/preambles are stored in memory only. That means: + - No persistence between invocations. + - Currently there is no eviction mechanism, so the application is expected + to use the option only when the number of preambles is limited. 
+ === Known issues and limitations when the language is `sycl` ==== Changing the compiler action or output diff --git a/sycl/test-e2e/KernelCompiler/auto_pch_compile_error.cpp b/sycl/test-e2e/KernelCompiler/auto_pch_compile_error.cpp new file mode 100644 index 0000000000000..f4aa7d9d440af --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/auto_pch_compile_error.cpp @@ -0,0 +1,57 @@ +// RUN: %{build} -o %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out | FileCheck %s + +// UNSUPPORTED: target-native_cpu +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142 + +// CHECK-LABEL: Device compilation failed +// CHECK-NEXT: Detailed information: +// CHECK-NEXT: rtc_0.cpp:2:10: fatal error: 'non-existent.hpp' file not found +// CHECK-NEXT: 2 | #include "non-existent.hpp" +// CHECK-NEXT: | ^~~~~~~~~~~~~~~~~~ + +// Make sure that the error is reported properly when using "--auto-pch" option. + +#include +#include + +#include +#include +#include +#include + +using namespace std::string_view_literals; +namespace syclexp = sycl::ext::oneapi::experimental; + +int main(int argc, char **argv) { + std::string src = R"""( +#include "non-existent.hpp" +#include +#include + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast(id); +} +)"""; + + sycl::queue q; + sycl::kernel_bundle kb_src = + syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src); + try { + sycl::kernel_bundle kb_exe = + syclexp::build(kb_src, syclexp::properties{syclexp::build_options{ + std::vector{"--auto-pch"}}} + + ); + return 1; + } catch (sycl::exception &e) { + std::cout << e.what() << std::endl; + } +} diff --git 
a/sycl/test-e2e/KernelCompiler/empty_preamble.cpp b/sycl/test-e2e/KernelCompiler/empty_preamble.cpp new file mode 100644 index 0000000000000..75fc4aaa1777a --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/empty_preamble.cpp @@ -0,0 +1,47 @@ +// RUN: %{build} -o %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out + +// UNSUPPORTED: target-native_cpu +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142 + +// Verify that empty preamble works fine + +#include +#include + +#include +#include +#include +#include + +using namespace std::string_view_literals; +namespace syclexp = sycl::ext::oneapi::experimental; + +int main(int argc, char **argv) { + std::string src = R"""(void foo() {} +#include +#include +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast(id); +} +)"""; + + sycl::queue q; + // Two iterations to test pch creation/use: + for (int i = 0; i < 2; ++i) { + sycl::kernel_bundle kb_src = + syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src); + sycl::kernel_bundle kb_exe = + syclexp::build(kb_src, syclexp::properties{syclexp::build_options{ + std::vector{"--auto-pch"}}} + + ); + } +} diff --git a/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp b/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp new file mode 100644 index 0000000000000..c37d8febb5acb --- /dev/null +++ b/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp @@ -0,0 +1,95 @@ +// RUN: %{build} -O3 -o %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out + +// UNSUPPORTED: target-native_cpu +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142 + +#include 
+#include + +#include +#include +#include +#include + +using namespace std::string_view_literals; +namespace syclexp = sycl::ext::oneapi::experimental; + +void run(std::vector ExtraHeaders) { + std::string preamble = [&]() { + std::stringstream preamble; + + // These are necessary: + preamble << R"""( +#include +#include +)"""; + + for (std::string_view Header : ExtraHeaders) + preamble << "#include <" << Header << ">\n"; + + preamble << "void preamble_stop();\n"; + return preamble.str(); + }(); + + // Each iteration will have + // + // #define VAL + // + // between preamble and body + + std::string body = R"""( +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast(id) + VAL; +} +)"""; + + sycl::queue q; + + auto Run = [&](auto props) { + for (int i = 0; i < 5; ++i) { + std::string src_str = + preamble + "#define VAL " + std::to_string(i) + "\n" + body; + auto t1 = std::chrono::high_resolution_clock::now(); + sycl::kernel_bundle kb_src = + syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src_str); + sycl::kernel_bundle kb_exe = + syclexp::build(kb_src, props); + auto t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration iter_duration = t2 - t1; + std::cout << static_cast(iter_duration.count()) << "ms" << " "; + } + }; + + if (ExtraHeaders.empty()) + std::cout << " "; + for (std::string_view Header : ExtraHeaders) + std::cout << Header << " "; + std::cout << "| "; + Run(syclexp::properties{}); + std::cout << "| "; + Run(syclexp::properties{ + syclexp::build_options{std::vector{"--auto-pch"}}}); + std::cout << std::endl; +} + +int main(int argc, char **argv) { + // So that output could be copy-pasted into GH comments and rendered as a + // 
table: + std::cout << "Extra Headers | Without PCH | With auto-PCH" << std::endl; + std::cout << "-|-|-" << std::endl; + run({}); + run({"sycl/half_type.hpp"}); + run({"sycl/ext/oneapi/bfloat16.hpp"}); + run({"sycl/marray.hpp"}); + run({"sycl/vector.hpp"}); + run({"sycl/multi_ptr.hpp"}); + run({"sycl/builtins.hpp"}); +} From 48ee6c1fef5cac16a08801bacd631bdffcac0808 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 1 Oct 2025 08:21:18 -0700 Subject: [PATCH 2/3] Add comment in `Options.td` --- clang/include/clang/Driver/Options.td | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c6f9771652418..183cfd926c86a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -107,6 +107,7 @@ def FC1Option : OptionVisibility; // are made available when the driver is running in DXC compatibility mode. def DXCOption : OptionVisibility; +// SYCLRTCOnlyOption - only acceptable for the SYCL RTC (Run Time Compilation). 
def SYCLRTCOnlyOption : OptionVisibility; ///////// @@ -7518,6 +7519,8 @@ def fsyclbin : Flag<["-"], "fsyclbin">, Alias, AliasArgs<["executable"]>; } // let Group = sycl_Group +// Options specific to the SYCL RTC and only available for JIT compilation (not +// through regular `clang++ -fsycl` in command line): let Visibility = [SYCLRTCOnlyOption] in { let Group = sycl_rtc_only_Group in { def auto_pch : Flag<["--"], "auto-pch">, From 736efc4bc3c806aae5707330d7347a0d6f2414f7 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 1 Oct 2025 12:29:07 -0700 Subject: [PATCH 3/3] Clarify release --- sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 6160196eefdac..45f8ca2e6891c 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -12,6 +12,7 @@ #include "Resource.h" #include "translation/Translation.h" +#include "clang/Lex/PreprocessorOptions.h" #include #include #include @@ -224,7 +225,11 @@ class SYCLToolchain { assert(Preamble->CanReuse(*Invocation, **MainFileBuffer, Bounds, Files->getVirtualFileSystem())); - // FIXME: WHY release???? + assert(Invocation->getPreprocessorOpts().RetainRemappedFileBuffers == + false); + // `PreprocessorOptions::RetainRemappedFileBuffers` defaults to false, so + // MemoryBuffer will be cleaned up by the CompilerInstance, thus + // `std::unique_ptr::release`. auto Buf = llvm::MemoryBuffer::getMemBufferCopy( (*MainFileBuffer)->getBuffer(), MainFilePath) .release();