Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 70 additions & 45 deletions sycl/include/CL/sycl/INTEL/online_compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@

#pragma once

#include <CL/sycl/context.hpp>
#include <CL/sycl/detail/defines_elementary.hpp> // for __SYCL_INLINE_NAMESPACE
#include <CL/sycl/detail/export.hpp> // for __SYCL_EXPORT
#include <CL/sycl/device.hpp>

#include <memory>
#include <string>
#include <vector>

__SYCL_INLINE_NAMESPACE(cl) {
Expand Down Expand Up @@ -59,7 +59,9 @@ class device_arch {

/// Represents an error happend during online compilation.
class online_compile_error : public sycl::exception {
// TBD
public:
online_compile_error() = default;
online_compile_error(const string_class &Msg) : sycl::exception(Msg) {}
};

/// Designates a source language for the online compiler.
Expand All @@ -70,25 +72,32 @@ enum class source_language { opencl_c, cm };
template <source_language Lang> class online_compiler {
public:
/// Constructs online compiler which can target any device and produces
/// given compiled code format. Produces device code is 64-bit.
/// given compiled code format. Produces 64-bit device code.
/// The created compiler is "optimistic" - it assumes all applicable SYCL
/// device capabilities are supported by the target device(s).
online_compiler(compiled_code_format fmt = compiled_code_format::spir_v)
: OutputFormat(fmt), OutputFormatVersion({0, 0}),
DeviceArch(device_arch::any), Is64Bit(true), DeviceStepping("") {}
DeviceType(sycl::info::device_type::all), DeviceArch(device_arch::any),
Is64Bit(true), DeviceStepping("") {}

/// Constructs online compiler which targets given architecture and produces
/// given compiled code format. Produces device code is 64-bit.
/// given compiled code format. Produces 64-bit device code.
/// Throws online_compile_error if values of constructor arguments are
/// contradictory or not supported - e.g. if the source language is not
/// supported for given device type.
online_compiler(sycl::info::device_type dev_type, device_arch arch,
compiled_code_format fmt = compiled_code_format::spir_v)
: OutputFormat(fmt), OutputFormatVersion({0, 0}), DeviceArch(arch),
Is64Bit(true), DeviceStepping("") {}
: OutputFormat(fmt), OutputFormatVersion({0, 0}), DeviceType(dev_type),
DeviceArch(arch), Is64Bit(true), DeviceStepping("") {}

/// Constructs online compiler for the target specified by given SYCL device.
online_compiler(const sycl::device &dev);
// TODO: the initial version generates the generic code (SKL now), need
// to do additional device::info calls to determine the device by it's
// features.
online_compiler(const sycl::device &dev)
: OutputFormat(compiled_code_format::spir_v), OutputFormatVersion({0, 0}),
DeviceType(sycl::info::device_type::all), DeviceArch(device_arch::any),
Is64Bit(true), DeviceStepping("") {}

/// Compiles given in-memory \c Lang source to a binary blob. Blob format,
/// other parameters are set in the constructor by the compilation target
Expand All @@ -100,31 +109,50 @@ template <source_language Lang> class online_compiler {
std::vector<byte> compile(const std::string &src, const Tys &... args);

/// Sets the compiled code format of the compilation target and returns *this.
online_compiler<Lang> &setOutputFormat(compiled_code_format fmt);
online_compiler<Lang> &setOutputFormat(compiled_code_format fmt) {
OutputFormat = fmt;
return *this;
}

/// Sets the compiled code format version of the compilation target and
/// returns *this.
online_compiler<Lang> &setOutputFormatVersion(int major, int minor);
online_compiler<Lang> &setOutputFormatVersion(int major, int minor) {
OutputFormatVersion = {major, minor};
return *this;
}

/// Sets the device type of the compilation target and returns *this.
online_compiler<Lang> &setTargetDeviceType(sycl::info::device_type type);
online_compiler<Lang> &setTargetDeviceType(sycl::info::device_type type) {
DeviceType = type;
return *this;
}

/// Sets the device architecture of the compilation target and returns *this.
online_compiler<Lang> &setTargetDeviceArch(device_arch arch);
online_compiler<Lang> &setTargetDeviceArch(device_arch arch) {
DeviceArch = arch;
return *this;
}

/// Makes the compilation target 32-bit and returns *this.
online_compiler<Lang> &set32bitTarget();
online_compiler<Lang> &set32bitTarget() {
Is64Bit = false;
return *this;
};

/// Makes the compilation target 64-bit and returns *this.
online_compiler<Lang> &set64bitTarget();
online_compiler<Lang> &set64bitTarget() {
Is64Bit = true;
return *this;
};

/// Sets implementation-defined target device stepping of the compilation
/// target and returns *this.
online_compiler<Lang> &setTargetDeviceStepping(const std::string &id);
online_compiler<Lang> &setTargetDeviceStepping(const std::string &id) {
DeviceStepping = id;
return *this;
}

private:
// Compilation target specification fields: {

/// Compiled code format.
compiled_code_format OutputFormat;

Expand All @@ -142,51 +170,48 @@ template <source_language Lang> class online_compiler {

/// Target device stepping (implementation defined)
std::string DeviceStepping;
// }

/// Handles to helper functions used by the implementation.
void *CompileToSPIRVHandle = nullptr;
void *FreeSPIRVOutputsHandle = nullptr;
};

// Specializations of the online_compiler class and 'compile' function for
// particular languages and parameter types.

/// Compiles given OpenCL source. May throw \c online_compile_error.
/// @param src - contents of the source
/// Compiles the given OpenCL source. May throw \c online_compile_error.
/// @param src - contents of the source.
/// @param options - compilation options (implementation defined); standard
/// OpenCL JIT compiler options must be supported.
template <>
template <>
__SYCL_EXPORT std::vector<byte>
online_compiler<source_language::opencl_c>::compile(
const std::string &src, const std::vector<std::string> &options);

/// Compiles the given OpenCL source. May throw \c online_compile_error.
/// @param src - contents of the source.
template <>
template <>
std::vector<byte>
online_compiler<source_language::opencl_c>::compile(const std::string &src) {
// real implementation will call some non-templated impl function here
return std::vector<byte>{};
return compile(src, std::vector<std::string>{});
}

/// Compiles given OpenCL source. May throw \c online_compile_error.
/// @param src - contents of the source
/// @param options - compilation options (implementation defined); standard
/// OpenCL JIT compiler options must be supported
/// Compiles the given CM source \p src.
/// @param src - contents of the source.
/// @param options - compilation options (implementation defined).
template <>
template <>
std::vector<byte> online_compiler<source_language::opencl_c>::compile(
const std::string &src, const std::vector<std::string> &options) {
// real implementation will call some non-templated impl function here
return std::vector<byte>{};
}
__SYCL_EXPORT std::vector<byte> online_compiler<source_language::cm>::compile(
const std::string &src, const std::vector<std::string> &options);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have the feeling that

Suggested change
const std::string &src, const std::vector<std::string> &options);
const std::string &src, const std::vector<std::string> &options = {});

would make the first overload useless.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I surely thought about doing that, but it doesn't work and the attempt to add the default argument, then remove the overload accepting only 'src' causes this error during SYCL build phase:

llvm/sycl/include/CL/sycl/INTEL/online_compiler.hpp:195:73: error: default argument specified in explicit specialization [ -fpermissive ]
const std::string &src, const std::vector< std::string > &options = {});

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, because you have the main definition as

template <typename... Tys>
  std::vector<byte> compile(const std::string &src, const Tys &... args);

you cannot add this default parameter there either.
Actually I am even surprised that it is possible to specialize a variadic argument by a non variadic one...

But, at the end, why do you need to declare some super generic compile() that you do not really use, instead of having extensions just adding incrementally the overload they want?
I have the feeling that this specialization just adds some complexity for nothing valuable.
And the variadic template derail some simple implementations like:
https://godbolt.org/z/Yfo9bd

#include <iostream>
#include <string>
#include <vector>
struct a {
    template <typename... Types>
    static void compile(Types... args) {
        std::cout << "the generic default" << std::endl;
    }
    static void compile(std::string arg, std::vector<std::string> args = {}) {
        std::cout << "single arg with vector" << std::endl;
    }
};

int main() {
   // This one is captured by the generic default because a `const char*` is not a `std::string`, but do we need this at all?
   a::compile("default");
   a::compile(std::string{});
   a::compile(std::string{}, {"opt"});
   a::compile("", {"opt"});
   a::compile(std::string{}, {});
   a::compile(std::string{}, {"opt"});
     return 0;
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But, at the end, why do you need to declare some super generic compile() that you do not really use, instead of having extensions just adding incrementally the overload they want?
I have the feeling that this specialization just adds some complexity for nothing valuable.
And the variadic template derail some simple implementations like:
https://godbolt.org/z/Yfo9bd

@kbobrovs , is the one who wrote the feature SPEC, perhaps Konst can answer your question.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The assumption here is that different languages may have very different parameter sets for compile. Variadic templates provide the needed freedom. Concrete specializations fix variants used for particular language.

To avoid "derailing" mentioned above, we simply don't provide compile specialization pairs which conflict in that way.

Overloading is surely an option too, but some overloads will be unavailable for certain languages, which adds confusion. Isn't variadic template is ok C++ way to go when parameter list can be arbitrary?

Adding @Pennycook and @rolandschulz for possible comments.


/// Compiles given CM source.
/// Compiles the given CM source \p src.
template <>
template <>
std::vector<byte>
online_compiler<source_language::cm>::compile(const std::string &src) {
// real implementation will call some non-templated impl function here
return std::vector<byte>{};
}

/// Compiles given CM source.
/// @param options - compilation options (implementation defined)
template <>
template <>
std::vector<byte> online_compiler<source_language::cm>::compile(
const std::string &src, const std::vector<std::string> &options) {
// real implementation will call some non-templated impl function here
return std::vector<byte>{};
return compile(src, std::vector<std::string>{});
}

} // namespace INTEL
Expand Down
2 changes: 2 additions & 0 deletions sycl/include/CL/sycl/exception.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class __SYCL_EXPORT exception : public std::exception {
shared_ptr_class<context> Context = nullptr)
: MMsg(Msg + " " + detail::codeToString(CLErr)), MCLErr(CLErr),
MContext(Context) {}

exception(const string_class &Msg) : MMsg(Msg), MContext(nullptr) {}
};

class runtime_error : public exception {
Expand Down
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ set(SYCL_SOURCES
"detail/program_impl.cpp"
"detail/program_manager/program_manager.cpp"
"detail/queue_impl.cpp"
"detail/online_compiler/online_compiler.cpp"
"detail/os_util.cpp"
"detail/platform_util.cpp"
"detail/reduction.cpp"
Expand Down
110 changes: 110 additions & 0 deletions sycl/source/detail/online_compiler/ocloc_api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
//===------- ocloc_api.h --------------------------------------------------===//
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason to copy this header into SYCL rather than reusing the existing one?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The alternative solution requires pulling the whole https://github.com/intel/compute-runtime workspace just for the purpose of using this one ocloc_api.h header seems excessive. Do you agree?
Or did you suggest something else?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think pulling just this header from https://github.com/intel/compute-runtime is enough. For example, SYCL uses OpenCL headers to build, and pulls in just the headers, not the entire repo.

Copy link
Contributor Author

@v-klochkov v-klochkov Dec 22, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You cannot pull one file from repo. Git does not provide such functionality. Using other tools is not safe.
For OpenCL headers we pull the whole workspace: https://github.com/KhronosGroup/OpenCL-Headers.git

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Looks like this is necessary evil (duplication of the source) then :(

//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// This file is copied from
// https://github.com/intel/compute-runtime/blob/master/shared/offline_compiler/source/ocloc_api.h

#include <cstdint>

#ifndef OCLOC_MAKE_VERSION
/// Generates ocloc API versions
#define OCLOC_MAKE_VERSION(_major, _minor) \
((_major << 16) | (_minor & 0x0000ffff))
#endif // OCLOC_MAKE_VERSION

typedef enum _ocloc_version_t {
OCLOC_VERSION_1_0 = OCLOC_MAKE_VERSION(1, 0), ///< version 1.0
OCLOC_VERSION_CURRENT = OCLOC_MAKE_VERSION(1, 0), ///< latest known version
OCLOC_VERSION_FORCE_UINT32 = 0x7fffffff
} ocloc_version_t;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this code, perhaps it could be modernized upstream too if it is C++ by replacing OCLOC_MAKE_VERSION by a constexpr function in some nice namespace and the enum a C++ one without the typedef.
But this would be outside of this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That could be done. The alternative to modernizing the file is keeping it as close to the original as possible to make it easier follow the changes in the original file.
I did only clang-format changes over the original.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To give more context for others: this file is copied from the underlying Gen driver source base. See the discussion above.


#ifdef _WIN32
#define SIGNATURE __declspec(dllexport) int __cdecl
#else
#define SIGNATURE int
#endif

extern "C" {
/// Invokes ocloc API using C interface. Supported commands match
/// the functionality of ocloc executable (check ocloc's "help"
/// for reference : shared/offline_compiler/source/ocloc_api.cpp)
/// at https://github.com/intel/compute-runtime.
///
/// NumArgs and argv params represent the command line.
/// Remaining params represent I/O.
/// Output params should be freed using oclocFreeOutput() when
/// no longer needed.
/// List and names of outputs match outputs of ocloc executable.
///
/// \param NumArgs is the number of arguments to pass to ocloc.
///
/// \param Argv is an array of arguments to be passed to ocloc.
///
/// \param NumSources is the number of in-memory representations
/// of source files to be passed to ocloc.
///
/// \param DataSources is an array of in-memory representations
/// of source files to be passed to ocloc.
///
/// \param LenSources is an array of sizes of in-memory representations
/// of source files passed to ocloc as DataSources.
///
/// \param NameSources is an array of names of in-memory representations
/// of source files passed to ocloc as DataSources.
///
/// \param NumInputHeaders is the number of in-memory representations
/// of header files to be passed to ocloc.
///
/// \param DataInputHeaders is an array of in-memory representations
/// of header files to be passed to ocloc.
///
/// \param LenInputHeaders is an array of sizes of in-memory representations
/// of header files passed to ocloc as DataInputHeaders.
///
/// \param NameInputHeaders is an array of names of in-memory representations
/// of header files passed to ocloc as DataInputHeaders.
///
/// \param NumOutputs returns the number of outputs.
///
/// \param DataOutputs returns an array of in-memory representations
/// of output files.
///
/// \param LenOutputs returns an array of sizes of in-memory representations
/// of output files.
///
/// \param NameOutputs returns an array of names of in-memory representations
/// of output files. Special name stdout.log describes output that contains
/// messages generated by ocloc (e.g. compiler errors/warnings).
///
/// \returns 0 on succes. Returns non-0 in case of failure.
SIGNATURE oclocInvoke(uint32_t NumArgs, const char *Argv[], uint32_t NumSources,
const uint8_t **DataSources, const uint64_t *LenSources,
const char **NameSources, uint32_t NumInputHeaders,
const uint8_t **DataInputHeaders,
const uint64_t *LenInputHeaders,
const char **NameInputHeaders, uint32_t *NumOutputs,
uint8_t ***DataOutputs, uint64_t **LenOutputs,
char ***NameOutputs);

/// Frees results of oclocInvoke
///
/// \param NumOutputs is number of outputs as returned by oclocInvoke().
///
/// \param DataOutputs is array of outputs as returned by oclocInvoke().
///
/// \param LenOutputs is array of sizes of outputs as returned by oclocInvoke().
///
/// \param NameOutputs is array of names of outputs as returned by oclocInvoke()
///
/// \returns 0 on succes. Returns non-0 in case of failure.
SIGNATURE oclocFreeOutput(uint32_t *NumOutputs, uint8_t ***DataOutputs,
uint64_t **LenOutputs, char ***NameOutputs);

/// Returns the current version of ocloc.
SIGNATURE oclocVersion();
}
Loading