Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,12 @@ enum : unsigned {
// Processor selection mask for EF_CUDA_SM* values prior to blackwell.
EF_CUDA_SM = 0xff,

// Processor selection mask for EF_CUDA_SM* values following blackwell.
EF_CUDA_SM_MASK = 0xff00,

// Bit offset of the EF_CUDA_SM* value within the flags following blackwell.
EF_CUDA_SM_OFFSET = 8,

// SM based processor values.
EF_CUDA_SM20 = 0x14,
EF_CUDA_SM21 = 0x15,
Expand All @@ -950,9 +956,15 @@ enum : unsigned {
EF_CUDA_SM80 = 0x50,
EF_CUDA_SM86 = 0x56,
EF_CUDA_SM87 = 0x57,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EF_CUDA_SM88 = 0x58,
EF_CUDA_SM89 = 0x59,
// The sm_90a variant uses the same machine flag.
EF_CUDA_SM90 = 0x5a,
EF_CUDA_SM100 = 0x64,
EF_CUDA_SM101 = 0x65,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

About that sm_101.
In CUDA-13 it's been renamed to sm_110.
You may want to check what NVIDIA tools end up generating for the same GPU in cuda-12.9 (sm_101) and cuda-13.0 (sm_110), and whether NVIDIA kept the same ELF flags for them or changed them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have easy access to CUDA 13.0 yet, just an ELF someone else gave me which these work on. Will it be sufficient to just handle both cases?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the dump of ELF headers for sm_110/a/f with cuda-13 and 101/a/f with cuda-12.9: https://gist.github.com/Artem-B/1995e3bd80a06b3bee33196e8753d73b

The definitions look fine.

EF_CUDA_SM103 = 0x67,
EF_CUDA_SM110 = 0x6e,
EF_CUDA_SM120 = 0x78,
EF_CUDA_SM121 = 0x79,

// Unified texture binding is enabled.
EF_CUDA_TEXMODE_UNIFIED = 0x100,
Expand All @@ -968,17 +980,7 @@ enum : unsigned {
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
EF_CUDA_VIRTUAL_SM = 0xff0000,

// Processor selection mask for EF_CUDA_SM* values following blackwell.
EF_CUDA_SM_MASK = 0xff00,

// SM based processor values.
EF_CUDA_SM100 = 0x6400,
EF_CUDA_SM101 = 0x6500,
EF_CUDA_SM103 = 0x6700,
EF_CUDA_SM120 = 0x7800,
EF_CUDA_SM121 = 0x7900,

// Set when using an accelerator variant like sm_100a.
// Set when using an accelerator variant like sm_100a in the new ABI.
EF_CUDA_ACCELERATORS = 0x8,
};

Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Object/ELFObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
assert(getEMachine() == ELF::EM_CUDA);
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
? getPlatformFlags() & ELF::EF_CUDA_SM
: getPlatformFlags() & ELF::EF_CUDA_SM_MASK;
: (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
ELF::EF_CUDA_SM_OFFSET;

switch (SM) {
// Fermi architecture.
Expand Down Expand Up @@ -674,6 +675,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
return "sm_86";
case ELF::EF_CUDA_SM87:
return "sm_87";
case ELF::EF_CUDA_SM88:
return "sm_88";

// Ada architecture.
case ELF::EF_CUDA_SM89:
Expand All @@ -694,6 +697,9 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
case ELF::EF_CUDA_SM103:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a"
: "sm_103";
case ELF::EF_CUDA_SM110:
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a"
: "sm_110";

// Rubin architecture.
case ELF::EF_CUDA_SM120:
Expand Down
66 changes: 54 additions & 12 deletions llvm/tools/llvm-readobj/ELFDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,7 @@ const EnumEntry<unsigned> ElfOSABI[] = {
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};

const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
Expand Down Expand Up @@ -1679,19 +1680,60 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
};

// Table pairing EF_CUDA_SM* flag values with their "sm_NN" display names.
// Each SM value is listed both unshifted and shifted left by
// EF_CUDA_SM_OFFSET, so the same name is found for either placement of the
// value within the flags word.
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
// NOTE(review): the packed two-per-line entries below repeat values that are
// listed again one-per-line right after them; they look like the pre-change
// side of this diff hunk -- confirm before treating both runs as live code.
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM101, "sm_101"),
ENUM_ENT(EF_CUDA_SM103, "sm_103"), ENUM_ENT(EF_CUDA_SM120, "sm_120"),
// Unshifted SM values, i.e. the value sits in the low bits of the flags.
ENUM_ENT(EF_CUDA_SM20, "sm_20"),
ENUM_ENT(EF_CUDA_SM21, "sm_21"),
ENUM_ENT(EF_CUDA_SM30, "sm_30"),
ENUM_ENT(EF_CUDA_SM32, "sm_32"),
ENUM_ENT(EF_CUDA_SM35, "sm_35"),
ENUM_ENT(EF_CUDA_SM37, "sm_37"),
ENUM_ENT(EF_CUDA_SM50, "sm_50"),
ENUM_ENT(EF_CUDA_SM52, "sm_52"),
ENUM_ENT(EF_CUDA_SM53, "sm_53"),
ENUM_ENT(EF_CUDA_SM60, "sm_60"),
ENUM_ENT(EF_CUDA_SM61, "sm_61"),
ENUM_ENT(EF_CUDA_SM62, "sm_62"),
ENUM_ENT(EF_CUDA_SM70, "sm_70"),
ENUM_ENT(EF_CUDA_SM72, "sm_72"),
ENUM_ENT(EF_CUDA_SM75, "sm_75"),
ENUM_ENT(EF_CUDA_SM80, "sm_80"),
ENUM_ENT(EF_CUDA_SM86, "sm_86"),
ENUM_ENT(EF_CUDA_SM87, "sm_87"),
ENUM_ENT(EF_CUDA_SM88, "sm_88"),
ENUM_ENT(EF_CUDA_SM89, "sm_89"),
ENUM_ENT(EF_CUDA_SM90, "sm_90"),
ENUM_ENT(EF_CUDA_SM100, "sm_100"),
ENUM_ENT(EF_CUDA_SM101, "sm_101"),
ENUM_ENT(EF_CUDA_SM103, "sm_103"),
ENUM_ENT(EF_CUDA_SM110, "sm_110"),
ENUM_ENT(EF_CUDA_SM120, "sm_120"),
ENUM_ENT(EF_CUDA_SM121, "sm_121"),
// The same SM values shifted left by EF_CUDA_SM_OFFSET, which places them in
// the bits covered by EF_CUDA_SM_MASK.
ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"),
ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"),
ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
};

const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
Expand Down
2 changes: 1 addition & 1 deletion offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1581,7 +1581,7 @@ struct CUDAPluginTy final : public GenericPluginTy {
unsigned SM =
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
? Header.e_flags & ELF::EF_CUDA_SM
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 8;
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;

CUdevice Device;
CUresult Res = cuDeviceGet(&Device, DeviceId);
Expand Down