diff --git a/compiler/rustc_abi/src/lib.rs b/compiler/rustc_abi/src/lib.rs
index 369874521e57e..0804310ac7078 100644
--- a/compiler/rustc_abi/src/lib.rs
+++ b/compiler/rustc_abi/src/lib.rs
@@ -1716,6 +1716,8 @@ pub struct AddressSpace(pub u32);
 impl AddressSpace {
     /// LLVM's `0` address space.
     pub const ZERO: Self = AddressSpace(0);
+    /// The address space for shared memory on nvptx and amdgpu.
+    pub const SHARED: Self = AddressSpace(3);
 }
 
 /// The way we represent values to the backend
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index 960a895a2031c..96c09702eb1ce 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -14,6 +14,7 @@ use std::borrow::Borrow;
 
 use itertools::Itertools;
+use rustc_abi::AddressSpace;
 use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods;
 use rustc_data_structures::fx::FxIndexSet;
 use rustc_middle::ty::{Instance, Ty};
@@ -99,6 +100,28 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
             )
         }
     }
+
+    /// Declare a global value in a specific address space.
+    ///
+    /// If there’s a value with the same name already declared, the function will
+    /// return its Value instead.
+    pub(crate) fn declare_global_in_addrspace(
+        &self,
+        name: &str,
+        ty: &'ll Type,
+        addr_space: AddressSpace,
+    ) -> &'ll Value {
+        debug!("declare_global_in_addrspace(name={name:?}, addrspace={addr_space:?})");
+        unsafe {
+            llvm::LLVMRustGetOrInsertGlobalInAddrspace(
+                (**self).borrow().llmod,
+                name.as_c_char_ptr(),
+                name.len(),
+                ty,
+                addr_space.0,
+            )
+        }
+    }
 }
 
 impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 49d3dedbeabdf..61e262c981068 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1,7 +1,9 @@
 use std::assert_matches::assert_matches;
 use std::cmp::Ordering;
 
-use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size};
+use rustc_abi::{
+    AddressSpace, Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size,
+};
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
 use rustc_codegen_ssa::codegen_attrs::autodiff_attrs;
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
@@ -532,6 +534,22 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 return Ok(());
             }
 
+            sym::dynamic_shared_memory => {
+                let global = self.declare_global_in_addrspace(
+                    "dynamic_shared_memory",
+                    self.type_array(self.type_i8(), 0),
+                    AddressSpace::SHARED,
+                );
+                let ty::RawPtr(inner_ty, _) = result.layout.ty.kind() else { unreachable!() };
+                let alignment = self.align_of(*inner_ty).bytes() as u32;
+                unsafe {
+                    if alignment > llvm::LLVMGetAlignment(global) {
+                        llvm::LLVMSetAlignment(global, alignment);
+                    }
+                }
+                self.cx().const_pointercast(global, self.type_ptr())
+            }
+
             _ if name.as_str().starts_with("simd_") => {
                 // Unpack non-power-of-2 #[repr(packed, simd)] arguments.
                 // This gives them the expected layout of a regular #[repr(simd)] vector.
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index b66fc157b3cb2..2639b71caf523 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1907,6 +1907,13 @@ unsafe extern "C" {
         NameLen: size_t,
         T: &'a Type,
     ) -> &'a Value;
+    pub(crate) fn LLVMRustGetOrInsertGlobalInAddrspace<'a>(
+        M: &'a Module,
+        Name: *const c_char,
+        NameLen: size_t,
+        T: &'a Type,
+        AddressSpace: c_uint,
+    ) -> &'a Value;
     pub(crate) fn LLVMRustInsertPrivateGlobal<'a>(M: &'a Module, T: &'a Type) -> &'a Value;
     pub(crate) fn LLVMRustGetNamedValue(
         M: &Module,
diff --git a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
index 3c667b8e88203..18dc839901a39 100644
--- a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
@@ -110,6 +110,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             sym::abort
             | sym::unreachable
             | sym::cold_path
+            | sym::dynamic_shared_memory
            | sym::breakpoint
            | sym::assert_zero_valid
            | sym::assert_mem_uninitialized_valid
diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
index cfc6bc2f3a0a9..4af40d336172c 100644
--- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs
+++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -74,6 +74,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
         | sym::align_of
         | sym::needs_drop
         | sym::caller_location
+        | sym::dynamic_shared_memory
         | sym::add_with_overflow
         | sym::sub_with_overflow
         | sym::mul_with_overflow
@@ -213,6 +214,7 @@ pub(crate) fn check_intrinsic_type(
         }
         sym::rustc_peek => (1, 0, vec![param(0)], param(0)),
         sym::caller_location => (0, 0, vec![], tcx.caller_location_ty()),
+        sym::dynamic_shared_memory => (1, 0, vec![], Ty::new_mut_ptr(tcx, param(0))),
         sym::assert_inhabited | sym::assert_zero_valid | sym::assert_mem_uninitialized_valid => {
             (1, 0, vec![], tcx.types.unit)
         }
diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
index 361a5f765510f..25c49b2ee8a6f 100644
--- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
@@ -209,10 +209,10 @@ extern "C" LLVMValueRef LLVMRustGetOrInsertFunction(LLVMModuleRef M,
           .getCallee());
 }
 
-extern "C" LLVMValueRef LLVMRustGetOrInsertGlobal(LLVMModuleRef M,
-                                                  const char *Name,
-                                                  size_t NameLen,
-                                                  LLVMTypeRef Ty) {
+extern "C" LLVMValueRef
+LLVMRustGetOrInsertGlobalInAddrspace(LLVMModuleRef M, const char *Name,
+                                     size_t NameLen, LLVMTypeRef Ty,
+                                     unsigned AddressSpace) {
   Module *Mod = unwrap(M);
   auto NameRef = StringRef(Name, NameLen);
 
@@ -223,10 +223,21 @@
   GlobalVariable *GV = Mod->getGlobalVariable(NameRef, true);
   if (!GV)
     GV = new GlobalVariable(*Mod, unwrap(Ty), false,
-                            GlobalValue::ExternalLinkage, nullptr, NameRef);
+                            GlobalValue::ExternalLinkage, nullptr, NameRef,
+                            nullptr, GlobalValue::NotThreadLocal, AddressSpace);
   return wrap(GV);
 }
 
+extern "C" LLVMValueRef LLVMRustGetOrInsertGlobal(LLVMModuleRef M,
+                                                  const char *Name,
+                                                  size_t NameLen,
+                                                  LLVMTypeRef Ty) {
+  Module *Mod = unwrap(M);
+  unsigned AddressSpace = Mod->getDataLayout().getDefaultGlobalsAddressSpace();
+  return LLVMRustGetOrInsertGlobalInAddrspace(M, Name, NameLen, Ty,
+                                              AddressSpace);
+}
+
 extern "C" LLVMValueRef
 LLVMRustInsertPrivateGlobal(LLVMModuleRef M, LLVMTypeRef Ty) {
   return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 77260d07c9950..7980d541555e1 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -903,6 +903,7 @@ symbols! {
         dyn_star,
         dyn_trait,
         dynamic_no_pic: "dynamic-no-pic",
+        dynamic_shared_memory,
         e,
         edition_panic,
         effective_target_features,
diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs
index 904aa52c7845b..3ffe38b758c96 100644
--- a/library/core/src/intrinsics/mod.rs
+++ b/library/core/src/intrinsics/mod.rs
@@ -3238,6 +3238,23 @@ pub(crate) const fn miri_promise_symbolic_alignment(ptr: *const (), align: usize
     )
 }
 
+/// Returns a pointer to dynamic shared memory.
+///
+/// The returned pointer is the start of the dynamic shared memory region.
+/// All pointers returned by `dynamic_shared_memory` point to the same address
+/// and therefore alias the same memory.
+/// The returned pointer is aligned to at least the alignment of `T`.
+///
+/// # Other APIs
+///
+/// CUDA and HIP call this shared memory.
+/// OpenCL and SYCL call this local memory.
+#[rustc_intrinsic]
+#[rustc_nounwind]
+#[unstable(feature = "dynamic_shared_memory", issue = "135513")]
+#[cfg(any(target_arch = "amdgpu", target_arch = "nvptx64"))]
+pub fn dynamic_shared_memory<T>() -> *mut T;
+
 /// Copies the current location of arglist `src` to the arglist `dst`.
 ///
 /// FIXME: document safety requirements
diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs
index 0b75e85772f86..893c48c7cb9db 100644
--- a/src/bootstrap/src/core/build_steps/compile.rs
+++ b/src/bootstrap/src/core/build_steps/compile.rs
@@ -668,6 +668,14 @@ pub fn std_cargo(builder: &Builder<'_>, target: TargetSelection, cargo: &mut Car
         cargo.rustflag("-Cforce-unwind-tables=yes");
     }
 
+    // amdgcn must have a CPU specified, otherwise it refuses to compile.
+    // We want to be able to run tests for amdgcn that depend on core, therefore
+    // we need to be able to compile core.
+    // The CPU used here must match the one used in tests that use the standard library.
+    if target.contains("amdgcn") && target.file.is_none() {
+        cargo.rustflag("-Ctarget-cpu=gfx900");
+    }
+
     // Enable frame pointers by default for the library. Note that they are still controlled by a
     // separate setting for the compiler.
     cargo.rustflag("-Zunstable-options");
diff --git a/src/build_helper/src/targets.rs b/src/build_helper/src/targets.rs
index cccc413368bc9..c1b198f0ac864 100644
--- a/src/build_helper/src/targets.rs
+++ b/src/build_helper/src/targets.rs
@@ -6,6 +6,7 @@
 // `compiletest`.
 pub fn target_supports_std(target_tuple: &str) -> bool {
     !(target_tuple.contains("-none")
+        || target_tuple.contains("amdgcn")
         || target_tuple.contains("nvptx")
         || target_tuple.contains("switch"))
 }
diff --git a/src/tools/compiletest/src/directives/directive_names.rs b/src/tools/compiletest/src/directives/directive_names.rs
index 0ef84fb459493..cdb2da6af4c26 100644
--- a/src/tools/compiletest/src/directives/directive_names.rs
+++ b/src/tools/compiletest/src/directives/directive_names.rs
@@ -188,6 +188,7 @@ pub(crate) const KNOWN_DIRECTIVE_NAMES: &[&str] = &[
     "only-aarch64",
     "only-aarch64-apple-darwin",
     "only-aarch64-unknown-linux-gnu",
+    "only-amdgpu",
     "only-apple",
     "only-arm",
     "only-avr",
diff --git a/tests/codegen-llvm/dynamic_shared_memory.rs b/tests/codegen-llvm/dynamic_shared_memory.rs
new file mode 100644
index 0000000000000..8828c30958f9d
--- /dev/null
+++ b/tests/codegen-llvm/dynamic_shared_memory.rs
@@ -0,0 +1,27 @@
+// Checks that dynamic_shared_memory works.
+
+//@ revisions: amdgpu nvptx x86
+//@ compile-flags: --crate-type=rlib
+//
+//@ [amdgpu] compile-flags: --target amdgcn-amd-amdhsa -Ctarget-cpu=gfx900
+//@ [amdgpu] only-amdgpu
+//@ [amdgpu] needs-llvm-components: amdgpu
+//@ [nvptx] compile-flags: --target nvptx64-nvidia-cuda
+//@ [nvptx] only-nvptx64
+//@ [nvptx] needs-llvm-components: nvptx
+//@ [x86] compile-flags: --target x86_64-unknown-linux-gnu
+//@ [x86] only-x86_64
+//@ [x86] needs-llvm-components: x86
+//@ [x86] should-fail
+#![feature(core_intrinsics, dynamic_shared_memory)]
+#![no_std]
+
+use core::intrinsics::dynamic_shared_memory;
+
+// CHECK: @dynamic_shared_memory = external addrspace(3) global [0 x i8], align 8
+// CHECK: ret ptr addrspacecast (ptr addrspace(3) @dynamic_shared_memory to ptr)
+pub fn fun() -> *mut i32 {
+    let res = dynamic_shared_memory::<i32>();
+    dynamic_shared_memory::<u64>(); // Increase alignment to 8
+    res
+}
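
Usage sketch (not part of the diff above): the intrinsic only hands back the base pointer of the dynamic shared memory region; the host still picks the region's size at kernel launch (the dynamic shared memory launch parameter in CUDA/HIP), and the addrspacecast checked in the codegen test is what turns the addrspace(3) global into an ordinary generic Rust pointer. A minimal device-side helper built on top of the intrinsic could look as follows; the helper name `shared_scratch`, the `f32` element type, and the safety wording are illustrative assumptions, not part of this change, and the sketch only compiles on the amdgpu/nvptx64 targets where the intrinsic is cfg-enabled.

    #![feature(core_intrinsics, dynamic_shared_memory)]
    #![no_std]

    use core::intrinsics::dynamic_shared_memory;

    /// Views the dynamic shared memory region as a mutable `f32` slice.
    ///
    /// # Safety
    /// The host must have launched the kernel with at least
    /// `len * size_of::<f32>()` bytes of dynamic shared memory, and the caller
    /// must ensure no other live reference aliases the same region.
    unsafe fn shared_scratch<'a>(len: usize) -> &'a mut [f32] {
        // Every call returns the same base pointer; requesting it as `*mut f32`
        // guarantees alignment of at least `align_of::<f32>()`.
        let base = dynamic_shared_memory::<f32>();
        unsafe { core::slice::from_raw_parts_mut(base, len) }
    }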