diff --git a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs
index 25200d0062cf43..e66db58ba95f52 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs
@@ -25,7 +25,13 @@ internal static unsafe string StrCns(uint rid, IntPtr scopeHandle)
         }
 
         [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern string FastAllocateString(int length);
+        internal static extern unsafe string FastAllocateString(MethodTable *pMT, int length);
+
+        [DebuggerHidden]
+        internal static unsafe string FastAllocateString(int length)
+        {
+            return FastAllocateString(TypeHandle.TypeHandleOf<string>().AsMethodTable(), length);
+        }
 
         [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "String_Intern")]
         private static partial void Intern(StringHandleOnStack src);
diff --git a/src/coreclr/inc/eventtracebase.h b/src/coreclr/inc/eventtracebase.h
index 72e6cd09fc5b6a..4fbbbc9567f837 100644
--- a/src/coreclr/inc/eventtracebase.h
+++ b/src/coreclr/inc/eventtracebase.h
@@ -960,7 +960,6 @@ namespace ETW
         static VOID SendMethodDetailsEvent(MethodDesc *pMethodDesc);
         static VOID SendNonDuplicateMethodDetailsEvent(MethodDesc* pMethodDesc, MethodDescSet* set);
         static VOID StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName);
-        static VOID StubsInitialized(PVOID *pHelperStartAddress, PVOID *pHelperNames, LONG ulNoOfHelpers);
         static VOID MethodRestored(MethodDesc * pMethodDesc);
         static VOID MethodTableRestored(MethodTable * pMethodTable);
         static VOID DynamicMethodDestroyed(MethodDesc *pMethodDesc);
@@ -972,7 +971,6 @@ namespace ETW
         static VOID MethodJitting(MethodDesc *pMethodDesc, COR_ILMETHOD_DECODER* methodDecoder, SString *namespaceOrClassName, SString *methodName, SString *methodSignature);
         static VOID MethodJitted(MethodDesc *pMethodDesc, SString *namespaceOrClassName, SString *methodName, SString *methodSignature, PCODE pNativeCodeStartAddress, PrepareCodeConfig *pConfig);
         static VOID StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pHelperName) {};
-        static VOID StubsInitialized(PVOID *pHelperStartAddress, PVOID *pHelperNames, LONG ulNoOfHelpers) {};
         static VOID MethodRestored(MethodDesc * pMethodDesc) {};
         static VOID MethodTableRestored(MethodTable * pMethodTable) {};
         static VOID DynamicMethodDestroyed(MethodDesc *pMethodDesc) {};
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index f254c295178781..4ae16055356a7b 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -33,8 +33,7 @@
 #define FEATURE_USE_HELPERS_FOR_32BIT_INT_DIV
 #endif
 
-// pfnHelper is set to NULL if it is a stubbed helper.
-// It will be set in InitJITHelpers1
+// pfnHelper is set to NULL if it is an unused helper.
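Before the jithelpers table below, a note on the String.CoreCLR.cs hunk above: the exported FastAllocateString now takes an explicit MethodTable, and a thin managed overload resolves that handle once and forwards it. A minimal C# sketch of the wrapper pattern; RawAllocate and the IntPtr handle here are illustrative stand-ins, not the runtime's real signatures:

using System;

internal static class FastAllocSketch
{
    // Stand-in for the InternalCall allocator that takes an explicit
    // method-table handle plus the length; simulated with a managed string.
    private static string RawAllocate(IntPtr pMT, int length) => new string('\0', length);

    // Resolve the handle for string once, mirroring the role of
    // TypeHandle.TypeHandleOf<string>().AsMethodTable() in the hunk above.
    private static readonly IntPtr s_stringMT = typeof(string).TypeHandle.Value;

    internal static string FastAllocateString(int length) => RawAllocate(s_stringMT, length);

    private static void Main() => Console.WriteLine(FastAllocateString(4).Length); // prints 4
}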
    JITHELPER(CORINFO_HELP_UNDEF, NULL, METHOD__NIL)
@@ -100,20 +99,20 @@ JITHELPER(CORINFO_HELP_DBLREM, JIT_DblRem, METHOD__NIL)
 
     // Allocating a new object
-    JITHELPER(CORINFO_HELP_NEWFAST, JIT_New, METHOD__NIL)
-    JITHELPER(CORINFO_HELP_NEWFAST_MAYBEFROZEN, JIT_NewMaybeFrozen,METHOD__NIL)
-    DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST, JIT_New, METHOD__NIL)
-    JITHELPER(CORINFO_HELP_NEWSFAST_FINALIZE, NULL, METHOD__NIL)
-    DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_New, METHOD__NIL)
-    JITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_VC, NULL, METHOD__NIL)
-    JITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_FINALIZE, NULL, METHOD__NIL)
-    DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY)
-    DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR_RARE, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY)
-    JITHELPER(CORINFO_HELP_NEWARR_1_DIRECT, JIT_NewArr1,METHOD__NIL)
-    JITHELPER(CORINFO_HELP_NEWARR_1_MAYBEFROZEN, JIT_NewArr1MaybeFrozen,METHOD__NIL)
-    DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1,METHOD__NIL)
-    DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1,METHOD__NIL)
-    DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_ALIGN8, JIT_NewArr1,METHOD__NIL)
+    JITHELPER(CORINFO_HELP_NEWFAST, RhpNew, METHOD__NIL)
+    JITHELPER(CORINFO_HELP_NEWFAST_MAYBEFROZEN, RhpNewMaybeFrozen, METHOD__NIL)
+    DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST, RhpNew, METHOD__NIL)
+    JITHELPER(CORINFO_HELP_NEWSFAST_FINALIZE, NULL, METHOD__NIL)
+    DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8, RhpNew, METHOD__NIL)
+    DYNAMICJITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_VC, RhpNew, METHOD__NIL)
+    JITHELPER(CORINFO_HELP_NEWSFAST_ALIGN8_FINALIZE, NULL, METHOD__NIL)
+    DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY)
+    DYNAMICJITHELPER(CORINFO_HELP_NEW_MDARR_RARE, NULL, METHOD__ARRAY__CREATEINSTANCEMDARRAY)
+    JITHELPER(CORINFO_HELP_NEWARR_1_DIRECT, RhpNewVariableSizeObject, METHOD__NIL)
+    JITHELPER(CORINFO_HELP_NEWARR_1_MAYBEFROZEN, RhpNewArrayMaybeFrozen, METHOD__NIL)
+    DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_OBJ, RhpNewVariableSizeObject, METHOD__NIL)
+    DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_VC, RhpNewVariableSizeObject, METHOD__NIL)
+    DYNAMICJITHELPER(CORINFO_HELP_NEWARR_1_ALIGN8, RhpNewVariableSizeObject, METHOD__NIL)
 
     DYNAMICJITHELPER(CORINFO_HELP_STRCNS, NULL, METHOD__STRING__STRCNS)
diff --git a/src/coreclr/nativeaot/BuildIntegration/NativeAOT.natstepfilter b/src/coreclr/nativeaot/BuildIntegration/NativeAOT.natstepfilter
index 395fe22ec49d57..71ceb23095b636 100644
--- a/src/coreclr/nativeaot/BuildIntegration/NativeAOT.natstepfilter
+++ b/src/coreclr/nativeaot/BuildIntegration/NativeAOT.natstepfilter
@@ -1,7 +1,7 @@
-    <Name>RhpNewFast|RhpNewFinalizable|RhpNewFastAlign8|RhpNewFastMisalign|RhpNewFinalizableAlign8|RhpNewArray|RhpNewArrayAlign8</Name>
+    <Name>RhpNewFast|RhpNewFinalizable|RhpNewFastAlign8|RhpNewFastMisalign|RhpNewFinalizableAlign8|RhpNewArrayFast|RhpNewArrayFastAlign8</Name>
     <Action>NoStepInto</Action>
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs
index 52c39c333d1516..8f6dfb77bea39c 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/InternalCalls.cs
@@ -128,9 +128,9 @@ internal static int RhEndNoGCRegion()
         [MethodImpl(MethodImplOptions.InternalCall)]
         internal static extern unsafe object RhpNewFinalizable(MethodTable* pEEType);
 
-        [RuntimeImport(RuntimeLibrary, "RhpNewArray")]
+        [RuntimeImport(RuntimeLibrary, "RhpNewArrayFast")]
        [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern unsafe object RhpNewArray(MethodTable* pEEType, int length);
+        internal static extern unsafe object RhpNewArrayFast(MethodTable* pEEType, int length);
 
 #if FEATURE_64BIT_ALIGNMENT
         [RuntimeImport(RuntimeLibrary, "RhpNewFastAlign8")]
@@ -141,9 +141,9 @@ internal static int RhEndNoGCRegion()
         [MethodImpl(MethodImplOptions.InternalCall)]
         internal static extern unsafe object RhpNewFinalizableAlign8(MethodTable* pEEType);
 
-        [RuntimeImport(RuntimeLibrary, "RhpNewArrayAlign8")]
+        [RuntimeImport(RuntimeLibrary, "RhpNewArrayFastAlign8")]
         [MethodImpl(MethodImplOptions.InternalCall)]
-        internal static extern unsafe object RhpNewArrayAlign8(MethodTable* pEEType, int length);
+        internal static extern unsafe object RhpNewArrayFastAlign8(MethodTable* pEEType, int length);
 
         [RuntimeImport(RuntimeLibrary, "RhpNewFastMisalign")]
         [MethodImpl(MethodImplOptions.InternalCall)]
diff --git a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs
index a782468f52cb9a..93d0cc5c27deaa 100644
--- a/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs
+++ b/src/coreclr/nativeaot/Runtime.Base/src/System/Runtime/RuntimeExports.cs
@@ -57,20 +57,44 @@ public static unsafe object RhNewObject(MethodTable* pEEType)
 
         [RuntimeExport("RhNewArray")]
         public static unsafe object RhNewArray(MethodTable* pEEType, int length)
+        {
+            Debug.Assert(pEEType->IsSzArray);
+
+#if FEATURE_64BIT_ALIGNMENT
+            MethodTable* pEEElementType = pEEType->RelatedParameterType;
+            if (pEEElementType->IsValueType && pEEElementType->RequiresAlign8)
+            {
+                return InternalCalls.RhpNewArrayFastAlign8(pEEType, length);
+            }
+            else
+#endif // FEATURE_64BIT_ALIGNMENT
+            {
+                return InternalCalls.RhpNewArrayFast(pEEType, length);
+            }
+        }
+
+        [RuntimeExport("RhNewVariableSizeObject")]
+        public static unsafe object RhNewVariableSizeObject(MethodTable* pEEType, int length)
         {
             Debug.Assert(pEEType->IsArray || pEEType->IsString);
 
+            object array = null;
 #if FEATURE_64BIT_ALIGNMENT
             MethodTable* pEEElementType = pEEType->RelatedParameterType;
             if (pEEElementType->IsValueType && pEEElementType->RequiresAlign8)
             {
-                return InternalCalls.RhpNewArrayAlign8(pEEType, length);
+                RuntimeImports.RhAllocateNewArray(pEEType, (uint)length, (uint)GC_ALLOC_FLAGS.GC_ALLOC_ALIGN8, Unsafe.AsPointer(ref array));
             }
             else
#endif // FEATURE_64BIT_ALIGNMENT
             {
-                return InternalCalls.RhpNewArray(pEEType, length);
+                RuntimeImports.RhAllocateNewArray(pEEType, (uint)length, (uint)GC_ALLOC_FLAGS.GC_ALLOC_NO_FLAGS, Unsafe.AsPointer(ref array));
             }
+
+            if (array == null)
+                throw new OutOfMemoryException();
+
+            return array;
         }
 
         public static unsafe object RhBox(MethodTable* pEEType, ref byte data)
@@ -380,10 +404,10 @@ internal static unsafe IntPtr RhGetRuntimeHelperForType(MethodTable* pEEType, Ru
 #if FEATURE_64BIT_ALIGNMENT
                     MethodTable* pEEElementType = pEEType->RelatedParameterType;
                     if (pEEElementType->IsValueType && pEEElementType->RequiresAlign8)
-                        return (IntPtr)(delegate*<MethodTable*, int, object>)&InternalCalls.RhpNewArrayAlign8;
+                        return (IntPtr)(delegate*<MethodTable*, int, object>)&InternalCalls.RhpNewArrayFastAlign8;
 #endif // FEATURE_64BIT_ALIGNMENT
 
-                    return (IntPtr)(delegate*<MethodTable*, int, object>)&InternalCalls.RhpNewArray;
+                    return (IntPtr)(delegate*<MethodTable*, int, object>)&InternalCalls.RhpNewArrayFast;
 
                 default:
                     Debug.Fail("Unknown RuntimeHelperKind");
diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h
index 1bf3a0c3e06338..616c4847235e5d 100644
--- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h
+++
b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -32,6 +32,8 @@ ASM_OFFSET( 4, 8, String, m_Length) ASM_OFFSET( 8, C, String, m_FirstChar) ASM_CONST( 2, 2, STRING_COMPONENT_SIZE) ASM_CONST( E, 16, STRING_BASE_SIZE) +ASM_CONST( C, 18, SZARRAY_BASE_SIZE) +ASM_CONST( C, 18, MIN_OBJECT_SIZE) ASM_CONST(3FFFFFDF,3FFFFFDF,MAX_STRING_LENGTH) diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 9f8473c5ecbec2..2d73f2e7293cc7 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -205,7 +205,7 @@ if (CLR_CMAKE_TARGET_ARCH_AMD64) endif (CLR_CMAKE_TARGET_ARCH_AMD64) list(APPEND RUNTIME_SOURCES_ARCH_ASM - ${ARCH_SOURCES_DIR}/AllocFast.${ASM_SUFFIX} + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/ExceptionHandling.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/GcProbe.${ASM_SUFFIX} ${ARCH_SOURCES_DIR}/MiscStubs.${ASM_SUFFIX} diff --git a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp index 7ef489ddd33389..9cd79820daf7ed 100644 --- a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp @@ -655,7 +655,7 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t // numElements - number of array elements // pTransitionFrame- transition frame to make stack crawlable // Returns a pointer to the object allocated or NULL on failure. -EXTERN_C void* F_CALL_CONV RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements, PInvokeTransitionFrame* pTransitionFrame) +EXTERN_C void* RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements, PInvokeTransitionFrame* pTransitionFrame) { Thread* pThread = ThreadStore::GetCurrentThread(); diff --git a/src/coreclr/nativeaot/Runtime/ObjectLayout.h b/src/coreclr/nativeaot/Runtime/ObjectLayout.h index 146c986938f7c3..606148a0f32704 100644 --- a/src/coreclr/nativeaot/Runtime/ObjectLayout.h +++ b/src/coreclr/nativeaot/Runtime/ObjectLayout.h @@ -125,6 +125,9 @@ static uintptr_t const STRING_COMPONENT_SIZE = StringConstants::ComponentSize; //------------------------------------------------------------------------------------------------- static uintptr_t const STRING_BASE_SIZE = StringConstants::BaseSize; +//------------------------------------------------------------------------------------------------- +static uintptr_t const SZARRAY_BASE_SIZE = MIN_OBJECT_SIZE; + //------------------------------------------------------------------------------------------------- static uintptr_t const MAX_STRING_LENGTH = 0x3FFFFFDF; diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index 5d5cde13bced18..13b65d17570366 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -302,6 +302,8 @@ PUSH_COOP_PINVOKE_FRAME macro trashReg ;; allocate scratch space and any required alignment alloc_stack 28h + + END_PROLOGUE endm ;; @@ -322,6 +324,10 @@ POP_COOP_PINVOKE_FRAME macro pop r10 ; discard caller RSP endm +INLINE_GET_ALLOC_CONTEXT_BASE macro destReg, trashReg + INLINE_GET_TLS_VAR destReg, trashReg, tls_CurrentThread +endm + ; - TAILCALL_RAX: ("jmp rax") should be used for tailcalls, this emits an instruction ; sequence which is recognized by the unwinder as a valid epilogue terminator TAILJMP_RAX TEXTEQU @@ -335,10 +341,8 @@ TSF_DoNotTriggerGc equ 10h ;; ;; Rename fields of nested structs ;; 
-OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit - - +OFFSETOF__ee_alloc_context__alloc_ptr equ OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__ee_alloc_context equ OFFSETOF__Thread__m_eeAllocContext ;; GC type flags GC_ALLOC_FINALIZE equ 1 diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 89393563f51473..7e02484922ba16 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -78,8 +78,8 @@ STATUS_REDHAWK_THREAD_ABORT equ 0x43 ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit +OFFSETOF__ee_alloc_context__alloc_ptr equ OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__ee_alloc_context equ OFFSETOF__Thread__m_eeAllocContext ;; ;; IMPORTS @@ -220,7 +220,6 @@ TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister INLINE_GET_TLS_VAR $destReg, $trashReg, tls_CurrentThread MEND - MACRO INLINE_THREAD_UNHIJACK $threadReg, $trashReg1, $trashReg2 ;; @@ -236,6 +235,12 @@ TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister 0 MEND + MACRO + INLINE_GET_ALLOC_CONTEXT_BASE $destReg, $trashReg + + INLINE_GET_TLS_VAR $destReg, $trashReg, tls_CurrentThread + MEND + ;; ---------------------------------------------------------------------------- - ;; ;; Macro to add a memory barrier. Equal to __sync_synchronize(). diff --git a/src/coreclr/nativeaot/Runtime/i386/AllocFast.S b/src/coreclr/nativeaot/Runtime/i386/AllocFast.S deleted file mode 100644 index 876f2dfbcb80d6..00000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/AllocFast.S +++ /dev/null @@ -1,4 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// TODO: Implement diff --git a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm deleted file mode 100644 index d557f5ec750774..00000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm +++ /dev/null @@ -1,387 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - - .586 - .model flat - option casemap:none - .code - - -include AsmMacros.inc - -;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -;; allocation context then automatically fallback to the slow allocation path. 
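Before the deleted i386 fast path below: RhpNewFast on every architecture is the same bump-pointer allocation against the thread's ee_alloc_context. A minimal C# simulation of the logic, assuming illustrative names; the real assembly additionally guards the pointer arithmetic against carry, which this sketch omits:

using System;

internal sealed class AllocContextSim
{
    public ulong AllocPtr;       // mirrors the alloc_ptr field read by the asm
    public ulong CombinedLimit;  // mirrors the combined_limit field

    // Fast path: carve the object out of the context, or report failure so the
    // caller can fall back to the slow helper (RhpGcAlloc behind a transition frame).
    public bool TryAllocate(ulong baseSize, out ulong objAddress)
    {
        ulong newPtr = AllocPtr + baseSize;
        if (newPtr > CombinedLimit)
        {
            objAddress = 0;
            return false;          // slow path
        }
        objAddress = AllocPtr;     // the object starts at the old alloc pointer
        AllocPtr = newPtr;         // bump past the new object
        return true;               // caller then stores the MethodTable at objAddress
    }
}

internal static class AllocDemo
{
    private static void Main()
    {
        var ctx = new AllocContextSim { AllocPtr = 0x1000, CombinedLimit = 0x1040 };
        Console.WriteLine(ctx.TryAllocate(0x20, out ulong addr) + " 0x" + addr.ToString("X")); // True 0x1000
        Console.WriteLine(ctx.TryAllocate(0x40, out _));                                       // False
    }
}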
-;; ECX == MethodTable -FASTCALL_FUNC RhpNewFast, 4 - - ;; edx = GetThread(), TRASHES eax - INLINE_GETTHREAD edx, eax - - ;; - ;; ecx contains MethodTable pointer - ;; - mov eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - - ;; - ;; eax: base size - ;; ecx: MethodTable pointer - ;; edx: Thread pointer - ;; - - add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja AllocFailed - - ;; set the new alloc pointer - mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax - - ;; calc the new object pointer - sub eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - - ;; set the new object's MethodTable pointer - mov [eax], ecx - ret - -AllocFailed: - - ;; - ;; ecx: MethodTable pointer - ;; - push ebp - mov ebp, esp - - PUSH_COOP_PINVOKE_FRAME edx - - ;; Preserve MethodTable in ESI. - mov esi, ecx - - ;; Push alloc helper arguments - push edx ; transition frame - push 0 ; numElements - xor edx, edx ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz NewFast_OOM - - POP_COOP_PINVOKE_FRAME - - pop ebp - ret - -NewFast_OOM: - ;; This is the failure path. We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov eax, esi ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - - ;; Cleanup our ebp frame - pop ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -FASTCALL_ENDFUNC - -;; Allocate non-array object with finalizer. -;; ECX == MethodTable -FASTCALL_FUNC RhpNewFinalizable, 4 - ;; Create EBP frame. - push ebp - mov ebp, esp - - PUSH_COOP_PINVOKE_FRAME edx - - ;; Preserve MethodTable in ESI - mov esi, ecx - - ;; Push alloc helper arguments - push edx ; transition frame - push 0 ; numElements - mov edx, GC_ALLOC_FINALIZE ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz NewFinalizable_OOM - - POP_COOP_PINVOKE_FRAME - - ;; Collapse EBP frame and return - pop ebp - ret - -NewFinalizable_OOM: - ;; This is the failure path. We're going to tail-call to a managed helper that will throw - ;; an out of memory exception that the caller of this allocator understands. - - mov eax, esi ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - - ;; Cleanup our ebp frame - pop ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -FASTCALL_ENDFUNC - -;; Allocate a new string. -;; ECX == MethodTable -;; EDX == element count -FASTCALL_FUNC RhNewString, 8 - - push ecx - push edx - - ;; Make sure computing the aligned overall allocation size won't overflow - cmp edx, MAX_STRING_LENGTH - ja StringSizeOverflow - - ; Compute overall allocation size (align(base size + (element size * elements), 4)). 
- lea eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)] - and eax, -4 - - ; ECX == MethodTable - ; EAX == allocation size - ; EDX == scratch - - INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx - - ; ECX == scratch - ; EAX == allocation size - ; EDX == thread - - mov ecx, eax - add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc StringAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja StringAllocContextOverflow - - ; ECX == allocation size - ; EAX == new alloc ptr - ; EDX == thread - - ; set the new alloc pointer - mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax - - ; calc the new object pointer - sub eax, ecx - - pop edx - pop ecx - - ; set the new object's MethodTable pointer and element count - mov [eax + OFFSETOF__Object__m_pEEType], ecx - mov [eax + OFFSETOF__String__m_Length], edx - ret - -StringAllocContextOverflow: - ; ECX == string size - ; original ECX pushed - ; original EDX pushed - - ; Re-push original ECX - push [esp + 4] - - ; Create EBP frame. - mov [esp + 8], ebp - lea ebp, [esp + 8] - - PUSH_COOP_PINVOKE_FRAME edx - - ; Get the MethodTable and put it in ecx. - mov ecx, dword ptr [ebp - 8] - - ; Push alloc helper arguments (thread, size, flags, MethodTable). - push edx ; transition frame - push [ebp - 4] ; numElements - xor edx, edx ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz StringOutOfMemoryWithFrame - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp - ret - -StringOutOfMemoryWithFrame: - ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw - ; an out of memory exception that the caller of this allocator understands. - - mov eax, [ebp - 8] ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp ; restore ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -StringSizeOverflow: - ;; We get here if the size of the final string object can't be represented as an unsigned - ;; 32-bit value. We're going to tail-call to a managed helper that will throw - ;; an OOM exception that the caller of this allocator understands. - - add esp, 8 ; pop ecx / edx - - ;; ecx holds MethodTable pointer already - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -FASTCALL_ENDFUNC - - -;; Allocate one dimensional, zero based array (SZARRAY). -;; ECX == MethodTable -;; EDX == element count -FASTCALL_FUNC RhpNewArray, 8 - - push ecx - push edx - - ; Compute overall allocation size (align(base size + (element size * elements), 4)). - ; if the element count is <= 0x10000, no overflow is possible because the component size is - ; <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case - ; (32 dimensional MdArray) is less than 0xffff. 
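To spell out the bound that comment relies on, here is a worked check in C#; the 0x10C figure stands in for the worst-case 32-dimensional MdArray base size and is illustrative:

using System;

internal static class ArraySizeBound
{
    private static void Main()
    {
        // component size <= 0xFFFF and element count <= 0x10000 bound the product:
        ulong product = 0xFFFFul * 0x10000;        // 0xFFFF0000
        // add an illustrative worst-case MdArray base size and the +3 used for
        // 4-byte alignment; the total still fits in an unsigned 32-bit value:
        ulong size = (product + 0x10C + 3) & ~3ul;
        Console.WriteLine("0x" + size.ToString("X"));   // 0xFFFF010C
        Console.WriteLine(size <= uint.MaxValue);       // True
    }
}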
- movzx eax, word ptr [ecx + OFFSETOF__MethodTable__m_usComponentSize] - cmp edx,010000h - ja ArraySizeBig - mul edx - add eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - add eax, 3 -ArrayAlignSize: - and eax, -4 - - ; ECX == MethodTable - ; EAX == array size - ; EDX == scratch - - INLINE_GETTHREAD edx, ecx ; edx = GetThread(), TRASHES ecx - - ; ECX == scratch - ; EAX == array size - ; EDX == thread - - mov ecx, eax - add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc ArrayAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja ArrayAllocContextOverflow - - ; ECX == array size - ; EAX == new alloc ptr - ; EDX == thread - - ; set the new alloc pointer - mov [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], eax - - ; calc the new object pointer - sub eax, ecx - - pop edx - pop ecx - - ; set the new object's MethodTable pointer and element count - mov [eax + OFFSETOF__Object__m_pEEType], ecx - mov [eax + OFFSETOF__Array__m_Length], edx - ret - -ArraySizeBig: - ; Compute overall allocation size (align(base size + (element size * elements), 4)). - ; if the element count is negative, it's an overflow, otherwise it's out of memory - cmp edx, 0 - jl ArraySizeOverflow - mul edx - jc ArrayOutOfMemoryNoFrame - add eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] - jc ArrayOutOfMemoryNoFrame - add eax, 3 - jc ArrayOutOfMemoryNoFrame - jmp ArrayAlignSize - -ArrayAllocContextOverflow: - ; ECX == array size - ; original ECX pushed - ; original EDX pushed - - ; Re-push original ECX - push [esp + 4] - - ; Create EBP frame. - mov [esp + 8], ebp - lea ebp, [esp + 8] - - PUSH_COOP_PINVOKE_FRAME edx - - ; Get the MethodTable and put it in ecx. - mov ecx, dword ptr [ebp - 8] - - ; Push alloc helper arguments (thread, size, flags, MethodTable). - push edx ; transition frame - push [ebp - 4] ; numElements - xor edx, edx ; Flags - ;; Passing MethodTable in ecx - - ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call RhpGcAlloc - - test eax, eax - jz ArrayOutOfMemoryWithFrame - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp - ret - -ArrayOutOfMemoryWithFrame: - ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw - ; an out of memory exception that the caller of this allocator understands. - - mov eax, [ebp - 8] ; Preserve MethodTable pointer over POP_COOP_PINVOKE_FRAME - - POP_COOP_PINVOKE_FRAME - add esp, 8 ; pop ecx / edx - pop ebp ; restore ebp - - mov ecx, eax ; MethodTable pointer - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -ArrayOutOfMemoryNoFrame: - add esp, 8 ; pop ecx / edx - - ; ecx holds MethodTable pointer already - xor edx, edx ; Indicate that we should throw OOM. - jmp RhExceptionHandling_FailedAllocation - -ArraySizeOverflow: - ; We get here if the size of the final array object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an overflow exception that the caller of this allocator understands. 
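All of these failure tails funnel into RhExceptionHandling_FailedAllocation with a second argument selecting the exception kind (edx here, a1 on RISC-V: 0 for out-of-memory, 1 for overflow). A C# sketch of that convention; the helper below is illustrative, not the managed helper's real shape:

using System;

internal static class FailedAllocationSketch
{
    // Mirrors the "xor edx, edx" (OOM) vs "mov edx, 1" (overflow) flag passed
    // alongside the MethodTable pointer to RhExceptionHandling_FailedAllocation.
    internal static Exception FailedAllocation(int fShouldThrowOverflow)
        => fShouldThrowOverflow == 0 ? new OutOfMemoryException() : new OverflowException();

    private static void Main()
    {
        Console.WriteLine(FailedAllocation(0).GetType().Name); // OutOfMemoryException
        Console.WriteLine(FailedAllocation(1).GetType().Name); // OverflowException
    }
}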
-
-    add esp, 8          ; pop ecx / edx
-
-    ; ecx holds MethodTable pointer already
-    mov edx, 1          ; Indicate that we should throw OverflowException
-    jmp RhExceptionHandling_FailedAllocation
-
-FASTCALL_ENDFUNC
-
-    end
diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc
index 4dd51f72269286..2e9cd75420a7c9 100644
--- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc
+++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc
@@ -70,13 +70,15 @@ endm
 ;;
 ;; This macro builds a frame describing the current state of managed code.
 ;;
-;; The macro assumes it is called from a helper that has already set up an EBP frame and that the values of
-;; EBX, ESI and EDI remain unchanged from their values in managed code. It pushes the frame at the top of the
-;; stack.
+;; The macro assumes it is called from a helper in which the values of EBX, ESI and EDI remain unchanged from
+;; their values in managed code. It pushes the frame at the top of the stack.
 ;;
 ;; EAX is trashed by this macro.
 ;;
 PUSH_COOP_PINVOKE_FRAME macro transitionFrameReg
+    push ebp
+    mov ebp, esp
+
     lea eax, [ebp + 8]  ; get the ESP of the caller
     push eax            ; save ESP
     push edi
@@ -94,18 +96,21 @@ endm
 
 ;;
 ;; Remove the frame from a previous call to PUSH_COOP_PINVOKE_FRAME from the top of the stack and restore EBX,
-;; ESI and EDI to their previous values.
-;;
-;; TRASHES ECX
+;; ESI, and EDI to their previous values. Tears down the EBP frame.
 ;;
 POP_COOP_PINVOKE_FRAME macro
     add esp, 4*4
     pop ebx
     pop esi
     pop edi
-    pop ecx
+    add esp, 4
+
+    pop ebp
 endm
 
+INLINE_GET_ALLOC_CONTEXT_BASE macro destReg, trashReg
+    INLINE_GETTHREAD destReg, trashReg
+endm
 
 ;;
 ;; CONSTANTS -- INTEGER
 ;;
@@ -115,6 +120,8 @@ TSF_DoNotTriggerGc equ 10h
 
 ;; GC type flags
 GC_ALLOC_FINALIZE equ 1
+GC_ALLOC_ALIGN8_BIAS equ 4
+GC_ALLOC_ALIGN8 equ 8
 
 ;; Note: these must match the defs in PInvokeTransitionFrameFlags
 PTFF_SAVE_RBX equ 00000001h
@@ -137,15 +144,15 @@ STATUS_REDHAWK_THREAD_ABORT equ 43h
 ;;
 ;; Rename fields of nested structs
 ;;
-OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr
-OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit
+OFFSETOF__ee_alloc_context__alloc_ptr equ OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr
+OFFSETOF__ee_alloc_context equ OFFSETOF__Thread__m_eeAllocContext
 
 ;;
 ;; CONSTANTS -- SYMBOLS
 ;;
 
 RhDebugBreak equ @RhDebugBreak@0
-RhpGcAlloc equ @RhpGcAlloc@16
+RhpGcAlloc equ _RhpGcAlloc@16
 G_LOWEST_ADDRESS equ _g_lowest_address
 G_HIGHEST_ADDRESS equ _g_highest_address
 G_EPHEMERAL_LOW equ _g_ephemeral_low
diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
index fe09d2a73022a7..8500381fc4fc4b 100644
--- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
@@ -191,12 +191,9 @@ RhpGcPoll proc
 RhpGcPoll endp
 
 RhpGcPollRare proc
-    push ebp
-    mov ebp, esp
     PUSH_COOP_PINVOKE_FRAME ecx
     call RhpGcPoll2
     POP_COOP_PINVOKE_FRAME
-    pop ebp
     ret
 RhpGcPollRare endp
 
diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp
index 3cfb34807ef72f..29c9f07470eb66 100644
--- a/src/coreclr/nativeaot/Runtime/portable.cpp
+++ b/src/coreclr/nativeaot/Runtime/portable.cpp
@@ -32,7 +32,7 @@
 #include "GCMemoryHelpers.inl"
 
 #if defined(USE_PORTABLE_HELPERS)
-EXTERN_C void* F_CALL_CONV RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame);
+EXTERN_C void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame);
 
 static Object* AllocateObject(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements)
 {
@@ -88,7 +88,7 @@ FCIMPL1(Object *, RhpNewFinalizable, MethodTable* pEEType)
 }
 FCIMPLEND
 
-FCIMPL2(Array *, RhpNewArray, MethodTable * pArrayEEType, int numElements)
+FCIMPL2(Array *, RhpNewArrayFast, MethodTable * pArrayEEType, int numElements)
 {
     Thread * pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context * acontext = pCurThread->GetAllocContext();
@@ -129,9 +129,9 @@ FCIMPLEND
 
 FCIMPL2(String *, RhNewString, MethodTable * pArrayEEType, int numElements)
 {
-    // TODO: Implement. We tail call to RhpNewArray for now since there's a bunch of TODOs in the places
+    // TODO: Implement. We tail call to RhpNewArrayFast for now since there's a bunch of TODOs in the places
     // that matter anyway.
-    return (String*)RhpNewArray(pArrayEEType, numElements);
+    return (String*)RhpNewArrayFast(pArrayEEType, numElements);
 }
 FCIMPLEND
 
@@ -221,7 +221,7 @@ FCIMPL1(Object*, RhpNewFastMisalign, MethodTable* pEEType)
 }
 FCIMPLEND
 
-FCIMPL2(Array*, RhpNewArrayAlign8, MethodTable* pArrayEEType, int numElements)
+FCIMPL2(Array*, RhpNewArrayFastAlign8, MethodTable* pArrayEEType, int numElements)
 {
     Thread* pCurThread = ThreadStore::GetCurrentThread();
     gc_alloc_context* acontext = pCurThread->GetAllocContext();
diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S
deleted file mode 100644
index 4690b12c38dbfb..00000000000000
--- a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S
+++ /dev/null
@@ -1,273 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-#include <unixasmmacros.inc>
-#include "AsmOffsets.inc"
-
-// GC type flags
-#define GC_ALLOC_FINALIZE 1
-
-//
-// Rename fields of nested structs
-//
-#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr)
-#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit)
-
-// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
-// allocation context then automatically fallback to the slow allocation path.
-// a0 == MethodTable
-    LEAF_ENTRY RhpNewFast, _TEXT
-
-    // a1 = GetThread()
-    // Save MethodTable pointer. INLINE_GETTHREAD will trash a0.
-    mv t2, a0
-    INLINE_GETTHREAD a1
-
-    //
-    // t2 contains MethodTable pointer
-    //
-    lw a2, OFFSETOF__MethodTable__m_uBaseSize(t2)
-
-    //
-    // t2: MethodTable pointer
-    // a1: Thread pointer
-    // a2: base size
-    //
-
-    // Load potential new object address into t3.
-    ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1)
-
-    // Determine whether the end of the object would lie outside of the current allocation context. If so,
-    // we abandon the attempt to allocate the object directly and fall back to the slow helper.
-    add a2, a2, t3
-    ld t4, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a1)
-    bltu t4, a2, LOCAL_LABEL(RhpNewFast_RarePath)
-
-    // Update the alloc pointer to account for the allocation.
- sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) - - // Set the new objects MethodTable pointer - sd t2, OFFSETOF__Object__m_pEEType(t3) - - mv a0, t3 - ret - -LOCAL_LABEL(RhpNewFast_RarePath): - mv a1, zero - mv a0, t2 - tail RhpNewObject - LEAF_END RhpNewFast, _TEXT - -// Allocate non-array object with finalizer. -// a0 == MethodTable - LEAF_ENTRY RhpNewFinalizable, _TEXT - li a1, GC_ALLOC_FINALIZE - tail RhpNewObject - LEAF_END RhpNewFinalizable, _TEXT - -// Allocate non-array object. -// a0 == MethodTable -// a1 == alloc flags - NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME a3 - - // a3: transition frame - - // Preserve the MethodTable in s2 - mv s2, a0 - - li a2, 0 // numElements - - // Call the rest of the allocation helper. - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call C_FUNC(RhpGcAlloc) - - // Set the new object's MethodTable pointer on success. - beq a0, zero, LOCAL_LABEL(NewOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state - -LOCAL_LABEL(NewOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mv a0, s2 // MethodTable pointer - li a1, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - tail C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewObject, _TEXT - -// Allocate a string. -// a0 == MethodTable -// a1 == element/character count - LEAF_ENTRY RhNewString, _TEXT - // Make sure computing the overall allocation size won't overflow - li a2, MAX_STRING_LENGTH - bltu a2, a1, LOCAL_LABEL(StringSizeOverflow) // Branch if a2 < a1 (overflow) - - // Compute overall allocation size (align(base size + (element size * elements), 8)). - li a3, STRING_COMPONENT_SIZE // Load STRING_COMPONENT_SIZE into a3 - slli a2, a1, 1 // a2 = a1 * STRING_COMPONENT_SIZE, where STRING_COMPONENT_SIZE == 2 - addi a2, a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 - andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) - - // a0 == MethodTable - // a1 == element count - // a2 == string size - - // Save MethodTable pointer. INLINE_GETTHREAD will trash a0. - mv t2, a0 - INLINE_GETTHREAD a3 - - // Load potential new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add a2, a2, t3 - ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) - bltu t3, a2, LOCAL_LABEL(RhNewString_Rare) - - // Reload new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Update the alloc pointer to account for the allocation. - sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Set the new object's MethodTable pointer and element count. - sd t2, OFFSETOF__Object__m_pEEType(t3) - sd a1, OFFSETOF__Array__m_Length(t3) - - // Return the object allocated in a0. - mv a0, t3 - - ret - -LOCAL_LABEL(StringSizeOverflow): - // We get here if the length of the final string object cannot be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an OOM exception that the caller of this allocator understands. 
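A quick worked check that the MAX_STRING_LENGTH guard above is sufficient on 64-bit targets (STRING_BASE_SIZE is 0x16 there, per the AsmOffsets.h hunk earlier in this diff):

using System;

internal static class StringSizeBound
{
    private static void Main()
    {
        const ulong maxLength = 0x3FFFFFDF;                        // MAX_STRING_LENGTH
        const ulong stringBaseSize = 0x16;                         // STRING_BASE_SIZE, 64-bit
        ulong size = (2 * maxLength + stringBaseSize + 7) & ~7ul;  // align(base + 2 * len, 8)
        Console.WriteLine("0x" + size.ToString("X"));              // 0x7FFFFFD8
        Console.WriteLine(size <= uint.MaxValue);                  // True: no 32-bit overflow
    }
}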
- - mv a0, t2 - li a1, 1 // Indicate that we should throw OverflowException - tail C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhNewString_Rare): - mv a0, t2 - tail C_FUNC(RhpNewArrayRare) - LEAF_END RhNewString, _TEXT - -// Allocate one-dimensional, zero-based array (SZARRAY). -// a0 == MethodTable -// a1 == element count - LEAF_ENTRY RhpNewArray, _TEXT - - // We want to limit the element count to the non-negative 32-bit int range. - // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component - // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst - // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. - li a2, 0x7fffffff - bltu a2, a1, LOCAL_LABEL(ArraySizeOverflow) // Branch if a2 < a1 (check for overflow) - - lhu a2, OFFSETOF__MethodTable__m_usComponentSize(a0) // Load component size - mul a2, a1, a2 // a2 = a1 * component size - lw a3, OFFSETOF__MethodTable__m_uBaseSize(a0) // Load base size - add a2, a2, a3 // a2 = a2 + base size - addi a2, a2, 7 // a2 = a2 + 7 - andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) - - // a0 == MethodTable - // a1 == element count - // a2 == array size - - // Save MethodTable pointer. INLINE_GETTHREAD will trash a0. - mv t2, a0 - INLINE_GETTHREAD a3 - - // Load potential new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add a2, a2, t3 - ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) - bltu t3, a2, LOCAL_LABEL(RhpNewArray_Rare) - - // Reload new object address into t3. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Update the alloc pointer to account for the allocation. - sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - - // Set the new object's MethodTable pointer and element count. - sd t2, OFFSETOF__Object__m_pEEType(t3) - sd a1, OFFSETOF__Array__m_Length(t3) - - // Return the object allocated in a0. - mv a0, t3 - - ret - -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object cannot be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. - - mv a0, t2 - li a1, 1 // Indicate that we should throw OverflowException - tail C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhpNewArray_Rare): - mv a0, t2 - tail C_FUNC(RhpNewArrayRare) - LEAF_END RhpNewArray, _TEXT - -// Allocate one-dimensional, zero-based array (SZARRAY) using the slow path that calls a runtime helper. -// a0 == MethodTable -// a1 == element count -// a2 == array size + Thread::m_alloc_context::alloc_ptr -// a3 == Thread - NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from a2. - ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) - sub a2, a2, t3 - - PUSH_COOP_PINVOKE_FRAME a3 - - // Preserve data we will need later into the callee saved registers - mv s2, a0 // Preserve MethodTable - - mv a2, a1 // numElements - li a1, 0 // uFlags - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - call C_FUNC(RhpGcAlloc) - - // Set the new object's MethodTable pointer and length on success. 
- beq a0, zero, LOCAL_LABEL(ArrayOutOfMemory) - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state - -LOCAL_LABEL(ArrayOutOfMemory): - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - mv a0, s2 // MethodTable Pointer - li a1, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - tail C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index 4cf213cab49abf..3f0fe522361d61 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -34,6 +34,17 @@ .equiv \New, \Old .endm +// Rename offsets of nested structures +#define OFFSETOF__ee_alloc_context__alloc_ptr (OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__ee_alloc_context OFFSETOF__Thread__m_eeAllocContext + +// GC type flags +#define GC_ALLOC_FINALIZE 1 +#define GC_ALLOC_ALIGN8_BIAS 4 +#define GC_ALLOC_ALIGN8 8 + +#define G_FREE_OBJECT_METHOD_TABLE g_pFreeObjectEEType + #if defined(HOST_AMD64) #include "unixasmmacrosamd64.inc" #elif defined(HOST_ARM) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index 735c17a904966c..5e491a49770d58 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -41,6 +41,7 @@ C_FUNC(\Name): .global C_FUNC(_\Name) .type \Name, %function #endif + .p2align 4 C_FUNC(\Name): .cfi_startproc .endm @@ -233,15 +234,6 @@ C_FUNC(\Name): #define TSF_SuppressGcStress 0x08 #define TSF_DoNotTriggerGc 0x10 -// -// Rename fields of nested structs -// -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit - -// GC type flags -#define GC_ALLOC_FINALIZE 1 - // Note: these must match the defs in PInvokeTransitionFrameFlags #define PTFF_SAVE_RBX 0x00000001 #define PTFF_SAVE_R12 0x00000010 @@ -287,6 +279,10 @@ C_FUNC(\Name): #endif .endm +.macro INLINE_GET_ALLOC_CONTEXT_BASE + INLINE_GETTHREAD +.endm + .macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 // // Thread::Unhijack() diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc index 8aeb084f8e3cd7..bb2ff4b7362a5e 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -5,11 +5,6 @@ // CONSTANTS -- INTEGER // -// GC type flags -#define GC_ALLOC_FINALIZE 1 -#define GC_ALLOC_ALIGN8_BIAS 4 -#define GC_ALLOC_ALIGN8 8 - #define TSF_Attached 0x01 #define TSF_SuppressGcStress 0x08 #define TSF_DoNotTriggerGc 0x10 @@ -28,12 +23,8 @@ #define TrapThreadsFlags_AbortInProgress 1 #define TrapThreadsFlags_TrapThreads 2 -// Rename fields of nested structs -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define 
OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) - // GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). -#define SIZEOF__MinObject 12 +#define ASM_MIN_OBJECT_SIZE 12 .macro NESTED_ENTRY Name, Section, Handler LEAF_ENTRY \Name, \Section @@ -285,6 +276,10 @@ C_FUNC(\Name): #endif .endm +.macro INLINE_GET_ALLOC_CONTEXT_BASE + INLINE_GETTHREAD +.endm + .macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 // // Thread::Unhijack() diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc index 36698fece5050b..74e5c0b7e0b454 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc @@ -226,6 +226,31 @@ C_FUNC(\Name): INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) .endm +// Target cannot be x0. +.macro INLINE_GET_ALLOC_CONTEXT_BASE target + .ifc \target, x0 + .error "target cannot be x0" + .endif + +#ifdef FEATURE_EMULATED_TLS + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x20 + PROLOG_SAVE_REG_PAIR x0, x1, 0x10 + + bl C_FUNC(RhpGetThread) + mov \target, x0 + + .ifc \target, x1 + EPILOG_RESTORE_REG_PAIR x0, xzr, 0x10 + .else + EPILOG_RESTORE_REG_PAIR x0, x1, 0x10 + .endif + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x20 +#else + INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) +#endif +.endm + + // Do not use these ETLS macros in functions that already create a stack frame. // Creating two stack frames in one function can confuse the unwinder/debugger diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc index 9a9d6dc69d7615..85eedf18b48822 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc @@ -171,6 +171,11 @@ C_FUNC(\Name): INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) .endm +// Target cannot be x0. +.macro INLINE_GET_ALLOC_CONTEXT_BASE target + INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) +.endm + .macro InterlockedOperationBarrier dbar 0 .endm diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc index cbfc289518db11..9aca4d7c311624 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -156,7 +156,7 @@ C_FUNC(\Name): // Loads the address of a thread-local variable into the target register. // The target register cannot be a0. -.macro INLINE_GET_TLS_VAR target, var +.macro INLINE_GET_TLS_VAR target, var, ofs = 0 .ifc \target, a0 .error "target cannot be a0" .endif @@ -187,7 +187,7 @@ C_FUNC(\Name): ld a7, 0(sp) addi sp, sp, 72 - mv \target, a0 + add \target, a0, \ofs /* In the future we should switch to TLS descriptors. 
Its support was added in 2024 in glibc, musl, llvm, gcc and binutils, @@ -217,6 +217,9 @@ C_FUNC(\Name): addi a0, a0, %tlsdesc_add_lo(\var) jalr t0, 0(t0), %tlsdesc_call(\var) add \target, tp, a0 + .ifnc \ofs, 0 + add \target, \target, \ofs + .endif */ .endm @@ -226,6 +229,13 @@ C_FUNC(\Name): INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) .endm +// Caller must have an established frame, trashes volatile registers +.macro INLINE_GET_ALLOC_CONTEXT_BASE + // global dynamic TLS, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#global-dynamic + la.tls.gd a0, C_FUNC(tls_CurrentThread) + call C_FUNC(__tls_get_addr) +.endm + .macro InterlockedOperationBarrier fence rw, rw .endm diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs index fbbd850e5bb90f..d83afdccd16814 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs @@ -664,7 +664,7 @@ internal static unsafe Array NewMultiDimArray(MethodTable* eeType, int* pLengths throw new OutOfMemoryException(); // "Array dimensions exceeded supported range." Debug.Assert(eeType->NumVtableSlots != 0, "Compiler enforces we never have unconstructed MTs for multi-dim arrays since those can be template-constructed anytime"); - Array ret = RuntimeImports.RhNewArray(eeType, (int)totalLength); + Array ret = RuntimeImports.RhNewVariableSizeObject(eeType, (int)totalLength); ref int bounds = ref ret.GetRawMultiDimArrayBounds(); for (int i = 0; i < rank; i++) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs index 9f8dbe11a212eb..d241102d507031 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs @@ -29,7 +29,7 @@ public Type GetType() protected internal unsafe object MemberwiseClone() { object clone = this.GetMethodTable()->IsArray ? 
-            RuntimeImports.RhNewArray(this.GetMethodTable(), Unsafe.As<Array>(this).Length) :
+            RuntimeImports.RhNewVariableSizeObject(this.GetMethodTable(), Unsafe.As<Array>(this).Length) :
             RuntimeImports.RhNewObject(this.GetMethodTable());
 
         // copy contents of "this" to the clone
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
index 177d8440facd26..abf213ccfb4719 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs
@@ -390,6 +390,10 @@ internal static IntPtr RhHandleAllocDependent(object primary, object secondary)
         [RuntimeImport(RuntimeLibrary, "RhNewArray")]
         internal static extern unsafe Array RhNewArray(MethodTable* pEEType, int length);
 
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        [RuntimeImport(RuntimeLibrary, "RhNewVariableSizeObject")]
+        internal static extern unsafe Array RhNewVariableSizeObject(MethodTable* pEEType, int length);
+
         [MethodImpl(MethodImplOptions.InternalCall)]
         [RuntimeImport(RuntimeLibrary, "RhNewString")]
         internal static extern unsafe string RhNewString(MethodTable* pEEType, int length);
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs
index 52d1277f0bd0c5..85d2096b89a409 100644
--- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Runtime/RuntimeImports.cs
@@ -80,6 +80,13 @@ internal static IntPtr RhGetModuleSection(TypeManagerHandle module, ReadyToRunSe
         [RuntimeImport(RuntimeLibrary, "RhNewArray")]
         private static extern unsafe Array RhNewArray(MethodTable* pEEType, int length);
 
+        [MethodImpl(MethodImplOptions.InternalCall)]
+        [RuntimeImport(RuntimeLibrary, "RhNewString")]
+        internal static extern unsafe string RhNewString(MethodTable* pEEType, int length);
+
+        [DllImport(RuntimeLibrary)]
+        internal static extern unsafe void RhAllocateNewArray(MethodTable* pArrayEEType, uint numElements, uint flags, void* pResult);
+
         [DllImport(RuntimeLibrary)]
         internal static extern unsafe void RhAllocateNewObject(IntPtr pEEType, uint flags, void* pResult);
 
diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj b/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj
index 2c58d5764fcd4e..956b3ecbf04e8e 100644
--- a/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj
+++ b/src/coreclr/nativeaot/Test.CoreLib/src/Test.CoreLib.csproj
@@ -22,8 +22,6 @@
     INPLACE_RUNTIME;$(DefineConstants)
-    FEATURE_64BIT_ALIGNMENT;$(DefineConstants)
-    FEATURE_64BIT_ALIGNMENT;$(DefineConstants)
     $(ArtifactsObjDir)\coreclr\$(TargetOS).$(TargetArchitecture).$(CoreCLRConfiguration)
     $(IntermediatesDir)\ide
diff --git a/src/coreclr/pal/inc/unixasmmacros.inc b/src/coreclr/pal/inc/unixasmmacros.inc
index 120b26543e3faa..83b5ce1523a5ad 100644
--- a/src/coreclr/pal/inc/unixasmmacros.inc
+++ b/src/coreclr/pal/inc/unixasmmacros.inc
@@ -38,6 +38,23 @@
 .equiv \New, \Old
 .endm
 
+// GC type flags
+#define GC_ALLOC_FINALIZE 1
+#define GC_ALLOC_ALIGN8_BIAS 4
+#define GC_ALLOC_ALIGN8 8
+
+#define G_FREE_OBJECT_METHOD_TABLE g_pFreeObjectMethodTable
+
+// Offset of ee_alloc_context relative to INLINE_GET_ALLOC_CONTEXT_BASE.
+// +// Since we have a disparity on how thread locals are accessed on various platforms and +// the current value of OFFSETOF__RuntimeThreadLocals__ee_alloc_context is zero we expect +// the helpers to add the OFFSETOF__RuntimeThreadLocals__ee_alloc_context constant to the +// base and set the additional offset to zero. +// In other words, we treat INLINE_GET_ALLOC_CONTEXT_BASE as returning the same value as +// GetThreadEEAllocContext. +#define OFFSETOF__ee_alloc_context 0 + #if defined(HOST_X86) #include "unixasmmacrosx86.inc" #elif defined(HOST_AMD64) diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index a70aa048938abf..d84f70e8ad57db 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -40,6 +40,7 @@ C_FUNC(\Name): #else .type \Name, %function #endif + .p2align 4 C_FUNC(\Name): .cfi_startproc .endm @@ -366,3 +367,41 @@ C_FUNC(\Name\()_End): .cfi_same_value rbp .endm + +// Inlined version of GetThreadEEAllocContext. Trashes volatile registers. +.macro INLINE_GET_ALLOC_CONTEXT_BASE +#if defined(FEATURE_EMULATED_TLS) || defined(__APPLE__) + call C_FUNC(GetThreadEEAllocContext) +#else + .att_syntax + .byte 0x66 // data16 prefix - padding to have space for linker relaxations + leaq t_runtime_thread_locals@TLSGD(%rip), %rdi + .byte 0x66 // + .byte 0x66 // + .byte 0x48 // rex.W prefix, also for padding + callq __tls_get_addr@PLT + .intel_syntax noprefix + + .ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0 + lea rax, [rax + OFFSETOF__RuntimeThreadLocals__ee_alloc_context] + .endif +#endif +.endm + +// Pushes a TransitionBlock on the stack without saving the argument registers. +// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout. +.macro PUSH_COOP_PINVOKE_FRAME target + set_cfa_register rsp, 8 + + PUSH_CALLEE_SAVED_REGISTERS + // 6 * 8 for argument register space in TransitionBlock + alignment of the stack to 16b + alloc_stack 56 + END_PROLOGUE + + lea \target, [rsp + 8] +.endm + +.macro POP_COOP_PINVOKE_FRAME + free_stack 56 + POP_CALLEE_SAVED_REGISTERS +.endm diff --git a/src/coreclr/pal/inc/unixasmmacrosarm.inc b/src/coreclr/pal/inc/unixasmmacrosarm.inc index 226d23aebd2f75..54a6f7d4dc3b19 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm.inc @@ -271,6 +271,29 @@ C_FUNC(\Name): vpop_nonvol_reg "\RegList" .endm +.macro INLINE_GET_ALLOC_CONTEXT_BASE + bl C_FUNC(GetThreadEEAllocContext) +.endm + +.macro PUSH_COOP_PINVOKE_FRAME target + // Reserve space for argument registers + alloc_stack 16 + PUSH_CALLEE_SAVED_REGISTERS + PROLOG_STACK_SAVE_OFFSET r7, #12 + // let r7 point the saved r7 in the stack (clang FP style) + // align the stack + alloc_stack 4 + CHECK_STACK_ALIGNMENT + END_PROLOGUE + add \target, sp, 4 +.endm + +.macro POP_COOP_PINVOKE_FRAME + free_stack 4 + POP_CALLEE_SAVED_REGISTERS + free_stack 16 +.endm + //----------------------------------------------------------------------------- // Macro used to check (in debug builds only) whether the stack is 64-bit aligned (a requirement before calling // out into C++/OS code). 
Invoke this directly after your prolog (if the stack frame size is fixed) or directly
@@ -289,3 +312,9 @@ C_FUNC(\Name):
 0:
 #endif
 .endm
+
+// Loads a 32bit constant into destination register
+.macro MOV32 DestReg, Constant
+    movw \DestReg, #((\Constant) & 0xFFFF)
+    movt \DestReg, #((\Constant) >> 16)
+.endm
diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc
index 746e48321db352..4e8b9e7c257101 100644
--- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc
+++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc
@@ -325,6 +325,70 @@ C_FUNC(\Name\()_End):
 
 .endm
 
+// Inlined version of GetThreadEEAllocContext. Target cannot be x0 or x1.
+.macro INLINE_GET_ALLOC_CONTEXT_BASE target
+    .ifc \target, x0
+        .error "target cannot be x0"
+    .endif
+    .ifc \target, x1
+        .error "target cannot be x1"
+    .endif
+
+#if defined(FEATURE_EMULATED_TLS) || defined(__APPLE__)
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x20
+    PROLOG_SAVE_REG_PAIR x0, x1, 0x10
+
+    bl C_FUNC(GetThreadEEAllocContext)
+    mov \target, x0
+
+    EPILOG_RESTORE_REG_PAIR x0, x1, 0x10
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x20
+#else
+    PROLOG_SAVE_REG_PAIR_INDEXED x0, lr, -0x10
+
+    // This sequence of instructions is recognized and potentially patched
+    // by the linker (GD->IE/LE relaxation).
+    adrp x0, :tlsdesc:t_runtime_thread_locals
+    ldr \target, [x0, :tlsdesc_lo12:t_runtime_thread_locals]
+    add x0, x0, :tlsdesc_lo12:t_runtime_thread_locals
+    blr \target
+    // End of the sequence
+
+    mrs \target, TPIDR_EL0
+    add \target, \target, x0
+
+    .ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0
+        add \target, \target, OFFSETOF__RuntimeThreadLocals__ee_alloc_context
+    .endif
+
+    EPILOG_RESTORE_REG_PAIR_INDEXED x0, lr, 0x10
+#endif
+.endm
+
+// Pushes a TransitionBlock on the stack without saving the argument registers.
+// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout.
+.macro PUSH_COOP_PINVOKE_FRAME target
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -176
+
+    // Spill callee saved registers
+    PROLOG_SAVE_REG_PAIR x19, x20, 16
+    PROLOG_SAVE_REG_PAIR x21, x22, 32
+    PROLOG_SAVE_REG_PAIR x23, x24, 48
+    PROLOG_SAVE_REG_PAIR x25, x26, 64
+    PROLOG_SAVE_REG_PAIR x27, x28, 80
+
+    mov \target, sp
+.endm
+
+.macro POP_COOP_PINVOKE_FRAME
+    EPILOG_RESTORE_REG_PAIR x19, x20, 16
+    EPILOG_RESTORE_REG_PAIR x21, x22, 32
+    EPILOG_RESTORE_REG_PAIR x23, x24, 48
+    EPILOG_RESTORE_REG_PAIR x25, x26, 64
+    EPILOG_RESTORE_REG_PAIR x27, x28, 80
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 176
+.endm
+
 // ------------------------------------------------------------------
 // Macro to generate Redirection Stubs
 //
diff --git a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc
index a949491f5fabdb..e44f07ddf4fd40 100644
--- a/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc
+++ b/src/coreclr/pal/inc/unixasmmacrosloongarch64.inc
@@ -432,6 +432,44 @@ C_FUNC(\Name\()_End):
 
 .endm
 
+// Inlined version of GetThreadEEAllocContext. Target cannot be a0.
+.macro INLINE_GET_ALLOC_CONTEXT_BASE target
+    .ifc \target, $a0
+        .error "target cannot be a0"
+    .endif
+
+    // Save $a0, $ra
+    PROLOG_SAVE_REG_PAIR_INDEXED 4, 1, 16, 0
+
+    // This instruction is recognized and potentially patched
+    // by the linker (GD->IE/LE relaxation).
+    la.tls.desc $a0, t_runtime_thread_locals
+
+    addi.d \target, $tp, OFFSETOF__RuntimeThreadLocals__ee_alloc_context
+    add.d \target, \target, $a0
+
+    // Restore $a0, $ra
+    EPILOG_RESTORE_REG_PAIR_INDEXED 4, 1, 16
+.endm
+
+// Pushes a TransitionBlock on the stack without saving the argument registers.
+// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout.
+.macro PUSH_COOP_PINVOKE_FRAME target
+    // Including fp, ra, s0-s8, padding, and (a0-a7) arguments. (1+1+9+1)*8 + 8*8 == 160.
+    PROLOG_STACK_ALLOC 160
+    // $fp,$ra
+    PROLOG_SAVE_REG_PAIR 22, 1, 0, 1
+    // Spill callee saved registers. $sp=$r3.
+    SAVE_CALLEESAVED_REGISTERS 3, 0
+    move \target, $sp
+.endm
+
+.macro POP_COOP_PINVOKE_FRAME
+    RESTORE_CALLEESAVED_REGISTERS 3, 0
+    EPILOG_RESTORE_REG_PAIR 22, 1, 0
+    EPILOG_STACK_FREE 160
+.endm
+
 // ------------------------------------------------------------------
 // Macro to generate Redirection Stubs
 //
diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
index 1f7344c4d15741..8201a03817f9c7 100644
--- a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
+++ b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
@@ -299,7 +299,6 @@ C_FUNC(\Name):
 .endm
 
 .macro EPILOG_WITH_TRANSITION_BLOCK_RETURN
-
     RESTORE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters
 
     EPILOG_RESTORE_REG_PAIR fp, ra, __PWTB_CalleeSavedRegisters
@@ -328,6 +327,28 @@ C_FUNC(\Name):
     EPILOG_STACK_FREE __PWTB_StackAlloc
 .endm
 
+// Inlined version of GetThreadEEAllocContext
+.macro INLINE_GET_ALLOC_CONTEXT_BASE
+    call C_FUNC(GetThreadEEAllocContext)
+.endm
+
+// Pushes a TransitionBlock on the stack without saving the argument registers.
+// See the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout.
+.macro PUSH_COOP_PINVOKE_FRAME target
+    // Including fp, ra, s1-s11, tp, gp, padding and (a0-a7) arguments. (1+1+11+1+1)*8 + 8 + 8*8 == 192.
+    PROLOG_STACK_ALLOC 192
+    PROLOG_SAVE_REG_PAIR fp, ra, 0, 1
+    // Spill callee saved registers. sp=r2.
+ SAVE_CALLEESAVED_REGISTERS sp, 0 + mv \target, sp +.endm + +.macro POP_COOP_PINVOKE_FRAME + RESTORE_CALLEESAVED_REGISTERS sp, 0 + EPILOG_RESTORE_REG_PAIR fp, ra, 0 + EPILOG_STACK_FREE 192 +.endm + // ------------------------------------------------------------------ // Macro to generate Redirection Stubs // diff --git a/src/coreclr/pal/inc/unixasmmacrosx86.inc b/src/coreclr/pal/inc/unixasmmacrosx86.inc index 0de1d04a2437c5..c28b2b170bec5b 100644 --- a/src/coreclr/pal/inc/unixasmmacrosx86.inc +++ b/src/coreclr/pal/inc/unixasmmacrosx86.inc @@ -56,6 +56,11 @@ C_FUNC(\Name\()_End): .cfi_rel_offset \Reg, 0 .endm +.macro PROLOG_ALLOC Size + sub esp, \Size + .cfi_adjust_cfa_offset \Size +.endm + .macro PROLOG_END .cfi_def_cfa_register ebp .cfi_def_cfa_offset 8 @@ -69,6 +74,11 @@ C_FUNC(\Name\()_End): .cfi_restore \Reg .endm +.macro EPILOG_FREE Size + add esp, \Size + .cfi_adjust_cfa_offset -\Size +.endm + .macro EPILOG_END pop ebp .endm @@ -81,8 +91,7 @@ C_FUNC(\Name\()_End): .endm .macro ESP_PROLOG_ALLOC Size - sub esp, \Size - .cfi_adjust_cfa_offset \Size + PROLOG_ALLOC \Size .endm .macro ESP_PROLOG_END @@ -97,8 +106,7 @@ C_FUNC(\Name\()_End): .endm .macro ESP_EPILOG_FREE Size - add esp, \Size - .cfi_adjust_cfa_offset -\Size + EPILOG_FREE \Size .endm .macro ESP_EPILOG_END @@ -115,6 +123,49 @@ C_FUNC(\Name\()_End): .intel_syntax noprefix .endm +.macro PUSH_COOP_PINVOKE_FRAME Target + // push ebp-frame + PROLOG_BEG + + // save CalleeSavedRegisters + PROLOG_PUSH ebx + PROLOG_PUSH esi + PROLOG_PUSH edi + + // make space for ArgumentRegisters (8) + alignment (4) + PROLOG_ALLOC 12 + + // set frame pointer + PROLOG_END + + lea \Target, [esp + 4] +.endm + +// Inlined version of GetThreadEEAllocContext. +.macro INLINE_GET_ALLOC_CONTEXT_BASE + push ecx + push eax + call C_FUNC(GetThreadEEAllocContext) + pop eax + pop ecx +.endm + +.macro POP_COOP_PINVOKE_FRAME + // restore stack pointer + EPILOG_BEG + + // skip over alignment (4) + ArgumentRegisters (8) + EPILOG_FREE 12 + + // pop CalleeSavedRegisters + EPILOG_POP edi + EPILOG_POP esi + EPILOG_POP ebx + + // pop ebp-frame + EPILOG_END +.endm + .macro CHECK_STACK_ALIGNMENT #ifdef _DEBUG test esp, 0x0F diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S b/src/coreclr/runtime/amd64/AllocFast.S similarity index 62% rename from src/coreclr/nativeaot/Runtime/amd64/AllocFast.S rename to src/coreclr/runtime/amd64/AllocFast.S index 8923a7a4fbb64b..361993b157a69b 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S +++ b/src/coreclr/runtime/amd64/AllocFast.S @@ -2,18 +2,18 @@ // The .NET Foundation licenses this file to you under the MIT license. .intel_syntax noprefix -#include // generated by the build from AsmOffsets.cpp -#include +#include "AsmMacros_Shared.h" // Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's // allocation context then automatically fallback to the slow allocation path. 
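Every rewritten fast path that follows implements the same bump-allocation contract over ee_alloc_context. A compact C sketch of that contract, under assumed field names matching the offsets used in the asm:

```c
#include <stddef.h>

typedef struct {
    char* alloc_ptr;
    char* combined_limit;   // min of the real limit and the sampling limit; name assumed
} ee_alloc_context;

// Fast-path contract shared by RhpNewFast and the NEW_ARRAY_FAST macros below:
// returns NULL when the caller must fall back to the slow helper.
static inline void* TryAllocFast(ee_alloc_context* ctx, void* methodTable,
                                 size_t size)
{
    char*  ptr   = ctx->alloc_ptr;
    size_t space = (size_t)(ctx->combined_limit - ptr);  // cannot overflow

    if (size > space)
        return NULL;

    *(void**)ptr   = methodTable;  // set m_pEEType first...
    ctx->alloc_ptr = ptr + size;   // ...then publish the bumped pointer
    return ptr;
}
```

Two deliberate changes from the old Thread-based code show up in all ports: the size is compared against combined_limit - alloc_ptr, which cannot overflow (the 32-bit ports previously needed an explicit carry check on the addition), and the MethodTable store is issued before the bumped alloc_ptr is published.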
// RDI == MethodTable
-NESTED_ENTRY RhpNewFast, _TEXT, NoHandler
+LEAF_ENTRY RhpNewFast, _TEXT
+
     push_nonvol_reg rbx
     mov rbx, rdi
 
-    // rax = GetThread()
-    INLINE_GETTHREAD
+    // rax = ee_alloc_context pointer; trashes volatile registers
+    INLINE_GET_ALLOC_CONTEXT_BASE
 
     //
     // rbx contains MethodTable pointer
@@ -21,23 +21,33 @@ NESTED_ENTRY RhpNewFast, _TEXT, NoHandler
     mov edx, [rbx + OFFSETOF__MethodTable__m_uBaseSize]
 
     //
-    // rax: Thread pointer
+    // rax: ee_alloc_context pointer
     // rbx: MethodTable pointer
     // rdx: base size
     //
-    mov rsi, [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr]
-    add rdx, rsi
-    cmp rdx, [rax + OFFSETOF__Thread__m_eeAllocContext__combined_limit]
+    // Load potential new object address into rsi.
+    mov rsi, [rax + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+
+    // Load and calculate the maximum size of object we can fit.
+    mov rdi, [rax + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+    sub rdi, rsi
+
+    // Determine whether the end of the object is too big for the current allocation context. If so,
+    // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+    cmp rdx, rdi
     ja LOCAL_LABEL(RhpNewFast_RarePath)
 
-    // set the new alloc pointer
-    mov [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rdx
+    // Calculate the new alloc pointer to account for the allocation.
+    add rdx, rsi
 
-    mov rax, rsi
+    // Set the new object's MethodTable pointer.
+    mov [rsi + OFFSETOF__Object__m_pEEType], rbx
 
-    // set the new object's MethodTable pointer
-    mov [rsi], rbx
+    // Update the alloc pointer to the newly calculated one.
+    mov [rax + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], rdx
+
+    mov rax, rsi
 
     .cfi_remember_state
     pop_nonvol_reg rbx
@@ -51,8 +61,7 @@ LOCAL_LABEL(RhpNewFast_RarePath):
     pop_nonvol_reg rbx
     jmp C_FUNC(RhpNewObject)
 
-NESTED_END RhpNewFast, _TEXT
-
+LEAF_END RhpNewFast, _TEXT
 
 // Allocate non-array object with finalizer
@@ -63,7 +72,6 @@ LEAF_ENTRY RhpNewFinalizable, _TEXT
 
 LEAF_END RhpNewFinalizable, _TEXT
 
-
 // Allocate non-array object
 // RDI == MethodTable
 // ESI == alloc flags
@@ -102,75 +110,82 @@ LOCAL_LABEL(NewOutOfMemory):
     POP_COOP_PINVOKE_FRAME
     jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation)
+
 NESTED_END RhpNewObject, _TEXT
 
-// Allocate a string.
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast
+// RAX == string/array size
 // RDI == MethodTable
 // ESI == character/element count
-NESTED_ENTRY RhNewString, _TEXT, NoHandler
-    // we want to limit the element count to the non-negative 32-bit int range
-    cmp rsi, MAX_STRING_LENGTH
-    ja LOCAL_LABEL(StringSizeOverflow)
+.macro NEW_ARRAY_FAST
 
     push_nonvol_reg rbx
     push_nonvol_reg r12
-    push_register rcx // padding
+    push_nonvol_reg r13
 
     mov rbx, rdi    // save MethodTable
     mov r12, rsi    // save element count
+    mov r13, rax    // save size
 
-    // rax = GetThread()
-    INLINE_GETTHREAD
-
-    mov rcx, rax    // rcx = Thread*
+    // rax = ee_alloc_context pointer; trashes volatile registers
+    INLINE_GET_ALLOC_CONTEXT_BASE
 
-    // Compute overall allocation size (align(base size + (element size * elements), 8)).
- lea rax, [r12 * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7] - and rax, -8 + mov rcx, rax - // rax == string size + // rcx == ee_alloc_context* // rbx == MethodTable - // rcx == Thread* // r12 == element count + // r13 == string/array size - mov rdx, rax - add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc LOCAL_LABEL(RhNewString_RarePath) - - // rax == new alloc ptr - // rbx == MethodTable - // rcx == Thread* - // rdx == string size - // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja LOCAL_LABEL(RhNewString_RarePath) + mov rax, [rcx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + mov rdi, [rcx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + sub rdi, rax - mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax - - // calc the new object pointer - sub rax, rdx + // Determine whether the end of the object is too big for the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + cmp r13, rdi + ja 1f + add r13, rax mov [rax + OFFSETOF__Object__m_pEEType], rbx - mov [rax + OFFSETOF__String__m_Length], r12d + mov [rax + OFFSETOF__Array__m_Length], r12d + mov [rcx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r13 .cfi_remember_state - pop_register rcx // padding + pop_nonvol_reg r13 pop_nonvol_reg r12 pop_nonvol_reg rbx ret .cfi_restore_state .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug -LOCAL_LABEL(RhNewString_RarePath): +1: mov rdi, rbx // restore MethodTable mov rsi, r12 // restore element count - // passing string size in rdx - pop_register rcx // padding + pop_nonvol_reg r13 pop_nonvol_reg r12 pop_nonvol_reg rbx - jmp C_FUNC(RhpNewArrayRare) + jmp C_FUNC(RhpNewVariableSizeObject) + +.endm // NEW_ARRAY_FAST + + +// Allocate a string. +// RDI == MethodTable +// ESI == character/element count +LEAF_ENTRY RhNewString, _TEXT + + // we want to limit the element count to the non-negative 32-bit int range + cmp rsi, MAX_STRING_LENGTH + ja LOCAL_LABEL(StringSizeOverflow) + + // Compute overall allocation size (align(base size + (element size * elements), 8)). + lea rax, [rsi * STRING_COMPONENT_SIZE + STRING_BASE_SIZE + 7] + and rax, -8 + + NEW_ARRAY_FAST LOCAL_LABEL(StringSizeOverflow): // We get here if the size of the final string object can't be represented as an unsigned @@ -181,92 +196,65 @@ LOCAL_LABEL(StringSizeOverflow): xor esi, esi // Indicate that we should throw OOM. jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) -NESTED_END RhNewString, _TEXT +LEAF_END RhNewString, _TEXT // Allocate one dimensional, zero based array (SZARRAY). // RDI == MethodTable // ESI == element count -NESTED_ENTRY RhpNewArray, _TEXT, NoHandler +LEAF_ENTRY RhpNewArrayFast, _TEXT + // we want to limit the element count to the non-negative 32-bit int range cmp rsi, 0x07fffffff ja LOCAL_LABEL(ArraySizeOverflow) - push_nonvol_reg rbx - push_nonvol_reg r12 - push_register rcx // padding - - mov rbx, rdi // save MethodTable - mov r12, rsi // save element count - - // rax = GetThread() - INLINE_GETTHREAD - - mov rcx, rax // rcx = Thread* - // Compute overall allocation size (align(base size + (element size * elements), 8)). 
- movzx eax, word ptr [rbx + OFFSETOF__MethodTable__m_usComponentSize] - mul r12 - mov edx, [rbx + OFFSETOF__MethodTable__m_uBaseSize] - add rax, rdx - add rax, 7 + movzx eax, word ptr [rdi + OFFSETOF__MethodTable__m_usComponentSize] + imul rax, rsi + lea rax, [rax + SZARRAY_BASE_SIZE + 7] and rax, -8 - // rax == array size - // rbx == MethodTable - // rcx == Thread* - // r12 == element count + NEW_ARRAY_FAST - mov rdx, rax - add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc LOCAL_LABEL(RhpNewArray_RarePath) +LOCAL_LABEL(ArraySizeOverflow): + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. - // rax == new alloc ptr - // rbx == MethodTable - // rcx == Thread* - // rdx == array size - // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja LOCAL_LABEL(RhpNewArray_RarePath) + // rdi holds MethodTable pointer already + mov esi, 1 // Indicate that we should throw OverflowException + jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) - mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax +LEAF_END RhpNewArrayFast, _TEXT - // calc the new object pointer - sub rax, rdx - mov [rax + OFFSETOF__Object__m_pEEType], rbx - mov [rax + OFFSETOF__Array__m_Length], r12d +#ifndef FEATURE_NATIVEAOT +// Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements). +// RDI == MethodTable +// ESI == element count +LEAF_ENTRY RhpNewObjectArrayFast, _TEXT - .cfi_remember_state - pop_register rcx // padding - pop_nonvol_reg r12 - pop_nonvol_reg rbx - ret + // Delegate overflow handling to the generic helper conservatively - .cfi_restore_state - .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug -LOCAL_LABEL(RhpNewArray_RarePath): - mov rdi, rbx // restore MethodTable - mov rsi, r12 // restore element count - // passing array size in rdx + cmp rsi, (0x40000000 / 8) // sizeof(void*) + jae C_FUNC(RhpNewVariableSizeObject) - pop_register rcx // padding - pop_nonvol_reg r12 - pop_nonvol_reg rbx - jmp C_FUNC(RhpNewArrayRare) + // In this case we know the element size is sizeof(void *), or 8 for x64 + // This helps us in two ways - we can shift instead of multiplying, and + // there's no need to align the size either -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object can't be represented as an unsigned - // 32-bit value. We're going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. + lea eax, [esi * 8 + SZARRAY_BASE_SIZE] - // rdi holds MethodTable pointer already - mov esi, 1 // Indicate that we should throw OverflowException - jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) + // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed + // to be a multiple of 8. 
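The CoreCLR-only RhpNewObjectArrayFast helper (continued just below) leans on its element-count bound to keep the size math trivial. Roughly, for a 64-bit target, with an illustrative base-size constant:

```c
#include <stdint.h>

#define SZARRAY_BASE_SIZE 24u   // illustrative 64-bit value; see generated headers

// Size math behind RhpNewObjectArrayFast: under the count bound, the multiply
// cannot overflow and the result is already a multiple of 8.
static int TryComputeObjectArraySize(uint64_t count, uint64_t* size)
{
    if (count >= 0x40000000u / sizeof(void*))
        return 0;   // delegate to the generic RhpNewVariableSizeObject path

    // count * 8 stays below 0x40000000, and both terms are multiples of 8,
    // so the usual +7 / & -8 rounding step can be skipped entirely.
    *size = count * sizeof(void*) + SZARRAY_BASE_SIZE;
    return 1;
}
```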
+ + NEW_ARRAY_FAST + +LEAF_END RhpNewObjectArrayFast, _TEXT +#endif -NESTED_END RhpNewArray, _TEXT -NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler +NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler // rdi == MethodTable // rsi == element count @@ -290,7 +278,7 @@ NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler call C_FUNC(RhpGcAlloc) test rax, rax - jz LOCAL_LABEL(ArrayOutOfMemory) + jz LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory) .cfi_remember_state POP_COOP_PINVOKE_FRAME @@ -298,7 +286,7 @@ NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler .cfi_restore_state .cfi_def_cfa_offset 96 // workaround cfi_restore_state bug -LOCAL_LABEL(ArrayOutOfMemory): +LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory): // This is the OOM failure path. We're going to tail-call to a managed helper that will throw // an out of memory exception that the caller of this allocator understands. @@ -309,4 +297,4 @@ LOCAL_LABEL(ArrayOutOfMemory): jmp EXTERNAL_C_FUNC(RhExceptionHandling_FailedAllocation) -NESTED_END RhpNewArrayRare, _TEXT +NESTED_END RhpNewVariableSizeObject, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm b/src/coreclr/runtime/amd64/AllocFast.asm similarity index 66% rename from src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm rename to src/coreclr/runtime/amd64/AllocFast.asm index 6ba69c0c141274..4c72ddd6ab4b8e 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm +++ b/src/coreclr/runtime/amd64/AllocFast.asm @@ -1,7 +1,7 @@ ;; Licensed to the .NET Foundation under one or more agreements. ;; The .NET Foundation licenses this file to you under the MIT license. -include asmmacros.inc +include AsmMacros_Shared.inc ;; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's @@ -9,8 +9,8 @@ include asmmacros.inc ;; RCX == MethodTable LEAF_ENTRY RhpNewFast, _TEXT - ;; rdx = GetThread(), TRASHES rax - INLINE_GETTHREAD rdx, rax + ;; rdx = ee_alloc_context pointer, TRASHES rax + INLINE_GET_ALLOC_CONTEXT_BASE rdx, rax ;; ;; rcx contains MethodTable pointer @@ -20,19 +20,24 @@ LEAF_ENTRY RhpNewFast, _TEXT ;; ;; eax: base size ;; rcx: MethodTable pointer - ;; rdx: Thread pointer + ;; rdx: ee_alloc_context pointer ;; - mov rax, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - add r8, rax - cmp r8, [rdx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] + mov rax, [rdx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + mov r9, [rdx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + sub r9, rax + cmp r8, r9 ja RhpNewFast_RarePath - ;; set the new alloc pointer - mov [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], r8 + ;; Calculate the new alloc pointer to account for the allocation. 
+ add r8, rax + + ;; Set the new object's MethodTable pointer + mov [rax + OFFSETOF__Object__m_pEEType], rcx + + ;; Set the new alloc pointer + mov [rdx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 - ;; set the new object's MethodTable pointer - mov [rax], rcx ret RhpNewFast_RarePath: @@ -42,14 +47,14 @@ RhpNewFast_RarePath: LEAF_END RhpNewFast, _TEXT - ;; Allocate non-array object with finalizer ;; RCX == MethodTable LEAF_ENTRY RhpNewFinalizable, _TEXT + mov edx, GC_ALLOC_FINALIZE jmp RhpNewObject -LEAF_END RhpNewFinalizable, _TEXT +LEAF_END RhpNewFinalizable, _TEXT ;; Allocate non-array object @@ -58,7 +63,6 @@ LEAF_END RhpNewFinalizable, _TEXT NESTED_ENTRY RhpNewObject, _TEXT PUSH_COOP_PINVOKE_FRAME r9 - END_PROLOGUE ; R9: transition frame @@ -87,49 +91,55 @@ NewOutOfMemory: POP_COOP_PINVOKE_FRAME jmp RhExceptionHandling_FailedAllocation + NESTED_END RhpNewObject, _TEXT -;; Allocate a string. +;; Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast +;; RAX == string/array size ;; RCX == MethodTable -;; EDX == character/element count -LEAF_ENTRY RhNewString, _TEXT +;; RDX == character/element count +NEW_ARRAY_FAST MACRO - ; we want to limit the element count to the non-negative 32-bit int range - cmp rdx, MAX_STRING_LENGTH - ja StringSizeOverflow + ; r10 = ee_alloc_context pointer, TRASHES r8 + INLINE_GET_ALLOC_CONTEXT_BASE r10, r8 - ; Compute overall allocation size (align(base size + (element size * elements), 8)). - lea rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)] - and rax, -8 + mov r8, rax + mov rax, [r10 + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + mov r9, [r10 + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + sub r9, rax - ; rax == string size + ; rax == new object ptr ; rcx == MethodTable ; rdx == element count + ; r8 == array size + ; r10 == ee_alloc_context pointer + cmp r8, r9 + ja RhpNewVariableSizeObject - INLINE_GETTHREAD r10, r8 + add r8, rax + mov [rax + OFFSETOF__Object__m_pEEType], rcx + mov [rax + OFFSETOF__Array__m_Length], edx + mov [r10 + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 + ret - mov r8, rax - add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc RhpNewArrayRare +ENDM ; NEW_ARRAY_FAST - ; rax == new alloc ptr - ; rcx == MethodTable - ; rdx == element count - ; r8 == array size - ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja RhpNewArrayRare - mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax +;; Allocate a string. +;; RCX == MethodTable +;; EDX == character/element count +LEAF_ENTRY RhNewString, _TEXT - ; calc the new object pointer - sub rax, r8 + ; we want to limit the element count to the non-negative 32-bit int range + cmp rdx, MAX_STRING_LENGTH + ja StringSizeOverflow - mov [rax + OFFSETOF__Object__m_pEEType], rcx - mov [rax + OFFSETOF__String__m_Length], edx + ; Compute overall allocation size (align(base size + (element size * elements), 8)). + lea rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)] + and rax, -8 - ret + NEW_ARRAY_FAST StringSizeOverflow: ; We get here if the size of the final string object can't be represented as an unsigned @@ -139,76 +149,71 @@ StringSizeOverflow: ; rcx holds MethodTable pointer already xor edx, edx ; Indicate that we should throw OOM. jmp RhExceptionHandling_FailedAllocation + LEAF_END RhNewString, _TEXT ;; Allocate one dimensional, zero based array (SZARRAY). 
;; RCX == MethodTable ;; EDX == element count -LEAF_ENTRY RhpNewArray, _TEXT +LEAF_ENTRY RhpNewArrayFast, _TEXT ; we want to limit the element count to the non-negative 32-bit int range cmp rdx, 07fffffffh ja ArraySizeOverflow - ; save element count - mov r8, rdx - ; Compute overall allocation size (align(base size + (element size * elements), 8)). movzx eax, word ptr [rcx + OFFSETOF__MethodTable__m_usComponentSize] - mul rdx - mov edx, [rcx + OFFSETOF__MethodTable__m_uBaseSize] - add rax, rdx - add rax, 7 + imul rax, rdx + lea rax, [rax + SZARRAY_BASE_SIZE + 7] and rax, -8 - mov rdx, r8 + NEW_ARRAY_FAST - ; rax == array size - ; rcx == MethodTable - ; rdx == element count +ArraySizeOverflow: + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. - INLINE_GETTHREAD r10, r8 + ; rcx holds MethodTable pointer already + mov edx, 1 ; Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation - mov r8, rax - add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - jc RhpNewArrayRare +LEAF_END RhpNewArrayFast, _TEXT - ; rax == new alloc ptr - ; rcx == MethodTable - ; rdx == element count - ; r8 == array size - ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] - ja RhpNewArrayRare - mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax +IFNDEF FEATURE_NATIVEAOT +;; Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements). +;; RCX == MethodTable +;; EDX == element count +LEAF_ENTRY RhpNewObjectArrayFast, _TEXT - ; calc the new object pointer - sub rax, r8 + ; Delegate overflow handling to the generic helper conservatively - mov [rax + OFFSETOF__Object__m_pEEType], rcx - mov [rax + OFFSETOF__Array__m_Length], edx + cmp rdx, (40000000h / 8) ; sizeof(void*) + jae RhpNewVariableSizeObject - ret + ; In this case we know the element size is sizeof(void *), or 8 for x64 + ; This helps us in two ways - we can shift instead of multiplying, and + ; there's no need to align the size either -ArraySizeOverflow: - ; We get here if the size of the final array object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an overflow exception that the caller of this allocator understands. + lea eax, [edx * 8 + SZARRAY_BASE_SIZE] - ; rcx holds MethodTable pointer already - mov edx, 1 ; Indicate that we should throw OverflowException - jmp RhExceptionHandling_FailedAllocation -LEAF_END RhpNewArray, _TEXT + ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed + ; to be a multiple of 8. -NESTED_ENTRY RhpNewArrayRare, _TEXT + NEW_ARRAY_FAST + +LEAF_END RhpNewObjectArrayFast, _TEXT +ENDIF ; FEATURE_NATIVEAOT + + +NESTED_ENTRY RhpNewVariableSizeObject, _TEXT ; rcx == MethodTable ; rdx == element count PUSH_COOP_PINVOKE_FRAME r9 - END_PROLOGUE ; r9: transition frame @@ -225,12 +230,12 @@ NESTED_ENTRY RhpNewArrayRare, _TEXT call RhpGcAlloc test rax, rax - jz ArrayOutOfMemory + jz RhpNewVariableSizeObject_OutOfMemory POP_COOP_PINVOKE_FRAME ret -ArrayOutOfMemory: +RhpNewVariableSizeObject_OutOfMemory: ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw ;; an out of memory exception that the caller of this allocator understands. 
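For reference, the renamed RhpNewVariableSizeObject (formerly RhpNewArrayRare) is essentially a coop-frame wrapper around RhpGcAlloc, whose signature appears verbatim in the comments above. A schematic C view, with CaptureTransitionFrame standing in for what PUSH_COOP_PINVOKE_FRAME does in asm and a hypothetical failure-helper prototype:

```c
#include <stdint.h>

// Hypothetical prototypes standing in for the real runtime entry points.
void* RhpGcAlloc(void* pEEType, uint32_t uFlags, uintptr_t numElements,
                 void* pTransitionFrame);
void  RhExceptionHandling_FailedAllocation(void* pEEType, int isOverflow);
void* CaptureTransitionFrame(void);   // PUSH_COOP_PINVOKE_FRAME in the asm

void* RhpNewVariableSizeObject(void* pEEType, uintptr_t numElements)
{
    void* frame = CaptureTransitionFrame();
    void* obj   = RhpGcAlloc(pEEType, 0 /* uFlags */, numElements, frame);
    if (obj == NULL)
        // The asm pops the frame and tail-calls this managed helper;
        // 0 selects the out-of-memory flavor, 1 the overflow flavor.
        RhExceptionHandling_FailedAllocation(pEEType, 0);
    return obj;
}
```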
@@ -241,7 +246,6 @@ ArrayOutOfMemory:
 
     jmp RhExceptionHandling_FailedAllocation
 
-NESTED_END RhpNewArrayRare, _TEXT
-
+NESTED_END RhpNewVariableSizeObject, _TEXT
 
         END
diff --git a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S b/src/coreclr/runtime/arm/AllocFast.S
similarity index 57%
rename from src/coreclr/nativeaot/Runtime/arm/AllocFast.S
rename to src/coreclr/runtime/arm/AllocFast.S
index 76091303696546..312b424859ee72 100644
--- a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S
+++ b/src/coreclr/runtime/arm/AllocFast.S
@@ -4,64 +4,153 @@
 .syntax unified
 .thumb
 
-#include // generated by the build from AsmOffsets.cpp
-#include
+#include "AsmMacros_Shared.h"
 
-// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
-// allocation context then automatically fallback to the slow allocation path.
+// Shared code for RhpNewFast, RhpNewFastAlign8 and RhpNewFastMisalign
 // r0 == MethodTable
-LEAF_ENTRY RhpNewFast, _TEXT
+.macro NEW_FAST Variation
     PROLOG_PUSH "{r4,lr}"
 
-    mov r4, r0 // save MethodTable
+    mov r4, r0                      // save MethodTable
 
-    // r0 = GetThread()
-    INLINE_GETTHREAD
+    // r0 = ee_alloc_context pointer; trashes volatile registers, expects saved lr
+    INLINE_GET_ALLOC_CONTEXT_BASE
 
-    // r4 contains MethodTable pointer
     ldr r2, [r4, #OFFSETOF__MethodTable__m_uBaseSize]
 
-    // r0: Thread pointer
-    // r4: MethodTable pointer
-    // r2: base size
+    // Load potential new object address into r3.
+    ldr r3, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+    // Load and calculate the maximum size of object we can fit.
+    ldr r1, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)]
+    sub r1, r3
+
+    // When doing aligned or misaligned allocation we first check
+    // the alignment and skip to the regular path if it's already
+    // matching the expectation.
+    // Otherwise, we try to allocate size + ASM_MIN_OBJECT_SIZE and
+    // then prepend a dummy free object at the beginning of the
+    // allocation.
+.ifnc \Variation,
+    tst r3, #0x7
+.ifc \Variation,Align8
+    beq 1f // AlreadyAligned
+.else // Variation == "Misalign"
+    bne 1f // AlreadyAligned
+.endif
 
-    ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+    add r2, ASM_MIN_OBJECT_SIZE
+
+    // Determine whether the end of the object is too big for the current allocation context. If so,
+    // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+    cmp r2, r1
+    bhi 2f // AllocFailed
+
+    // Update the alloc pointer to account for the allocation.
     add r2, r3
-    ldr r1, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit]
+    str r2, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)]
+
+    // Initialize the padding object preceding the new object.
+    PREPARE_EXTERNAL_VAR_INDIRECT G_FREE_OBJECT_METHOD_TABLE, r2
+    str r2, [r3, #OFFSETOF__Object__m_pEEType]
+    mov r2, #0
+    str r2, [r3, #OFFSETOF__Array__m_Length]
+
+    // Calculate the new object pointer and initialize it.
+    add r3, ASM_MIN_OBJECT_SIZE
+    str r4, [r3, #OFFSETOF__Object__m_pEEType]
+
+    // Return the object allocated in r0.
+    mov r0, r3
+
+    EPILOG_POP "{r4,pc}"
+.endif // Variation != ""
+
+1: // AlreadyAligned
+
+    // r0: ee_alloc_context pointer
+    // r1: ee_alloc_context.combined_limit
+    // r2: base size
+    // r3: ee_alloc_context.alloc_ptr
+    // r4: MethodTable pointer
+
+    // Determine whether the end of the object is too big for the current allocation context.
If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. cmp r2, r1 - bhi LOCAL_LABEL(RhpNewFast_RarePath) + bhi 2f // AllocFailed - // set the new alloc pointer - str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + // Calculate the new alloc pointer to account for the allocation. + add r2, r3 - // Set the new object's MethodTable pointer + // Set the new object's MethodTable pointer. str r4, [r3, #OFFSETOF__Object__m_pEEType] + // Update the alloc pointer to the newly calculated one. + str r2, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + + // Return the object allocated in r0. mov r0, r3 EPILOG_POP "{r4,pc}" -LOCAL_LABEL(RhpNewFast_RarePath): +2: // AllocFailed mov r0, r4 // restore MethodTable +.ifc \Variation, mov r1, #0 +.else +.ifc \Variation,Align8 + mov r1, #GC_ALLOC_ALIGN8 +.else + mov r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS) +.endif +.endif EPILOG_POP "{r4,lr}" b C_FUNC(RhpNewObject) +.endm + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// r0 == MethodTable +LEAF_ENTRY RhpNewFast, _TEXT + NEW_FAST LEAF_END RhpNewFast, _TEXT + +// Allocate simple object (not finalizable, array or value type) on an 8 byte boundary. +// r0 == MethodTable +LEAF_ENTRY RhpNewFastAlign8, _TEXT + NEW_FAST Align8 +LEAF_END RhpNewFastAlign8, _TEXT + + +// Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload +// itself is 8 byte aligned). +// r0 == MethodTable +LEAF_ENTRY RhpNewFastMisalign, _TEXT + NEW_FAST Misalign +LEAF_END RhpNewFastMisalign, _TEXT + + // Allocate non-array object with finalizer. // r0 == MethodTable -// LEAF_ENTRY RhpNewFinalizable, _TEXT mov r1, #GC_ALLOC_FINALIZE b C_FUNC(RhpNewObject) LEAF_END RhpNewFinalizable, _TEXT +// Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary. +// r0 == MethodTable +LEAF_ENTRY RhpNewFinalizableAlign8, _TEXT + mov r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) + b C_FUNC(RhpNewObject) +LEAF_END RhpNewFinalizableAlign8, _TEXT + + // Allocate non-array object. // r0 == MethodTable // r1 == alloc flags NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - PUSH_COOP_PINVOKE_FRAME r3 // r0: MethodTable @@ -91,15 +180,75 @@ LOCAL_LABEL(NewOutOfMemory): POP_COOP_PINVOKE_FRAME b C_FUNC(RhExceptionHandling_FailedAllocation) - NESTED_END RhpNewObject, _TEXT +// Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArray +// r0 == MethodTable +// r1 == character/element count +// r2 == string/array size +.macro NEW_ARRAY_FAST_PROLOG + PROLOG_PUSH "{r4-r6,lr}" +.endm + +.macro NEW_ARRAY_FAST_TAIL_EPILOG + EPILOG_POP "{r4-r6,lr}" +.endm + +.macro NEW_ARRAY_FAST + mov r4, r0 // Save MethodTable + mov r5, r1 // Save element count + mov r6, r2 // Save string/array size + + // r0 = ee_alloc_context pointer; trashes volatile registers, expects saved lr + INLINE_GET_ALLOC_CONTEXT_BASE + + // r4 == MethodTable + // r5 == element count + // r6 == string/array size + // r0 == ee_alloc_context* + + // Load potential new object address into r3. 
+ ldr r3, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + + // Load and calculate the maximum size of object we can fit + ldr r1, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)] + sub r1, r3 + + // Determine whether the end of the object is too big for the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + cmp r6, r1 + bhi 1f + + // Calculate the alloc pointer to account for the allocation. + add r6, r3 + + // Set the new object's MethodTable pointer and element count. + str r4, [r3, #OFFSETOF__Object__m_pEEType] + str r5, [r3, #OFFSETOF__Array__m_Length] + + // Update the alloc pointer to the newly calculated one. + str r6, [r0, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + + // Return the object allocated in r0. + mov r0, r3 + EPILOG_POP "{r4-r6,pc}" + +1: + mov r0, r4 + mov r1, r5 + // r0 == MethodTable + // r1 == element count + NEW_ARRAY_FAST_TAIL_EPILOG + b C_FUNC(RhpNewVariableSizeObject) +.endm + // Allocate a string. // r0 == MethodTable // r1 == element/character count LEAF_ENTRY RhNewString, _TEXT - PROLOG_PUSH "{r4-r6,lr}" + NEW_ARRAY_FAST_PROLOG + // Make sure computing the overall allocation size won't overflow MOV32 r12, MAX_STRING_LENGTH cmp r1, r12 @@ -114,41 +263,7 @@ LEAF_ENTRY RhNewString, _TEXT #endif bic r2, r2, #3 - mov r4, r0 // Save MethodTable - mov r5, r1 // Save element count - mov r6, r2 // Save string size - // r0 = GetThread() - INLINE_GETTHREAD - // r4 == MethodTable - // r5 == element count - // r6 == string size - // r0 == Thread* - - // Load potential new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - adds r6, r12 - bcs LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB - - ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r6, r12 - bhi LOCAL_LABEL(RhNewString_RarePath) - - // Reload new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Update the alloc pointer to account for the allocation. - str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer and element count. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - str r5, [r12, #OFFSETOF__String__m_Length] - - // Return the object allocated in r0. - mov r0, r12 - EPILOG_POP "{r4-r6,pc}" + NEW_ARRAY_FAST LOCAL_LABEL(StringSizeOverflow): // We get here if the size of the final string object can't be represented as an unsigned @@ -157,29 +272,16 @@ LOCAL_LABEL(StringSizeOverflow): // MethodTable is in r0 already mov r1, 0 // Indicate that we should throw OOM - EPILOG_POP "{r4-r6,lr}" + NEW_ARRAY_FAST_TAIL_EPILOG b C_FUNC(RhExceptionHandling_FailedAllocation) - -LOCAL_LABEL(RhNewString_RarePath): - mov r3, r0 - mov r0, r4 - mov r1, r5 - mov r2, r6 - // r0 == MethodTable - // r1 == element count - // r2 == string size + Thread::m_alloc_context::alloc_ptr - // r3 == Thread - EPILOG_POP "{r4-r6,lr}" - b C_FUNC(RhpNewArrayRare) - LEAF_END RhNewString, _TEXT // Allocate one dimensional, zero based array (SZARRAY). 
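One thing worth spelling out about the SZARRAY helper whose header begins just above and continues below: the count <= 0x10000 case is safe without any overflow check because the component size itself is only 16 bits wide. In C terms (illustrative; the asm instead uses umull and checks the high word in the ArraySizeBig path):

```c
#include <stdint.h>

// Why counts up to 0x10000 need no overflow handling on 32-bit arm:
// m_usComponentSize <= 0xFFFF, so count * componentSize <= 0xFFFF0000 and
// the small base-size/alignment adjustment still fits in 32 bits.
static int ComputeArraySize32(uint32_t count, uint16_t componentSize,
                              uint32_t baseSize, uint32_t* outSize)
{
    uint64_t size = (uint64_t)count * componentSize + baseSize + 3;
    if (size > 0xFFFFFFFFu)
        return 0;                      // overflow: fail the allocation
    *outSize = (uint32_t)size & ~3u;   // align to 4 (bic r2, r2, #3)
    return 1;
}
```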
// r0 == MethodTable // r1 == element count -LEAF_ENTRY RhpNewArray, _TEXT - PROLOG_PUSH "{r4-r6,lr}" +LEAF_ENTRY RhpNewArrayFast, _TEXT + NEW_ARRAY_FAST_PROLOG // Compute overall allocation size (align(base size + (element size * elements), 4)). // if the element count is <= 0x10000, no overflow is possible because the component @@ -189,47 +291,11 @@ LEAF_ENTRY RhpNewArray, _TEXT cmp r1, #0x10000 bhi LOCAL_LABEL(ArraySizeBig) umull r2, r3, r2, r1 - ldr r3, [r0, #OFFSETOF__MethodTable__m_uBaseSize] - adds r2, r3 - adds r2, #3 + adds r2, #(SZARRAY_BASE_SIZE + 3) LOCAL_LABEL(ArrayAlignSize): bic r2, r2, #3 - mov r4, r0 // Save MethodTable - mov r5, r1 // Save element count - mov r6, r2 // Save array size - // r0 = GetThread() - INLINE_GETTHREAD - // r4 == MethodTable - // r5 == element count - // r6 == array size - // r0 == Thread* - - // Load potential new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - adds r6, r12 - bcs LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB - - ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r6, r12 - bhi LOCAL_LABEL(RhpNewArray_RarePath) - - // Reload new object address into r12. - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Update the alloc pointer to account for the allocation. - str r6, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer and element count. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - str r5, [r12, #OFFSETOF__Array__m_Length] - - // Return the object allocated in r0. - mov r0, r12 - EPILOG_POP "{r4-r6,pc}" + NEW_ARRAY_FAST LOCAL_LABEL(ArraySizeBig): // if the element count is negative, it's an overflow error @@ -253,7 +319,7 @@ LOCAL_LABEL(ArrayOutOfMemoryFinal): // MethodTable is in r0 already mov r1, #0 // Indicate that we should throw OOM. - EPILOG_POP "{r4-r6,lr}" + NEW_ARRAY_FAST_TAIL_EPILOG b C_FUNC(RhExceptionHandling_FailedAllocation) LOCAL_LABEL(ArraySizeOverflow): @@ -263,35 +329,40 @@ LOCAL_LABEL(ArraySizeOverflow): // MethodTable is in r0 already mov r1, #1 // Indicate that we should throw OverflowException - EPILOG_POP "{r4-r6,lr}" + NEW_ARRAY_FAST_TAIL_EPILOG b C_FUNC(RhExceptionHandling_FailedAllocation) +LEAF_END RhpNewArrayFast, _TEXT -LOCAL_LABEL(RhpNewArray_RarePath): - mov r3, r0 - mov r0, r4 - mov r1, r5 - mov r2, r6 - // r0 == MethodTable - // r1 == element count - // r2 == array size + Thread::m_alloc_context::alloc_ptr - // r3 == Thread - EPILOG_POP "{r4-r6,lr}" - b C_FUNC(RhpNewArrayRare) -LEAF_END RhpNewArray, _TEXT +#ifndef FEATURE_NATIVEAOT +// Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements). 
+// r0 == MethodTable +// r1 == element count +LEAF_ENTRY RhpNewObjectArrayFast, _TEXT + NEW_ARRAY_FAST_PROLOG + + // Delegate overflow handling to the generic helper conservatively + + mov r2, #1 << 28 // 0x40000000 / sizeof(void*) + cmp r1, r2 + bhs LOCAL_LABEL(RhpNewObjectArrayFast_RarePath) + + mov r2, #SZARRAY_BASE_SIZE + add r2, r2, r1, lsl #2 + + NEW_ARRAY_FAST + +LOCAL_LABEL(RhpNewObjectArrayFast_RarePath): + NEW_ARRAY_FAST_TAIL_EPILOG + b C_FUNC(RhpNewVariableSizeObject) +LEAF_END RhpNewObjectArrayFast, _TEXT +#endif // Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. // r0 == MethodTable // r1 == element count -// r2 == array size + Thread::m_alloc_context::alloc_ptr -// r3 == Thread -NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from r2. - ldr r12, [r3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - sub r2, r12 - +NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler PUSH_COOP_PINVOKE_FRAME r3 // Preserve the MethodTable in r5. @@ -304,12 +375,12 @@ NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler blx C_FUNC(RhpGcAlloc) // Test for failure (NULL return). - cbz r0, LOCAL_LABEL(ArrayOutOfMemory) + cbz r0, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory) POP_COOP_PINVOKE_FRAME bx lr -LOCAL_LABEL(ArrayOutOfMemory): +LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory): mov r0, r5 // MethodTable mov r1, #0 // Indicate that we should throw OOM. @@ -318,129 +389,13 @@ LOCAL_LABEL(ArrayOutOfMemory): b C_FUNC(RhExceptionHandling_FailedAllocation) -NESTED_END RhpNewArrayRare, _TEXT - -// Allocate simple object (not finalizable, array or value type) on an 8 byte boundary. -// r0 == MethodTable -LEAF_ENTRY RhpNewFastAlign8, _TEXT - PROLOG_PUSH "{r4,lr}" - - mov r4, r0 // save MethodTable - - // r0 = GetThread() - INLINE_GETTHREAD - - // Fetch object size into r2. - ldr r2, [r4, #OFFSETOF__MethodTable__m_uBaseSize] - - // r4: MethodTable pointer - // r0: Thread pointer - // r2: base size - - // Load potential new object address into r3. Cache this result in r12 as well for the common case - // where the allocation succeeds (r3 will be overwritten in the following bounds check). - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - mov r12, r3 - - // Check whether the current allocation context is already aligned for us. - tst r3, #0x7 - bne LOCAL_LABEL(Alloc8Failed) +NESTED_END RhpNewVariableSizeObject, _TEXT - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r2, r3 - bhi LOCAL_LABEL(Alloc8Failed) - - // Update the alloc pointer to account for the allocation. - str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - - // Return the object allocated in r0. - mov r0, r12 - - EPILOG_POP "{r4,pc}" - -LOCAL_LABEL(Alloc8Failed): - // Fast allocation failed. Call slow helper with flags set to indicate an 8-byte alignment and no - // finalization. - mov r0, r4 // restore MethodTable - mov r1, #GC_ALLOC_ALIGN8 - EPILOG_POP "{r4,lr}" - b C_FUNC(RhpNewObject) - -LEAF_END RhpNewFastAlign8, _TEXT - -// Allocate a finalizable object (by definition not an array or value type) on an 8 byte boundary. 
-// r0 == MethodTable -LEAF_ENTRY RhpNewFinalizableAlign8, _TEXT - mov r1, #(GC_ALLOC_FINALIZE | GC_ALLOC_ALIGN8) - b C_FUNC(RhpNewObject) -LEAF_END RhpNewFinalizableAlign8, _TEXT - -// Allocate a value type object (i.e. box it) on an 8 byte boundary + 4 (so that the value type payload -// itself is 8 byte aligned). -// r0 == MethodTable -LEAF_ENTRY RhpNewFastMisalign, _TEXT - PROLOG_PUSH "{r4,lr}" - - mov r4, r0 // save MethodTable - - // r0 = GetThread() - INLINE_GETTHREAD - - // Fetch object size into r2. - ldr r2, [r4, #OFFSETOF__MethodTable__m_uBaseSize] - - // r4: MethodTable pointer - // r0: Thread pointer - // r2: base size - - // Load potential new object address into r3. Cache this result in r12 as well for the common case - // where the allocation succeeds (r3 will be overwritten in the following bounds check). - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - mov r12, r3 - - // Check whether the current allocation context is already aligned for us (for boxing that means the - // address % 8 == 4, so the value type payload following the MethodTable* is actually 8-byte aligned). - tst r3, #0x7 - beq LOCAL_LABEL(BoxAlloc8Failed) - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp r2, r3 - bhi LOCAL_LABEL(BoxAlloc8Failed) - - // Update the alloc pointer to account for the allocation. - str r2, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Set the new object's MethodTable pointer. - str r4, [r12, #OFFSETOF__Object__m_pEEType] - - // Return the object allocated in r0. - mov r0, r12 - - EPILOG_POP "{r4,pc}" - -LOCAL_LABEL(BoxAlloc8Failed): - // Fast allocation failed. Call slow helper with flags set to indicate an 8+4 byte alignment and no - // finalization. - mov r0, r4 // restore MethodTable - mov r1, #(GC_ALLOC_ALIGN8 | GC_ALLOC_ALIGN8_BIAS) - EPILOG_POP "{r4,lr}" - b C_FUNC(RhpNewObject) - -LEAF_END RhpNewFastMisalign, _TEXT // Allocate an array on an 8 byte boundary. // r0 == MethodTable // r1 == element count -NESTED_ENTRY RhpNewArrayAlign8, _TEXT, NoHandler +NESTED_ENTRY RhpNewArrayFastAlign8, _TEXT, NoHandler PUSH_COOP_PINVOKE_FRAME r3 @@ -499,4 +454,4 @@ LOCAL_LABEL(Array8OutOfMemory1): POP_COOP_PINVOKE_FRAME b C_FUNC(RhExceptionHandling_FailedAllocation) -NESTED_END RhpNewArrayAlign8, _TEXT +NESTED_END RhpNewArrayFastAlign8, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/runtime/arm64/AllocFast.S similarity index 61% rename from src/coreclr/nativeaot/Runtime/arm64/AllocFast.S rename to src/coreclr/runtime/arm64/AllocFast.S index 6c61b2de356393..14fb2b29a4c59f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S +++ b/src/coreclr/runtime/arm64/AllocFast.S @@ -1,31 +1,15 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
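Before moving into the arm64 port, a quick recap of the arm change above: the deleted standalone RhpNewFastAlign8/RhpNewFastMisalign bodies are subsumed by the NEW_FAST macro's padding trick. A C sketch of that trick, with illustrative names and an assumed 32-bit minimum object size:

```c
#include <stdint.h>

#define ASM_MIN_OBJECT_SIZE 12u          // illustrative 32-bit minimum object size

extern void* g_pFreeObjectMethodTable;   // the runtime's free-object MethodTable

// When the bump pointer lacks the required alignment (addr % 8 == 0 for
// Align8, addr % 8 == 4 for Misalign), NEW_FAST allocates
// size + ASM_MIN_OBJECT_SIZE and plants a zero-length free object in front.
static char* PlantAlignmentPadding(char* allocPtr)
{
    ((void**)allocPtr)[0]    = g_pFreeObjectMethodTable;  // padding object's MT
    ((uint32_t*)allocPtr)[1] = 0;                         // padding array length
    return allocPtr + ASM_MIN_OBJECT_SIZE;                // real object starts here
}
```

Allocation pointers stay 4-byte aligned, so a single tst of the low three bits is enough to distinguish the 0 and 4 cases in the asm.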
-#include -#include "AsmOffsets.inc" - -// GC type flags -#define GC_ALLOC_FINALIZE 1 - -// -// Rename fields of nested structs -// -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) - - +#include "AsmMacros_Shared.h" // Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's // allocation context then automatically fallback to the slow allocation path. // x0 == MethodTable LEAF_ENTRY RhpNewFast, _TEXT - // x1 = GetThread() -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_1 -#else - INLINE_GETTHREAD x1 -#endif + // x3 = ee_alloc_context pointer + INLINE_GET_ALLOC_CONTEXT_BASE x3 // // x0 contains MethodTable pointer @@ -34,34 +18,41 @@ // // x0: MethodTable pointer - // x1: Thread pointer // x2: base size + // x3: ee_alloc_context pointer // // Load potential new object address into x12. - ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ldr x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] - // Determine whether the end of the object would lie outside of the current allocation context. If so, + // Load and calculate the maximum size of object we can fit. + ldr x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)] + sub x13, x13, x12 + + // Determine whether the end of the object is too big for the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi LOCAL_LABEL(RhpNewFast_RarePath) - // Update the alloc pointer to account for the allocation. - str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + // Calculate the new alloc pointer to account for the allocation. + add x2, x2, x12 - // Set the new objects MethodTable pointer + // Set the new object's MethodTable pointer. str x0, [x12, #OFFSETOF__Object__m_pEEType] + // Update the alloc pointer to the newly calculated one. + str x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + mov x0, x12 ret LOCAL_LABEL(RhpNewFast_RarePath): mov x1, #0 b C_FUNC(RhpNewObject) + LEAF_END RhpNewFast, _TEXT + // Allocate non-array object with finalizer. // x0 == MethodTable LEAF_ENTRY RhpNewFinalizable, _TEXT @@ -69,6 +60,7 @@ LOCAL_LABEL(RhpNewFast_RarePath): b C_FUNC(RhpNewObject) LEAF_END RhpNewFinalizable, _TEXT + // Allocate non-array object. // x0 == MethodTable // x1 == alloc flags @@ -87,7 +79,7 @@ LOCAL_LABEL(RhpNewFast_RarePath): // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) bl C_FUNC(RhpGcAlloc) - // Set the new objects MethodTable pointer on success. + // Set the new object's MethodTable pointer on success. cbz x0, LOCAL_LABEL(NewOutOfMemory) .cfi_remember_state @@ -107,57 +99,73 @@ LOCAL_LABEL(NewOutOfMemory): NESTED_END RhpNewObject, _TEXT -// Allocate a string. 
+ +// Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast // x0 == MethodTable -// x1 == element/character count - LEAF_ENTRY RhNewString, _TEXT - // Make sure computing the overall allocation size wont overflow - movz x2, MAX_STRING_LENGTH & 0xFFFF - movk x2, MAX_STRING_LENGTH >> 16, lsl 16 - cmp x1, x2 - bhi LOCAL_LABEL(StringSizeOverflow) +// x1 == character/element count +// x2 == string/array size + .macro NEW_ARRAY_FAST - // Compute overall allocation size (align(base size + (element size * elements), 8)). - mov w2, #STRING_COMPONENT_SIZE - mov x3, #(STRING_BASE_SIZE + 7) - umaddl x2, w1, w2, x3 // x2 = w1 * w2 + x3 - and x2, x2, #-8 + INLINE_GET_ALLOC_CONTEXT_BASE x3 - // x0 == MethodTable - // x1 == element count - // x2 == string size + // Load potential new object address into x12. + ldr x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + + // Load and calculate the maximum size of object we can fit. + ldr x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)] + sub x13, x13, x12 -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_3 + // Determine whether the end of the object is too big for the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + cmp x2, x13 +#if defined(__APPLE__) + bhi 1f #else - INLINE_GETTHREAD x3 + bhi C_FUNC(RhpNewVariableSizeObject) #endif - // Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. + // Calculate the new alloc pointer to account for the allocation. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi LOCAL_LABEL(RhNewString_Rare) - - // Reload new object address into r12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - - // Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - // Set the new objects MethodTable pointer and element count. + // Set the new object's MethodTable pointer and element count. str x0, [x12, #OFFSETOF__Object__m_pEEType] str x1, [x12, #OFFSETOF__Array__m_Length] + // Update the alloc pointer to the newly calculated one. + str x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + // Return the object allocated in x0. mov x0, x12 ret +#if defined(__APPLE__) +1: + b C_FUNC(RhpNewVariableSizeObject) +#endif + + .endm + + +// Allocate a string. +// x0 == MethodTable +// x1 == element/character count + LEAF_ENTRY RhNewString, _TEXT + + // Make sure computing the overall allocation size wont overflow + movz x2, MAX_STRING_LENGTH & 0xFFFF + movk x2, MAX_STRING_LENGTH >> 16, lsl 16 + cmp x1, x2 + bhi LOCAL_LABEL(StringSizeOverflow) + + // Compute overall allocation size (align(base size + (element size * elements), 8)). + mov w2, #STRING_COMPONENT_SIZE + mov x3, #(STRING_BASE_SIZE + 7) + umaddl x2, w1, w2, x3 // x2 = w1 * w2 + x3 + and x2, x2, #-8 + + NEW_ARRAY_FAST + LOCAL_LABEL(StringSizeOverflow): // We get here if the length of the final string object can not be represented as an unsigned // 32-bit value. 
We are going to tail-call to a managed helper that will throw @@ -167,14 +175,13 @@ LOCAL_LABEL(StringSizeOverflow): mov x1, #1 // Indicate that we should throw OverflowException b C_FUNC(RhExceptionHandling_FailedAllocation) -LOCAL_LABEL(RhNewString_Rare): - b C_FUNC(RhpNewArrayRare) LEAF_END RhNewString, _Text + // Allocate one dimensional, zero based array (SZARRAY). // x0 == MethodTable // x1 == element count - LEAF_ENTRY RhpNewArray, _Text + LEAF_ENTRY RhpNewArrayFast, _Text // We want to limit the element count to the non-negative 32-bit int range. // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component @@ -186,69 +193,64 @@ LOCAL_LABEL(RhNewString_Rare): ldrh w2, [x0, #OFFSETOF__MethodTable__m_usComponentSize] umull x2, w1, w2 - ldr w3, [x0, #OFFSETOF__MethodTable__m_uBaseSize] - add x2, x2, x3 - add x2, x2, #7 + add x2, x2, #(SZARRAY_BASE_SIZE + 7) and x2, x2, #-8 - // x0 == MethodTable - // x1 == element count - // x2 == array size + NEW_ARRAY_FAST -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_3 -#else - INLINE_GETTHREAD x3 -#endif +LOCAL_LABEL(ArraySizeOverflow): + // We get here if the size of the final array object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. - // Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + // x0 holds MethodTable pointer already + mov x1, #1 // Indicate that we should throw OverflowException + b C_FUNC(RhExceptionHandling_FailedAllocation) - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi LOCAL_LABEL(RhpNewArray_Rare) + LEAF_END RhpNewArrayFast, _TEXT - // Reload new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - // Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] +#ifndef FEATURE_NATIVEAOT +// Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements). +// x0 == MethodTable +// x1 == element count + LEAF_ENTRY RhpNewObjectArrayFast, _Text - // Set the new objects MethodTable pointer and element count. - str x0, [x12, #OFFSETOF__Object__m_pEEType] - str x1, [x12, #OFFSETOF__Array__m_Length] + // Delegate overflow handling to the generic helper conservatively - // Return the object allocated in r0. - mov x0, x12 + mov x2, #(0x40000000 / 8) // sizeof(void*) + cmp x1, x2 +#if defined(__APPLE__) + bhs 1f +#else + bhs C_FUNC(RhpNewVariableSizeObject) +#endif - ret + // In this case we know the element size is sizeof(void *), or 8 for arm64 + // This helps us in two ways - we can shift instead of multiplying, and + // there's no need to align the size either -LOCAL_LABEL(ArraySizeOverflow): - // We get here if the size of the final array object can not be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. 
+ lsl x2, x1, #3 + add x2, x2, #SZARRAY_BASE_SIZE - // x0 holds MethodTable pointer already - mov x1, #1 // Indicate that we should throw OverflowException - b C_FUNC(RhExceptionHandling_FailedAllocation) + // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed + // to be a multiple of 8. + + NEW_ARRAY_FAST + +#if defined(__APPLE__) +1: + b C_FUNC(RhpNewVariableSizeObject) +#endif + + LEAF_END RhpNewObjectArrayFast, _TEXT +#endif -LOCAL_LABEL(RhpNewArray_Rare): - b C_FUNC(RhpNewArrayRare) - LEAF_END RhpNewArray, _TEXT // Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. // x0 == MethodTable // x1 == element count -// x2 == array size + Thread::m_alloc_context::alloc_ptr -// x3 == Thread - NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from x2. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - sub x2, x2, x12 + NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler PUSH_COOP_PINVOKE_FRAME x3 @@ -261,15 +263,15 @@ LOCAL_LABEL(RhpNewArray_Rare): // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) bl C_FUNC(RhpGcAlloc) - // Set the new objects MethodTable pointer and length on success. - cbz x0, LOCAL_LABEL(ArrayOutOfMemory) + // Set the new object's MethodTable pointer and length on success. + cbz x0, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory) .cfi_remember_state POP_COOP_PINVOKE_FRAME EPILOG_RETURN .cfi_restore_state -LOCAL_LABEL(ArrayOutOfMemory): +LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory): // This is the OOM failure path. We are going to tail-call to a managed helper that will throw // an out of memory exception that the caller of this allocator understands. @@ -279,4 +281,4 @@ LOCAL_LABEL(ArrayOutOfMemory): POP_COOP_PINVOKE_FRAME b C_FUNC(RhExceptionHandling_FailedAllocation) - NESTED_END RhpNewArrayRare, _TEXT + NESTED_END RhpNewVariableSizeObject, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm b/src/coreclr/runtime/arm64/AllocFast.asm similarity index 66% rename from src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm rename to src/coreclr/runtime/arm64/AllocFast.asm index d8e506335d77f2..290d4c53159f20 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm +++ b/src/coreclr/runtime/arm64/AllocFast.asm @@ -1,7 +1,7 @@ ;; Licensed to the .NET Foundation under one or more agreements. ;; The .NET Foundation licenses this file to you under the MIT license. -#include "AsmMacros.h" +#include "AsmMacros_Shared.h" TEXTAREA @@ -10,8 +10,8 @@ ;; x0 == MethodTable LEAF_ENTRY RhpNewFast - ;; x1 = GetThread(), TRASHES x2 - INLINE_GETTHREAD x1, x2 + ;; x3 = ee_alloc_context pointer, TRASHES x2 + INLINE_GET_ALLOC_CONTEXT_BASE x3, x2 ;; ;; x0 contains MethodTable pointer @@ -20,26 +20,31 @@ ;; ;; x0: MethodTable pointer - ;; x1: Thread pointer ;; x2: base size + ;; x3: ee_alloc_context pointer ;; ;; Load potential new object address into x12. - ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ldr x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] - ;; Determine whether the end of the object would lie outside of the current allocation context. If so, + ;; Load and calculate the maximum size of object we can fit. 
+ ldr x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)] + sub x13, x13, x12 + + ;; Determine whether the end of the object is too big for the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi RhpNewFast_RarePath - ;; Update the alloc pointer to account for the allocation. - str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ;; Calculate the new alloc pointer to account for the allocation. + add x2, x2, x12 - ;; Set the new object's MethodTable pointer + ;; Set the new object's MethodTable pointer. str x0, [x12, #OFFSETOF__Object__m_pEEType] + ;; Update the alloc pointer to the newly calculated one. + str x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + mov x0, x12 ret @@ -90,53 +95,62 @@ NewOutOfMemory NESTED_END RhpNewObject -;; Allocate a string. +;; Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast ;; x0 == MethodTable -;; x1 == element/character count - LEAF_ENTRY RhNewString - ;; Make sure computing the overall allocation size won't overflow - movz x2, #(MAX_STRING_LENGTH & 0xFFFF) - movk x2, #(MAX_STRING_LENGTH >> 16), lsl #16 - cmp x1, x2 - bhi StringSizeOverflow - - ;; Compute overall allocation size (align(base size + (element size * elements), 8)). - mov w2, #STRING_COMPONENT_SIZE - mov x3, #(STRING_BASE_SIZE + 7) - umaddl x2, w1, w2, x3 ; x2 = w1 * w2 + x3 - and x2, x2, #-8 +;; x1 == character/element count +;; x2 == string/array size + MACRO + NEW_ARRAY_FAST - ; x0 == MethodTable - ; x1 == element count - ; x2 == string size - - INLINE_GETTHREAD x3, x5 + INLINE_GET_ALLOC_CONTEXT_BASE x3, x5 ;; Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ldr x12, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] - ;; Determine whether the end of the object would lie outside of the current allocation context. If so, - ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi RhpNewArrayRare + ;; Load and calculate the maximum size of object we can fit. + ldr x13, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)] + sub x13, x13, x12 - ;; Reload new object address into r12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ;; Determine whether the end of the object is too big for the current allocation context. If so, + ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. + cmp x2, x13 + bhi RhpNewVariableSizeObject - ;; Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ;; Calculate the new alloc pointer to account for the allocation. + add x2, x2, x12 ;; Set the new object's MethodTable pointer and element count. str x0, [x12, #OFFSETOF__Object__m_pEEType] str x1, [x12, #OFFSETOF__Array__m_Length] + ;; Update the alloc pointer to the newly calculated one. + str x2, [x3, #(OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)] + ;; Return the object allocated in x0. mov x0, x12 ret + MEND + +;; Allocate a string. 
+;; x0 == MethodTable +;; x1 == element/character count + LEAF_ENTRY RhNewString + ;; Make sure computing the overall allocation size won't overflow + movz x2, #(MAX_STRING_LENGTH & 0xFFFF) + movk x2, #(MAX_STRING_LENGTH >> 16), lsl #16 + cmp x1, x2 + bhi StringSizeOverflow + + ;; Compute overall allocation size (align(base size + (element size * elements), 8)). + mov w2, #STRING_COMPONENT_SIZE + mov x3, #(STRING_BASE_SIZE + 7) + umaddl x2, w1, w2, x3 ; x2 = w1 * w2 + x3 + and x2, x2, #-8 + + NEW_ARRAY_FAST + StringSizeOverflow ; We get here if the length of the final string object can't be represented as an unsigned ; 32-bit value. We're going to tail-call to a managed helper that will throw @@ -150,7 +164,7 @@ StringSizeOverflow ;; Allocate one dimensional, zero based array (SZARRAY). ;; x0 == MethodTable ;; x1 == element count - LEAF_ENTRY RhpNewArray + LEAF_ENTRY RhpNewArrayFast ;; We want to limit the element count to the non-negative 32-bit int range. ;; If the element count is <= 0x7FFFFFFF, no overflow is possible because the component @@ -162,62 +176,52 @@ StringSizeOverflow ldrh w2, [x0, #OFFSETOF__MethodTable__m_usComponentSize] umull x2, w1, w2 - ldr w3, [x0, #OFFSETOF__MethodTable__m_uBaseSize] - add x2, x2, x3 - add x2, x2, #7 + add x2, x2, #(SZARRAY_BASE_SIZE + 7) and x2, x2, #-8 - ; x0 == MethodTable - ; x1 == element count - ; x2 == array size + NEW_ARRAY_FAST - INLINE_GETTHREAD x3, x5 +ArraySizeOverflow + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. - ;; Load potential new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ; x0 holds MethodTable pointer already + mov x1, #1 ; Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhpNewArrayFast - ;; Determine whether the end of the object would lie outside of the current allocation context. If so, - ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. - add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] - cmp x2, x12 - bhi RhpNewArrayRare +#ifndef FEATURE_NATIVEAOT +;; Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements). +;; x0 == MethodTable +;; x1 == element count + LEAF_ENTRY RhpNewObjectArrayFast - ;; Reload new object address into x12. - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + ; Delegate overflow handling to the generic helper conservatively - ;; Update the alloc pointer to account for the allocation. - str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + mov x2, #(0x40000000 / 8) ; sizeof(void*) + cmp x1, x2 + bhs RhpNewVariableSizeObject - ;; Set the new object's MethodTable pointer and element count. - str x0, [x12, #OFFSETOF__Object__m_pEEType] - str x1, [x12, #OFFSETOF__Array__m_Length] + ; In this case we know the element size is sizeof(void *), or 8 for arm64 + ; This helps us in two ways - we can shift instead of multiplying, and + ; there's no need to align the size either - ;; Return the object allocated in r0. - mov x0, x12 + lsl x2, x1, #3 + add x2, x2, #SZARRAY_BASE_SIZE - ret + ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed + ; to be a multiple of 8. 
-ArraySizeOverflow - ; We get here if the size of the final array object can't be represented as an unsigned - ; 32-bit value. We're going to tail-call to a managed helper that will throw - ; an overflow exception that the caller of this allocator understands. + NEW_ARRAY_FAST - ; x0 holds MethodTable pointer already - mov x1, #1 ; Indicate that we should throw OverflowException - b RhExceptionHandling_FailedAllocation - LEAF_END RhpNewArray + LEAF_END RhpNewObjectArrayFast +#endif ;; Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. ;; x0 == MethodTable ;; x1 == element count -;; x2 == array size + Thread::m_alloc_context::alloc_ptr -;; x3 == Thread - NESTED_ENTRY RhpNewArrayRare - - ; Recover array size by subtracting the alloc_ptr from x2. - PROLOG_NOP ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] - PROLOG_NOP sub x2, x2, x12 + NESTED_ENTRY RhpNewVariableSizeObject PUSH_COOP_PINVOKE_FRAME x3 @@ -230,12 +234,12 @@ ArraySizeOverflow ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) bl RhpGcAlloc - cbz x0, ArrayOutOfMemory + cbz x0, RhpNewVariableSizeObject_OutOfMemory POP_COOP_PINVOKE_FRAME EPILOG_RETURN -ArrayOutOfMemory +RhpNewVariableSizeObject_OutOfMemory ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw ;; an out of memory exception that the caller of this allocator understands. @@ -245,6 +249,6 @@ ArrayOutOfMemory POP_COOP_PINVOKE_FRAME EPILOG_NOP b RhExceptionHandling_FailedAllocation - NESTED_END RhpNewArrayRare + NESTED_END RhpNewVariableSizeObject END diff --git a/src/coreclr/runtime/i386/AllocFast.S b/src/coreclr/runtime/i386/AllocFast.S new file mode 100644 index 00000000000000..6f9724980fbd41 --- /dev/null +++ b/src/coreclr/runtime/i386/AllocFast.S @@ -0,0 +1,276 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include "AsmMacros_Shared.h" + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// ECX == MethodTable +LEAF_ENTRY RhpNewFast, _TEXT + // edx = ee_alloc_context pointer, TRASHES eax + INLINE_GET_ALLOC_CONTEXT_BASE + + mov eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] + add eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + jc LOCAL_LABEL(RhpNewFast_AllocFailed) + cmp eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + ja LOCAL_LABEL(RhpNewFast_AllocFailed) + mov [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax + + // calc the new object pointer and initialize it + sub eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] + mov [eax + OFFSETOF__Object__m_pEEType], ecx + + ret + +LOCAL_LABEL(RhpNewFast_AllocFailed): + xor edx, edx + jmp RhpNewObject +LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// ECX == MethodTable +LEAF_ENTRY RhpNewFinalizable, _TEXT + mov edx, GC_ALLOC_FINALIZE // Flags + jmp RhpNewObject +LEAF_END RhpNewFinalizable, _TEXT + +// Allocate non-array object +// ECX == MethodTable +// EDX == alloc flags +LEAF_ENTRY RhpNewObject, _TEXT + PUSH_COOP_PINVOKE_FRAME eax + + // Preserve MethodTable in ESI. 
+        mov         esi, ecx
+
+        push        eax             // transition frame
+        push        0               // numElements
+        push        edx
+        push        ecx
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call        RhpGcAlloc
+
+        add         esp, 16
+
+        test        eax, eax
+        jz          LOCAL_LABEL(NewOutOfMemory)
+
+        POP_COOP_PINVOKE_FRAME
+
+        ret
+
+LOCAL_LABEL(NewOutOfMemory):
+        // This is the OOM failure path. We're going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov         ecx, esi        // Restore MethodTable pointer
+        xor         edx, edx        // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+
+        jmp         RhExceptionHandling_FailedAllocation
+LEAF_END RhpNewObject, _TEXT
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast
+// EAX == string/array size
+// ECX == MethodTable
+// EDX == character/element count
+.macro NEW_ARRAY_FAST_PROLOG
+        ESP_PROLOG_BEG
+        ESP_PROLOG_PUSH ecx
+        ESP_PROLOG_PUSH edx
+        ESP_PROLOG_END
+.endm
+
+.macro NEW_ARRAY_FAST
+        // edx = ee_alloc_context pointer, TRASHES ecx
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        // ECX == scratch
+        // EAX == allocation size
+        // EDX == ee_alloc_context pointer
+
+        mov         ecx, eax
+        add         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        jc          1f
+        cmp         eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        ja          1f
+
+        // ECX == allocation size
+        // EAX == new alloc ptr
+        // EDX == ee_alloc_context pointer
+
+        // set the new alloc pointer
+        mov         [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax
+
+        // calc the new object pointer
+        sub         eax, ecx
+
+        ESP_EPILOG_BEG
+        // Restore the element count and put it in edx
+        ESP_EPILOG_POP edx
+        // Restore the MethodTable and put it in ecx
+        ESP_EPILOG_POP ecx
+        ESP_EPILOG_END
+
+        // set the new object's MethodTable pointer and element count
+        mov         [eax + OFFSETOF__Object__m_pEEType], ecx
+        mov         [eax + OFFSETOF__Array__m_Length], edx
+        ret
+
+1:
+        ESP_EPILOG_BEG
+        // Restore the element count and put it in edx
+        ESP_EPILOG_POP edx
+        // Restore the MethodTable and put it in ecx
+        ESP_EPILOG_POP ecx
+        ESP_EPILOG_END
+
+        jmp         RhpNewVariableSizeObject
+.endm
+
+// Allocate a new string.
+// ECX == MethodTable
+// EDX == element count
+LEAF_ENTRY RhNewString, _TEXT
+        // Make sure computing the aligned overall allocation size won't overflow
+        cmp         edx, MAX_STRING_LENGTH
+        ja          LOCAL_LABEL(RhNewString_StringSizeOverflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 4)).
+        lea         eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)]
+        and         eax, -4
+
+        NEW_ARRAY_FAST_PROLOG
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(RhNewString_StringSizeOverflow):
+        // We get here if the size of the final string object can't be represented as an unsigned
+        // 32-bit value. We're going to tail-call to a managed helper that will throw
+        // an OOM exception that the caller of this allocator understands.
+
+        // ecx holds MethodTable pointer already
+        xor         edx, edx        // Indicate that we should throw OOM.
+        jmp         RhExceptionHandling_FailedAllocation
+LEAF_END RhNewString, _TEXT
+
+
+// Allocate one dimensional, zero based array (SZARRAY).
+// ECX == MethodTable
+// EDX == element count
+LEAF_ENTRY RhpNewArrayFast, _TEXT
+        NEW_ARRAY_FAST_PROLOG
+
+        // Compute overall allocation size (align(base size + (element size * elements), 4)).
+ // if the element count is <= 0x10000, no overflow is possible because the component size is + // <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case + // (32 dimensional MdArray) is less than 0xffff. + movzx eax, word ptr [ecx + OFFSETOF__MethodTable__m_usComponentSize] + cmp edx, 0x010000 + ja LOCAL_LABEL(RhpNewArrayFast_ArraySizeBig) + mul edx + lea eax, [eax + SZARRAY_BASE_SIZE + 3] +LOCAL_LABEL(RhpNewArrayFast_ArrayAlignSize): + and eax, -4 + + NEW_ARRAY_FAST + +LOCAL_LABEL(RhpNewArrayFast_ArraySizeBig): + // Compute overall allocation size (align(base size + (element size * elements), 4)). + // if the element count is negative, it's an overflow, otherwise it's out of memory + cmp edx, 0 + jl LOCAL_LABEL(RhpNewArrayFast_ArraySizeOverflow) + mul edx + jc LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame) + add eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] + jc LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame) + add eax, 3 + jc LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame) + jmp LOCAL_LABEL(RhpNewArrayFast_ArrayAlignSize) + +LOCAL_LABEL(RhpNewArrayFast_ArrayOutOfMemoryNoFrame): + ESP_EPILOG_FREE 8 + + // ecx holds MethodTable pointer already + xor edx, edx // Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +LOCAL_LABEL(RhpNewArrayFast_ArraySizeOverflow): + ESP_EPILOG_FREE 8 + + // We get here if the size of the final array object can't be represented as an unsigned + // 32-bit value. We're going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // ecx holds MethodTable pointer already + mov edx, 1 // Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation +LEAF_END RhpNewArrayFast, _TEXT + + +#ifndef FEATURE_NATIVEAOT +// Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements). +// ECX == MethodTable +// EDX == element count +LEAF_ENTRY RhpNewObjectArrayFast, _TEXT + // Delegate overflow handling to the generic helper conservatively + + cmp edx, (0x40000000 / 4) // sizeof(void*) + jae RhpNewVariableSizeObject + + // In this case we know the element size is sizeof(void *), or 4 for x86 + // This helps us in two ways - we can shift instead of multiplying, and + // there's no need to align the size either + + lea eax, [edx * 4 + SZARRAY_BASE_SIZE] + + NEW_ARRAY_FAST_PROLOG + NEW_ARRAY_FAST +LEAF_END RhpNewObjectArrayFast, _TEXT +#endif + +// +// Object* RhpNewVariableSizeObject(MethodTable *pMT, INT_PTR size) +// +// ecx == MethodTable +// edx == element count +// +NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME eax + + // Preserve MethodTable in ESI. + mov esi, ecx + + // Push alloc helper arguments (transition frame, size, flags, MethodTable). + push eax // transition frame + push edx // numElements + push 0 // Flags + push ecx // MethodTable + + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call RhpGcAlloc + + add esp, 16 + + test eax, eax + jz LOCAL_LABEL(RhpNewVariableSizeObject_AllocFailed) + + POP_COOP_PINVOKE_FRAME + + ret + +LOCAL_LABEL(RhpNewVariableSizeObject_AllocFailed): + // This is the OOM failure path. We're going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov ecx, esi // Restore MethodTable pointer + xor edx, edx // Indicate that we should throw OOM. 
+ + POP_COOP_PINVOKE_FRAME + + jmp RhExceptionHandling_FailedAllocation +NESTED_END RhpNewVariableSizeObject, _TEXT diff --git a/src/coreclr/runtime/i386/AllocFast.asm b/src/coreclr/runtime/i386/AllocFast.asm new file mode 100644 index 00000000000000..fe33456a061383 --- /dev/null +++ b/src/coreclr/runtime/i386/AllocFast.asm @@ -0,0 +1,268 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + + .586 + .model flat + option casemap:none + .code + +include AsmMacros_Shared.inc + +; Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +; allocation context then automatically fallback to the slow allocation path. +; ECX == MethodTable +FASTCALL_FUNC RhpNewFast, 4 + ; edx = ee_alloc_context pointer, TRASHES eax + INLINE_GET_ALLOC_CONTEXT_BASE edx, eax + + mov eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] + add eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + jc AllocFailed + cmp eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + ja AllocFailed + mov [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax + + ; calc the new object pointer and initialize it + sub eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] + mov [eax + OFFSETOF__Object__m_pEEType], ecx + + ret + +AllocFailed: + xor edx, edx + jmp @RhpNewObject@8 +FASTCALL_ENDFUNC + +; Allocate non-array object with finalizer. +; ECX == MethodTable +FASTCALL_FUNC RhpNewFinalizable, 4 + mov edx, GC_ALLOC_FINALIZE ; Flags + jmp @RhpNewObject@8 +FASTCALL_ENDFUNC + +; Allocate non-array object +; ECX == MethodTable +; EDX == alloc flags +FASTCALL_FUNC RhpNewObject, 8 + PUSH_COOP_PINVOKE_FRAME eax + + ; Preserve MethodTable in ESI. + mov esi, ecx + + push eax ; transition frame + push 0 ; numElements + push edx + push ecx + + ;; Call the rest of the allocation helper. + ;; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call RhpGcAlloc + + test eax, eax + jz NewOutOfMemory + + POP_COOP_PINVOKE_FRAME + + ret + +NewOutOfMemory: + ;; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ;; an out of memory exception that the caller of this allocator understands. + + mov ecx, esi ; Restore MethodTable pointer + xor edx, edx ; Indicate that we should throw OOM. 
+ + POP_COOP_PINVOKE_FRAME + + jmp RhExceptionHandling_FailedAllocation +FASTCALL_ENDFUNC + +; Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast +; EAX == string/array size +; ECX == MethodTable +; EDX == character/element count +NEW_ARRAY_FAST_PROLOG MACRO + push ecx + push edx +ENDM + +NEW_ARRAY_FAST MACRO + LOCAL AllocContextOverflow + + ; EDX = ee_alloc_context pointer, trashes ECX + INLINE_GET_ALLOC_CONTEXT_BASE edx, ecx + + mov ecx, eax + add eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] + jc AllocContextOverflow + cmp eax, [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] + ja AllocContextOverflow + + ; ECX == allocation size + ; EAX == new alloc ptr + ; EDX == ee_alloc_context pointer + + ; set the new alloc pointer + mov [edx + OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax + + ; calc the new object pointer + sub eax, ecx + + ; Restore the element count and put it in edx + pop edx + ; Restore the MethodTable and put it in ecx + pop ecx + + ; set the new object's MethodTable pointer and element count + mov [eax + OFFSETOF__Object__m_pEEType], ecx + mov [eax + OFFSETOF__Array__m_Length], edx + ret + +AllocContextOverflow: + ; Restore the element count and put it in edx + pop edx + ; Restore the MethodTable and put it in ecx + pop ecx + + jmp @RhpNewVariableSizeObject@8 +ENDM + +; Allocate a new string. +; ECX == MethodTable +; EDX == element count +FASTCALL_FUNC RhNewString, 8 + ; Make sure computing the aligned overall allocation size won't overflow + cmp edx, MAX_STRING_LENGTH + ja StringSizeOverflow + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + lea eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)] + and eax, -4 + + NEW_ARRAY_FAST_PROLOG + NEW_ARRAY_FAST + +StringSizeOverflow: + ; We get here if the size of the final string object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an OOM exception that the caller of this allocator understands. + + ; ecx holds MethodTable pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation +FASTCALL_ENDFUNC + +; Allocate one dimensional, zero based array (SZARRAY). +; ECX == MethodTable +; EDX == element count +FASTCALL_FUNC RhpNewArrayFast, 8 + NEW_ARRAY_FAST_PROLOG + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is <= 0x10000, no overflow is possible because the component size is + ; <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case + ; (32 dimensional MdArray) is less than 0xffff. + movzx eax, word ptr [ecx + OFFSETOF__MethodTable__m_usComponentSize] + cmp edx, 010000h + ja ArraySizeBig + mul edx + lea eax, [eax + SZARRAY_BASE_SIZE + 3] +ArrayAlignSize: + and eax, -4 + + NEW_ARRAY_FAST + +ArraySizeBig: + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is negative, it's an overflow, otherwise it's out of memory + cmp edx, 0 + jl ArraySizeOverflow + mul edx + jc ArrayOutOfMemoryNoFrame + add eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] + jc ArrayOutOfMemoryNoFrame + add eax, 3 + jc ArrayOutOfMemoryNoFrame + jmp ArrayAlignSize + +ArrayOutOfMemoryNoFrame: + add esp, 8 + + ; ecx holds MethodTable pointer already + xor edx, edx ; Indicate that we should throw OOM. 
+ jmp RhExceptionHandling_FailedAllocation + +ArraySizeOverflow: + add esp, 8 + + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. + + ; ecx holds MethodTable pointer already + mov edx, 1 ; Indicate that we should throw OverflowException + jmp RhExceptionHandling_FailedAllocation +FASTCALL_ENDFUNC + +IFNDEF FEATURE_NATIVEAOT +; Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements). +; ECX == MethodTable +; EDX == element count +FASTCALL_FUNC RhpNewObjectArrayFast, 8 + ; Delegate overflow handling to the generic helper conservatively + + cmp edx, (40000000h / 4) ; sizeof(void*) + jae @RhpNewVariableSizeObject@8 + + ; In this case we know the element size is sizeof(void *), or 4 for x86 + ; This helps us in two ways - we can shift instead of multiplying, and + ; there's no need to align the size either + + lea eax, [edx * 4 + SZARRAY_BASE_SIZE] + + NEW_ARRAY_FAST_PROLOG + NEW_ARRAY_FAST +FASTCALL_ENDFUNC +ENDIF + +; +; Object* RhpNewVariableSizeObject(MethodTable *pMT, INT_PTR size) +; +; ecx == MethodTable +; edx == element count +; +FASTCALL_FUNC RhpNewVariableSizeObject, 8 + PUSH_COOP_PINVOKE_FRAME eax + + ; Preserve MethodTable in ESI. + mov esi, ecx + + ; Push alloc helper arguments (transition frame, size, flags, MethodTable). + push eax ; transition frame + push edx ; numElements + push 0 ; Flags + push ecx ; MethodTable + + ; void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call RhpGcAlloc + + test eax, eax + jz RhpNewVariableSizeObject_OutOfMemory + + POP_COOP_PINVOKE_FRAME + + ret + +RhpNewVariableSizeObject_OutOfMemory: + ; This is the OOM failure path. We're going to tail-call to a managed helper that will throw + ; an out of memory exception that the caller of this allocator understands. + + mov ecx, esi ; Restore MethodTable pointer + xor edx, edx ; Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + + jmp RhExceptionHandling_FailedAllocation +FASTCALL_ENDFUNC + + end diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/runtime/loongarch64/AllocFast.S similarity index 61% rename from src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S rename to src/coreclr/runtime/loongarch64/AllocFast.S index 42913f5c091839..b9f057b8efa131 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S +++ b/src/coreclr/runtime/loongarch64/AllocFast.S @@ -1,52 +1,47 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#include -#include "AsmOffsets.inc" - -// GC type flags -#define GC_ALLOC_FINALIZE 1 - -// -// Rename fields of nested structs -// -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) +#include "AsmMacros_Shared.h" // Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's // allocation context then automatically fallback to the slow allocation path. 
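Before the per-architecture bodies that follow, it may help to see the shape of this fast path in one place. The following is a hedged C++ sketch of what every RhpNewFast port in this diff implements; the struct layout is a minimal stand-in for the runtime's real ee_alloc_context and Object/MethodTable types, assumed for illustration rather than quoted from the source:

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical, simplified stand-ins for the runtime's real types.
struct MethodTable { uint32_t m_uBaseSize; };
struct Object      { MethodTable* m_pEEType; };

struct ee_alloc_context
{
    uint8_t* alloc_ptr;       // next free byte in the thread's allocation context
    uint8_t* combined_limit;  // never above the true allocation limit; judging by
                              // the name it may be lowered to force the slow path
};

Object* AllocateSlow(MethodTable* pMT);  // framed path: RhpGcAlloc under a transition frame

Object* RhpNewFastSketch(MethodTable* pMT, ee_alloc_context* ctx)
{
    uint8_t* ptr  = ctx->alloc_ptr;
    size_t   size = pMT->m_uBaseSize;

    // The rewritten ports first compute (combined_limit - alloc_ptr) and then
    // compare the object size against that remainder; the subtraction cannot
    // underflow, so no "alloc_ptr + size might wrap" check is needed here.
    if (size > (size_t)(ctx->combined_limit - ptr))
        return AllocateSlow(pMT);

    Object* obj = (Object*)ptr;
    obj->m_pEEType = pMT;          // set the MethodTable pointer...
    ctx->alloc_ptr = ptr + size;   // ...and publish the bumped alloc pointer
    return obj;                    // (the exact store order varies slightly per port)
}
```

The array and string helpers are the same pattern with one extra store for the element count, which is why the ports fold them into a shared NEW_ARRAY_FAST macro.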
 // $a0 == MethodTable
     LEAF_ENTRY RhpNewFast, _TEXT
 
-    // a1 = GetThread()
-    INLINE_GETTHREAD $a1
+    // a1 = ee_alloc_context pointer
+    INLINE_GET_ALLOC_CONTEXT_BASE $a1
 
     //
     // a0 contains MethodTable pointer
     //
     ld.w  $a2, $a0, OFFSETOF__MethodTable__m_uBaseSize
 
     //
     // a0: MethodTable pointer
-    // a1: Thread pointer
+    // a1: ee_alloc_context pointer
     // a2: base size
     //
 
     // Load potential new object address into t3.
-    ld.d  $t3, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+    ld.d  $t3, $a1, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr
+
+    // Load and calculate the maximum size of object we can fit.
+    ld.d  $t2, $a1, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit
+    sub.d  $t2, $t2, $t3
 
-    // Determine whether the end of the object would lie outside of the current allocation context. If so,
+    // Determine whether the end of the object is too big for the current allocation context. If so,
     // we abandon the attempt to allocate the object directly and fall back to the slow helper.
-    add.d  $a2, $a2, $t3
-    ld.d  $t4, $a1, OFFSETOF__Thread__m_eeAllocContext__combined_limit
-    bltu  $t4, $a2, LOCAL_LABEL(RhpNewFast_RarePath)
+    bltu  $t2, $a2, LOCAL_LABEL(RhpNewFast_RarePath)
 
-    // Update the alloc pointer to account for the allocation.
-    st.d  $a2, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+    // Calculate the new alloc pointer to account for the allocation.
+    add.d  $a2, $a2, $t3
 
-    // Set the new objects MethodTable pointer
+    // Set the new object's MethodTable pointer.
     st.d  $a0, $t3, OFFSETOF__Object__m_pEEType
 
+    // Update the alloc pointer to the newly calculated one.
+    st.d  $a2, $a1, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr
+
     ori  $a0, $t3, 0
 
     jirl  $r0, $ra, 0
@@ -80,7 +75,7 @@ LOCAL_LABEL(RhpNewFast_RarePath):
     // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
     bl  C_FUNC(RhpGcAlloc)
 
-    // Set the new objects MethodTable pointer on success.
+    // Set the new object's MethodTable pointer on success.
     beqz  $a0, LOCAL_LABEL(NewOutOfMemory)
 
     .cfi_remember_state
@@ -100,51 +95,60 @@ LOCAL_LABEL(NewOutOfMemory):
 
     NESTED_END RhpNewObject, _TEXT
 
-// Allocate a string.
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast
 // a0 == MethodTable
-// a1 == element/character count
-    LEAF_ENTRY RhNewString, _TEXT
-    // Make sure computing the overall allocation size wont overflow
-    lu12i.w  $a2, ((MAX_STRING_LENGTH >> 12) & 0xFFFFF)
-    ori  $a2, $a2, (MAX_STRING_LENGTH & 0xFFF)
-    bltu  $a2, $a1, LOCAL_LABEL(StringSizeOverflow)
-
-    // Compute overall allocation size (align(base size + (element size * elements), 8)).
-    ori  $a2, $zero, STRING_COMPONENT_SIZE
-    mulw.d.w  $a2, $a1, $a2                 // a2 = (a1[31:0] * a2[31:0])[64:0]
-    addi.d  $a2, $a2, STRING_BASE_SIZE + 7  // a2 = a2 + STRING_BASE_SIZE + 7
-    bstrins.d  $a2, $r0, 2, 0               // clear the bits[2:0] of $a2
-
-    // a0 == MethodTable
-    // a1 == element count
-    // a2 == string size
+// a1 == character/element count
+// a2 == string/array size
+    .macro NEW_ARRAY_FAST
 
-    INLINE_GETTHREAD $a3
+    INLINE_GET_ALLOC_CONTEXT_BASE $a3
 
     // Load potential new object address into t3.
-    ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+    ld.d  $t3, $a3, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr
 
-    // Determine whether the end of the object would lie outside of the current allocation context.
If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit - bltu $t3, $a2, LOCAL_LABEL(RhNewString_Rare) + // Load and calculate the maximum size of object we can fit. + ld.d $t2, $a3, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit + sub.d $t2, $t2, $t3 - // Reload new object address into r12. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + // Determine whether the end of the object is too big for the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + bltu $t2, $a2, C_FUNC(RhpNewVariableSizeObject) - // Update the alloc pointer to account for the allocation. - st.d $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + // Calculate the new alloc pointer to account for the allocation. + add.d $a2, $a2, $t3 - // Set the new objects MethodTable pointer and element count. + // Set the new object's MethodTable pointer and element count. st.d $a0, $t3, OFFSETOF__Object__m_pEEType st.d $a1, $t3, OFFSETOF__Array__m_Length + // Update the alloc pointer to the newly calculated one. + st.d $a2, $a3, OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr + // Return the object allocated in a0. ori $a0, $t3, 0 jirl $r0, $ra, 0 + .endm + +// Allocate a string. +// a0 == MethodTable +// a1 == element/character count + LEAF_ENTRY RhNewString, _TEXT + + // Make sure computing the overall allocation size wont overflow + lu12i.w $a2, ((MAX_STRING_LENGTH >> 12) & 0xFFFFF) + ori $a2, $a2, (MAX_STRING_LENGTH & 0xFFF) + bltu $a2, $a1, LOCAL_LABEL(StringSizeOverflow) + + // Compute overall allocation size (align(base size + (element size * elements), 8)). + ori $a2, $zero, STRING_COMPONENT_SIZE + mulw.d.w $a2, $a1, $a2 // a2 = (a1[31:0] * a2[31:0])[64:0] + addi.d $a2, $a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 + bstrins.d $a2, $r0, 2, 0 // clear the bits[2:0] of $a2 + + NEW_ARRAY_FAST + LOCAL_LABEL(StringSizeOverflow): // We get here if the length of the final string object can not be represented as an unsigned // 32-bit value. We are going to tail-call to a managed helper that will throw @@ -154,14 +158,12 @@ LOCAL_LABEL(StringSizeOverflow): ori $a1, $zero, 1 // Indicate that we should throw OverflowException b C_FUNC(RhExceptionHandling_FailedAllocation) -LOCAL_LABEL(RhNewString_Rare): - b C_FUNC(RhpNewArrayRare) LEAF_END RhNewString, _Text // Allocate one dimensional, zero based array (SZARRAY). // $a0 == MethodTable // $a1 == element count - LEAF_ENTRY RhpNewArray, _Text + LEAF_ENTRY RhpNewArrayFast, _Text // We want to limit the element count to the non-negative 32-bit int range. // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component @@ -173,39 +175,10 @@ LOCAL_LABEL(RhNewString_Rare): ld.h $a2, $a0, OFFSETOF__MethodTable__m_usComponentSize mulw.d.w $a2, $a1, $a2 - ld.w $a3, $a0, OFFSETOF__MethodTable__m_uBaseSize - add.d $a2, $a2, $a3 - addi.d $a2, $a2, 7 + addi.d $a2, $a2, SZARRAY_BASE_SIZE + 7 bstrins.d $a2, $r0, 2, 0 - // a0 == MethodTable - // a1 == element count - // a2 == array size - - INLINE_GETTHREAD $a3 - - // Load potential new object address into t3. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Determine whether the end of the object would lie outside of the current allocation context. 
If so,
-    // we abandon the attempt to allocate the object directly and fall back to the slow helper.
-    add.d  $a2, $a2, $t3
-    ld.d  $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit
-    bltu  $t3, $a2, LOCAL_LABEL(RhpNewArray_Rare)
-
-    // Reload new object address into t3.
-    ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
-
-    // Update the alloc pointer to account for the allocation.
-    st.d  $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
 
-    // Set the new objects MethodTable pointer and element count.
-    st.d  $a0, $t3, OFFSETOF__Object__m_pEEType
-    st.d  $a1, $t3, OFFSETOF__Array__m_Length
-
-    // Return the object allocated in r0.
-    ori  $a0, $t3, 0
-
-    jirl  $r0, $ra, 0
+    NEW_ARRAY_FAST
 
 LOCAL_LABEL(ArraySizeOverflow):
     // We get here if the size of the final array object can not be represented as an unsigned
@@ -216,20 +189,38 @@ LOCAL_LABEL(ArraySizeOverflow):
     ori  $a1, $zero, 1      // Indicate that we should throw OverflowException
     b  C_FUNC(RhExceptionHandling_FailedAllocation)
 
-LOCAL_LABEL(RhpNewArray_Rare):
-    b  C_FUNC(RhpNewArrayRare)
-    LEAF_END RhpNewArray, _TEXT
+    LEAF_END RhpNewArrayFast, _TEXT
+
+#ifndef FEATURE_NATIVEAOT
+// Allocate one-dimensional, zero-based array (SZARRAY) of objects (pointer sized elements).
+// $a0 == MethodTable
+// $a1 == element count
+    LEAF_ENTRY RhpNewObjectArrayFast, _Text
+
+    // Delegate overflow handling to the generic helper conservatively
+
+    li.w  $a2, (0x40000000 / 8)     // sizeof(void*)
+    bgeu  $a1, $a2, C_FUNC(RhpNewVariableSizeObject)
+
+    // In this case we know the element size is sizeof(void *), or 8 for loongarch64
+    // This helps us in two ways - we can shift instead of multiplying, and
+    // there's no need to align the size either
+
+    slli.d  $a2, $a1, 3
+    addi.d  $a2, $a2, SZARRAY_BASE_SIZE
+
+    // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+    // to be a multiple of 8.
+
+    NEW_ARRAY_FAST
+
+    LEAF_END RhpNewObjectArrayFast, _TEXT
+#endif
 
 // Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper.
 // a0 == MethodTable
 // a1 == element count
-// a2 == array size + Thread::m_alloc_context::alloc_ptr
-// a3 == Thread
-    NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler
-
-    // Recover array size by subtracting the alloc_ptr from a2.
-    ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
-    sub.d  $a2, $a2, $t3
+    NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
 
     PUSH_COOP_PINVOKE_FRAME $a3
 
@@ -242,15 +233,15 @@ LOCAL_LABEL(RhpNewArray_Rare):
     // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
     bl  C_FUNC(RhpGcAlloc)
 
-    // Set the new objects MethodTable pointer and length on success.
-    beqz  $a0, LOCAL_LABEL(ArrayOutOfMemory)
+    // Set the new object's MethodTable pointer and length on success.
+    beqz  $a0, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory)
 
     .cfi_remember_state
     POP_COOP_PINVOKE_FRAME
     EPILOG_RETURN
 
     .cfi_restore_state
-LOCAL_LABEL(ArrayOutOfMemory):
+LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory):
     // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
     // an out of memory exception that the caller of this allocator understands.
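All of the renamed slow paths funnel into the same call, whose signature appears verbatim in the comments above. What follows is a hedged C++ restatement of the RhpNewVariableSizeObject contract; the transition-frame setup is platform assembly and is elided here, and the sketch function name is hypothetical:

```cpp
#include <cstdint>

struct MethodTable;   // opaque for the purposes of this sketch
struct Object;

// Signatures as given in this diff's comments.
extern "C" void* RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags,
                            uintptr_t numElements, void* pTransitionFrame);
extern "C" void RhExceptionHandling_FailedAllocation(MethodTable* pMT, bool isOverflow);

Object* NewVariableSizeObjectSketch(MethodTable* pMT, uintptr_t numElements,
                                    void* pTransitionFrame)
{
    void* obj = RhpGcAlloc(pMT, /* uFlags */ 0, numElements, pTransitionFrame);
    if (obj == nullptr)
    {
        // The OOM paths above pass 0 here, while the size-overflow paths pass 1
        // to request an OverflowException. This call throws and does not return.
        RhExceptionHandling_FailedAllocation(pMT, /* isOverflow */ false);
    }
    return static_cast<Object*>(obj);
}
```

Because the MethodTable and element count now arrive in their original registers (or are preserved in callee-saved ones), RhpNewVariableSizeObject can replace RhpNewArrayRare without the "recover array size by subtracting the alloc_ptr" dance the deleted code performed.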
@@ -260,4 +251,4 @@ LOCAL_LABEL(ArrayOutOfMemory): POP_COOP_PINVOKE_FRAME b C_FUNC(RhExceptionHandling_FailedAllocation) - NESTED_END RhpNewArrayRare, _TEXT + NESTED_END RhpNewVariableSizeObject, _TEXT diff --git a/src/coreclr/runtime/riscv64/AllocFast.S b/src/coreclr/runtime/riscv64/AllocFast.S new file mode 100644 index 00000000000000..b7bfa6d104680a --- /dev/null +++ b/src/coreclr/runtime/riscv64/AllocFast.S @@ -0,0 +1,289 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros_Shared.h" + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// a0 == MethodTable + LEAF_ENTRY RhpNewFast, _TEXT + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x20 + PROLOG_SAVE_REG s1, 0x10 + + // Save MethodTable pointer + mv s1, a0 + + // a0 = ee_alloc_context pointer; trashes volatile registers + INLINE_GET_ALLOC_CONTEXT_BASE + + // + // s1 contains MethodTable pointer + // + lw t0, OFFSETOF__MethodTable__m_uBaseSize(s1) + + // + // s1: MethodTable pointer + // a0: ee_alloc_context pointer + // t0: base size + // + + // Load potential new object address into t1. + ld t1, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0) + + // Load and calculate the maximum size of object we can fit. + ld t2, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)(a0) + sub t2, t2, t1 + + // Determine whether the end of the object is too big for the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + bltu t2, t0, LOCAL_LABEL(RhpNewFast_RarePath) + + // Calculate the new alloc pointer to account for the allocation. + add t0, t0, t1 + + // Set the new object's MethodTable pointer. + sd s1, OFFSETOF__Object__m_pEEType(t1) + + // Update the alloc pointer to the newly calculated one. + sd t0, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0) + + mv a0, t1 + + EPILOG_RESTORE_REG s1, 0x10 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x20 + + ret + +LOCAL_LABEL(RhpNewFast_RarePath): + mv a1, zero + mv a0, s1 + + EPILOG_RESTORE_REG s1, 0x10 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x20 + + tail RhpNewObject + LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// a0 == MethodTable + LEAF_ENTRY RhpNewFinalizable, _TEXT + li a1, GC_ALLOC_FINALIZE + tail RhpNewObject + LEAF_END RhpNewFinalizable, _TEXT + +// Allocate non-array object. +// a0 == MethodTable +// a1 == alloc flags + NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME a3 + + // a3: transition frame + + // Preserve the MethodTable in s2 + mv s2, a0 + + li a2, 0 // numElements + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's MethodTable pointer on success. + beq a0, zero, LOCAL_LABEL(NewOutOfMemory) + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + .cfi_restore_state + +LOCAL_LABEL(NewOutOfMemory): + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mv a0, s2 // MethodTable pointer + li a1, 0 // Indicate that we should throw OOM. 
+
+        POP_COOP_PINVOKE_FRAME
+        tail  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewObject, _TEXT
+
+// Shared code for RhNewString, RhpNewArrayFast and RhpNewObjectArrayFast
+// a0 == MethodTable
+// a1 == character/element count
+// t0 == string/array size
+    .macro NEW_ARRAY_FAST
+
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x20
+        PROLOG_SAVE_REG_PAIR s1, s2, 0x10
+
+        // Save MethodTable pointer and string length
+        mv  s1, a0
+        mv  s2, a1
+
+        // a0 = ee_alloc_context pointer; trashes volatile registers
+        INLINE_GET_ALLOC_CONTEXT_BASE
+
+        // s1 == MethodTable
+        // s2 == element count
+        // t0 == string/array size
+
+        // Load potential new object address into t1.
+        ld  t1, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0)
+
+        // Load and calculate the maximum size of object we can fit.
+        ld  t2, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit)(a0)
+        sub  t2, t2, t1
+
+        // Determine whether the end of the object is too big for the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        bltu  t2, t0, 1f
+
+        // Calculate the new alloc pointer to account for the allocation.
+        add  t0, t0, t1
+
+        // Set the new object's MethodTable pointer and element count.
+        sd  s1, OFFSETOF__Object__m_pEEType(t1)
+        sd  s2, OFFSETOF__Array__m_Length(t1)
+
+        // Update the alloc pointer to the newly calculated one.
+        sd  t0, (OFFSETOF__ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr)(a0)
+
+        // Return the object allocated in a0.
+        mv  a0, t1
+
+        EPILOG_RESTORE_REG_PAIR s1, s2, 0x10
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x20
+
+        ret
+
+1:
+        mv  a0, s1
+        mv  a1, s2
+
+        EPILOG_RESTORE_REG_PAIR s1, s2, 0x10
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x20
+
+        tail  C_FUNC(RhpNewVariableSizeObject)
+
+    .endm
+
+// Allocate a string.
+// a0 == MethodTable
+// a1 == element/character count
+    LEAF_ENTRY RhNewString, _TEXT
+
+        // Make sure computing the overall allocation size won't overflow
+        li  a2, MAX_STRING_LENGTH
+        bltu  a2, a1, LOCAL_LABEL(StringSizeOverflow)  // Branch if a2 < a1 (overflow)
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        slli  t0, a1, 1                     // t0 = a1 * STRING_COMPONENT_SIZE, where STRING_COMPONENT_SIZE == 2
+        addi  t0, t0, STRING_BASE_SIZE + 7  // t0 = t0 + STRING_BASE_SIZE + 7
+        andi  t0, t0, ~0x7                  // Clear the bits[2:0] of t0 (align to 8 bytes)
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(StringSizeOverflow):
+        // We get here if the length of the final string object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        li  a1, 1                           // Indicate that we should throw OverflowException
+        tail  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END RhNewString, _TEXT
+
+// Allocate one-dimensional, zero-based array (SZARRAY).
+// a0 == MethodTable
+// a1 == element count
+    LEAF_ENTRY RhpNewArrayFast, _TEXT
+
+        // We want to limit the element count to the non-negative 32-bit int range.
+        // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+        // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
+        // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
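The bound argument in the preceding comment is easy to restate outside assembly. A hedged C++ sketch of the size computation the 64-bit ports perform; SZARRAY_BASE_SIZE's concrete value is per-platform, so it is taken as a parameter rather than assumed:

```cpp
#include <cstdint>

// Sketch of the allocation-size computation for a one-dimensional array, as the
// 64-bit ports perform it. Assumes the caller has already verified
// numElements <= 0x7FFFFFFF, exactly as the assembly does before this point.
uint64_t SzArrayAllocSize(uint32_t numElements, uint16_t componentSize,
                          uint64_t szArrayBaseSize /* SZARRAY_BASE_SIZE */)
{
    // numElements <= 0x7FFFFFFF and componentSize <= 0xFFFF, so the product is
    // below 2^31 * 2^16 = 2^47 and the 64-bit multiply cannot overflow; a single
    // up-front element-count check therefore covers every later step.
    uint64_t size = (uint64_t)numElements * componentSize;

    // Add the fixed base size and round up to an 8-byte boundary, mirroring
    // "addi t0, t0, SZARRAY_BASE_SIZE + 7" followed by "andi t0, t0, ~0x7".
    return (size + szArrayBaseSize + 7) & ~UINT64_C(7);
}
```

The assembly resumes with exactly this up-front check (the load of 0x7fffffff below), and the object-array helpers skip the rounding step entirely because a pointer-sized element keeps the size 8-byte aligned by construction.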
+        li  a2, 0x7fffffff
+        bltu  a2, a1, LOCAL_LABEL(ArraySizeOverflow)  // Branch if a2 < a1 (check for overflow)
+
+        lhu  t0, OFFSETOF__MethodTable__m_usComponentSize(a0)  // Load component size
+        mul  t0, a1, t0                     // t0 = a1 * component size
+        addi  t0, t0, SZARRAY_BASE_SIZE + 7 // t0 = t0 + SZARRAY_BASE_SIZE + 7
+        andi  t0, t0, ~0x7                  // Clear the bits[2:0] of t0 (align to 8 bytes)
+
+        NEW_ARRAY_FAST
+
+LOCAL_LABEL(ArraySizeOverflow):
+        // We get here if the size of the final array object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        li  a1, 1                           // Indicate that we should throw OverflowException
+        tail  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    LEAF_END RhpNewArrayFast, _TEXT
+
+#ifndef FEATURE_NATIVEAOT
+// Allocate one-dimensional, zero-based array (SZARRAY) of objects (pointer sized elements).
+// a0 == MethodTable
+// a1 == element count
+    LEAF_ENTRY RhpNewObjectArrayFast, _TEXT
+
+        // Delegate overflow handling to the generic helper conservatively
+
+        li  t0, (0x40000000 / 8)            // sizeof(void*)
+        bgeu  a1, t0, C_FUNC(RhpNewVariableSizeObject)
+
+        // In this case we know the element size is sizeof(void *), or 8 for riscv64
+        // This helps us in two ways - we can shift instead of multiplying, and
+        // there's no need to align the size either
+
+        slli  t0, a1, 3
+        addi  t0, t0, SZARRAY_BASE_SIZE
+
+        // No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+        // to be a multiple of 8.
+
+        NEW_ARRAY_FAST
+
+    LEAF_END RhpNewObjectArrayFast, _TEXT
+#endif
+
+// Allocate one-dimensional, zero-based array (SZARRAY) using the slow path that calls a runtime helper.
+// a0 == MethodTable
+// a1 == element count
+    NESTED_ENTRY RhpNewVariableSizeObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME a3
+
+        // Preserve data we will need later into the callee saved registers
+        mv  s2, a0                          // Preserve MethodTable
+
+        mv  a2, a1                          // numElements
+        li  a1, 0                           // uFlags
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        call  C_FUNC(RhpGcAlloc)
+
+        // Set the new object's MethodTable pointer and length on success.
+        beq  a0, zero, LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory)
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+
+LOCAL_LABEL(RhpNewVariableSizeObject_OutOfMemory):
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mv  a0, s2                          // MethodTable pointer
+        li  a1, 0                           // Indicate that we should throw OOM.
+ + POP_COOP_PINVOKE_FRAME + tail C_FUNC(RhExceptionHandling_FailedAllocation) + + NESTED_END RhpNewVariableSizeObject, _TEXT diff --git a/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs index 9d356cb066813d..944adfd79f5a08 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/RuntimeConstants.cs @@ -33,6 +33,9 @@ internal static class ArrayTypesConstants internal enum GC_ALLOC_FLAGS { GC_ALLOC_NO_FLAGS = 0, + GC_ALLOC_FINALIZE = 1, + GC_ALLOC_ALIGN8_BIAS = 4, + GC_ALLOC_ALIGN8 = 8, GC_ALLOC_ZEROING_OPTIONAL = 16, GC_ALLOC_PINNED_OBJECT_HEAP = 64, } diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index a3e761537d0a51..3f2eb6d70c254a 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -615,9 +615,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) id = ReadyToRunHelper.NewArray; break; case CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_ALIGN8: - return _compilation.NodeFactory.ExternSymbol("RhpNewArrayAlign8"); + return _compilation.NodeFactory.ExternSymbol("RhpNewArrayFastAlign8"); case CorInfoHelpFunc.CORINFO_HELP_NEWARR_1_VC: - return _compilation.NodeFactory.ExternSymbol("RhpNewArray"); + return _compilation.NodeFactory.ExternSymbol("RhpNewArrayFast"); case CorInfoHelpFunc.CORINFO_HELP_STACK_PROBE: return _compilation.NodeFactory.ExternSymbol("RhpStackProbe"); diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index c0805d6fbc55a0..e2799dcdb48d1e 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -16,6 +16,10 @@ include_directories(${CLR_SRC_NATIVE_DIR}/libs/Common) add_definitions(-DUNICODE) add_definitions(-D_UNICODE) +if(CLR_CMAKE_TARGET_ANDROID) + add_definitions(-DFEATURE_EMULATED_TLS) +endif(CLR_CMAKE_TARGET_ANDROID) + if(FEATURE_AUTO_TRACE) add_definitions(-DFEATURE_AUTO_TRACE) endif(FEATURE_AUTO_TRACE) @@ -354,6 +358,7 @@ set(VM_SOURCES_WKS interpframeallocator.cpp invokeutil.cpp jithelpers.cpp + jitinterfacegen.cpp managedmdimport.cpp marshalnative.cpp methodtablebuilder.cpp @@ -633,6 +638,7 @@ if(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.asm ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm @@ -655,6 +661,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/UMThunkStub.asm ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.asm ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.asm ) @@ -664,12 +671,14 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) elseif(CLR_CMAKE_TARGET_ARCH_I386) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm + ${ARCH_SOURCES_DIR}/AllocSlow.asm ${ARCH_SOURCES_DIR}/asmhelpers.asm ${ARCH_SOURCES_DIR}/ehhelpers.asm ${ARCH_SOURCES_DIR}/gmsasm.asm ${ARCH_SOURCES_DIR}/jithelp.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.asm ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.asm ) @@ -678,6 +687,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_I386) ) elseif(CLR_CMAKE_TARGET_ARCH_ARM64) 
set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.asm ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm ${ARCH_SOURCES_DIR}/patchedcode.asm @@ -686,6 +696,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/thunktemplates.asm ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.asm ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.asm + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.asm ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.asm ) @@ -699,6 +710,7 @@ else(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S @@ -718,11 +730,13 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/unixasmhelpers.S ${ARCH_SOURCES_DIR}/umthunkstub.S ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.S ) elseif(CLR_CMAKE_TARGET_ARCH_I386) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/ehhelpers.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/jithelp.S @@ -730,19 +744,23 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/umthunkstub.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/ehhelpers.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/CachedInterfaceDispatchCoreCLR.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S @@ -751,22 +769,27 @@ else(CLR_CMAKE_TARGET_WIN32) ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/StubDispatch.S ${ARCH_SOURCES_DIR}/thunktemplates.S ${ARCH_SOURCES_DIR}/StubPrecodeDynamicHelpers.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerloongarch64.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) elseif(CLR_CMAKE_TARGET_ARCH_RISCV64) set(VM_SOURCES_WKS_ARCH_ASM + ${ARCH_SOURCES_DIR}/AllocSlow.S ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerriscv64.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S + ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/AllocFast.S ${RUNTIME_DIR}/${ARCH_SOURCES_DIR}/WriteBarriers.S ) endif() @@ -794,7 +817,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/profiler.cpp exceptionhandling.cpp gcinfodecoder.cpp - jitinterfacegen.cpp writebarriermanager.cpp ) diff --git a/src/coreclr/vm/FrameTypes.h b/src/coreclr/vm/FrameTypes.h index 19c5092466ee07..b4ef6891a5d8f3 100644 --- a/src/coreclr/vm/FrameTypes.h +++ b/src/coreclr/vm/FrameTypes.h @@ -36,9 +36,7 @@ FRAME_TYPE_NAME(PrestubMethodFrame) 
FRAME_TYPE_NAME(CallCountingHelperFrame) FRAME_TYPE_NAME(StubDispatchFrame) FRAME_TYPE_NAME(ExternalMethodFrame) -#ifdef FEATURE_READYTORUN FRAME_TYPE_NAME(DynamicHelperFrame) -#endif FRAME_TYPE_NAME(ProtectByRefsFrame) FRAME_TYPE_NAME(ProtectValueClassFrame) FRAME_TYPE_NAME(DebuggerClassInitMarkFrame) diff --git a/src/coreclr/vm/amd64/AllocSlow.S b/src/coreclr/vm/amd64/AllocSlow.S new file mode 100644 index 00000000000000..44f6dd33fae00a --- /dev/null +++ b/src/coreclr/vm/amd64/AllocSlow.S @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include "asmconstants.h" +#include "unixasmmacros.inc" + +// +// Object* RhpNew(MethodTable *pMT) +// +// Allocate non-array object, slow path +// +LEAF_ENTRY RhpNew, _TEXT + + mov rsi, 0 + jmp C_FUNC(RhpNewObject) + +LEAF_END RhpNew, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT) +// +// Allocate non-array object, may be on frozen heap +// +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rdx + + mov rsi, 0 + call C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhpNewMaybeFrozen, _TEXT + +// +// Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +// +// Allocate array object, may be on frozen heap +// +NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rdx + + call C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhpNewArrayMaybeFrozen, _TEXT + +// +// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +// +NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME rdx + + call C_FUNC(RhExceptionHandling_FailedAllocation_Helper) + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhExceptionHandling_FailedAllocation, _TEXT diff --git a/src/coreclr/vm/amd64/AllocSlow.asm b/src/coreclr/vm/amd64/AllocSlow.asm new file mode 100644 index 00000000000000..203a72a4056d68 --- /dev/null +++ b/src/coreclr/vm/amd64/AllocSlow.asm @@ -0,0 +1,245 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc +include asmconstants.inc + +EXTERN RhpNewObject : PROC +EXTERN RhpNewVariableSizeObject : PROC +EXTERN RhpGcAllocMaybeFrozen : PROC +EXTERN RhExceptionHandling_FailedAllocation_Helper : PROC + +EXTERN g_global_alloc_lock : DWORD +EXTERN g_global_alloc_context : QWORD + +; +; Object* RhpNew(MethodTable *pMT) +; +; Allocate non-array object, slow path. +; +LEAF_ENTRY RhpNew, _TEXT + + mov rdx, 0 + jmp RhpNewObject + +LEAF_END RhpNew, _TEXT + +; +; Object* RhpNewMaybeFrozen(MethodTable *pMT) +; +; Allocate non-array object, may be on frozen heap. +; +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT + + PUSH_COOP_PINVOKE_FRAME r8 + + mov rdx, 0 + call RhpGcAllocMaybeFrozen + + POP_COOP_PINVOKE_FRAME + ret + +NESTED_END RhpNewMaybeFrozen, _TEXT + +; +; Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +; +; Allocate array object, may be on frozen heap. 
+;
+NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT
+
+        PUSH_COOP_PINVOKE_FRAME r8
+
+        call        RhpGcAllocMaybeFrozen
+
+        POP_COOP_PINVOKE_FRAME
+        ret
+
+NESTED_END RhpNewArrayMaybeFrozen, _TEXT
+
+;
+; void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow)
+;
+NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT
+
+        PUSH_COOP_PINVOKE_FRAME r8
+
+        call        RhExceptionHandling_FailedAllocation_Helper
+
+        POP_COOP_PINVOKE_FRAME
+        ret
+
+NESTED_END RhExceptionHandling_FailedAllocation, _TEXT
+
+;
+; void RhpNewFast_UP(MethodTable *pMT)
+;
+; Allocate non-array object, uniprocessor version
+;
+LEAF_ENTRY RhpNewFast_UP, _TEXT
+
+        ; The uniprocessor allocation lock holds -1 when free, so INC sets ZF exactly
+        ; on the -1 -> 0 transition that takes the lock. Release stores -1 absolutely,
+        ; which also wipes out the stray increment left behind by a failed acquire.
+        inc         [g_global_alloc_lock]
+        jnz         RhpNewFast_UP_RarePath
+
+        ;;
+        ;; rcx contains MethodTable pointer
+        ;;
+        mov         r8d, [rcx + OFFSETOF__MethodTable__m_uBaseSize]
+
+        ;;
+        ;; rcx: MethodTable pointer
+        ;; r8: base size
+        ;;
+
+        mov         rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        add         r8, rax
+        cmp         r8, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        ja          RhpNewFast_UP_RarePath_Unlock
+
+        ;; set the new alloc pointer
+        mov         [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8
+
+        ;; set the new object's MethodTable pointer
+        mov         [rax], rcx
+        mov         [g_global_alloc_lock], -1
+        ret
+
+RhpNewFast_UP_RarePath_Unlock:
+        mov         [g_global_alloc_lock], -1
+
+RhpNewFast_UP_RarePath:
+        xor         edx, edx
+        jmp         RhpNewObject
+
+LEAF_END RhpNewFast_UP, _TEXT
+
+;
+; Shared code for RhNewString_UP, RhpNewArrayFast_UP and RhpNewObjectArrayFast_UP
+; RAX == string/array size
+; RCX == MethodTable
+; RDX == character/element count
+;
+NEW_ARRAY_FAST_UP MACRO
+        LOCAL NewArrayFast_RarePath
+
+        inc         [g_global_alloc_lock]
+        jnz         RhpNewVariableSizeObject
+
+        mov         r8, rax
+        add         rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+        jc          NewArrayFast_RarePath
+
+        ; rax == new alloc ptr
+        ; rcx == MethodTable
+        ; rdx == element count
+        ; r8 == array size
+        cmp         rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+        ja          NewArrayFast_RarePath
+
+        mov         [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], rax
+
+        ; calc the new object pointer
+        sub         rax, r8
+
+        mov         [rax + OFFSETOF__Object__m_pEEType], rcx
+        mov         [rax + OFFSETOF__Array__m_Length], edx
+        mov         [g_global_alloc_lock], -1
+        ret
+
+NewArrayFast_RarePath:
+        mov         [g_global_alloc_lock], -1
+        jmp         RhpNewVariableSizeObject
+
+ENDM
+
+;
+; Object* RhNewString_UP(MethodTable *pMT, DWORD stringLength)
+;
+; Allocate a string, uniprocessor version
+;
+LEAF_ENTRY RhNewString_UP, _TEXT
+
+        ; we want to limit the element count to the non-negative 32-bit int range
+        cmp         rdx, MAX_STRING_LENGTH
+        ja          StringSizeOverflow
+
+        ; Compute overall allocation size (align(base size + (element size * elements), 8)).
+        lea         rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)]
+        and         rax, -8
+
+        NEW_ARRAY_FAST_UP
+
+StringSizeOverflow:
+        ; We get here if the size of the final string object can't be represented as an unsigned
+        ; 32-bit value. We're going to tail-call to a managed helper that will throw
+        ; an OOM exception that the caller of this allocator understands.
+
+        ; rcx holds MethodTable pointer already
+        xor         edx, edx        ; Indicate that we should throw OOM.
+    jmp RhExceptionHandling_FailedAllocation
+
+LEAF_END RhNewString_UP, _TEXT
+
+;
+; Object* RhpNewArrayFast_UP(MethodTable *pMT, INT_PTR elementCount)
+;
+; Allocate one dimensional, zero based array (SZARRAY), uniprocessor version
+;
+LEAF_ENTRY RhpNewArrayFast_UP, _TEXT
+
+    ; we want to limit the element count to the non-negative 32-bit int range
+    cmp rdx, 07fffffffh
+    ja ArraySizeOverflow
+
+    ; save element count
+    mov r8, rdx
+
+    ; Compute overall allocation size (align(base size + (element size * elements), 8)).
+    movzx eax, word ptr [rcx + OFFSETOF__MethodTable__m_usComponentSize]
+    imul rax, rdx
+    lea rax, [rax + SZARRAY_BASE_SIZE + 7]
+    and rax, -8
+
+    mov rdx, r8
+
+    NEW_ARRAY_FAST_UP
+
+ArraySizeOverflow:
+    ; We get here if the size of the final array object can't be represented as an unsigned
+    ; 32-bit value. We're going to tail-call to a managed helper that will throw
+    ; an overflow exception that the caller of this allocator understands.
+
+    ; rcx holds MethodTable pointer already
+    mov edx, 1 ; Indicate that we should throw OverflowException
+    jmp RhExceptionHandling_FailedAllocation
+
+LEAF_END RhpNewArrayFast_UP, _TEXT
+
+;
+; Object* RhpNewObjectArrayFast_UP(MethodTable *pMT, INT_PTR elementCount)
+;
+; Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements),
+; uniprocessor version
+;
+LEAF_ENTRY RhpNewObjectArrayFast_UP, _TEXT
+
+    ; Delegate overflow handling to the generic helper conservatively
+
+    cmp rdx, (40000000h / 8) ; sizeof(void*)
+    jae RhpNewVariableSizeObject
+
+    ; In this case we know the element size is sizeof(void *), or 8 for x64
+    ; This helps us in two ways - we can shift instead of multiplying, and
+    ; there's no need to align the size either
+
+    lea eax, [edx * 8 + SZARRAY_BASE_SIZE]
+
+    ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
+    ; to be a multiple of 8.
+
+    NEW_ARRAY_FAST_UP
+
+LEAF_END RhpNewObjectArrayFast_UP, _TEXT
+
+    end
diff --git a/src/coreclr/vm/amd64/AsmMacros.inc b/src/coreclr/vm/amd64/AsmMacros.inc
index ed370b0a31466b..8d05620571257d 100644
--- a/src/coreclr/vm/amd64/AsmMacros.inc
+++ b/src/coreclr/vm/amd64/AsmMacros.inc
@@ -422,6 +422,13 @@ PROLOG_WITH_TRANSITION_BLOCK macro extraLocals := <0>, stackAllocOnEntry := <0>,
 
         endm
 
+EPILOG_WITH_TRANSITION_BLOCK macro
+
+        add rsp, __PWTB_StackAlloc
+        POP_CALLEE_SAVED_REGISTERS
+
+        endm
+
 EPILOG_WITH_TRANSITION_BLOCK_RETURN macro
 
         add rsp, __PWTB_StackAlloc
@@ -438,3 +445,45 @@ EPILOG_WITH_TRANSITION_BLOCK_TAILCALL macro
 
         POP_CALLEE_SAVED_REGISTERS
 
         endm
+
+t_runtime_thread_locals TEXTEQU <?t_runtime_thread_locals@@3URuntimeThreadLocals@@A>
+
+; Inlined version of GetThreadEEAllocContext. Trashes RegTrash, r11
+INLINE_GET_ALLOC_CONTEXT_BASE macro Reg, RegTrash
+
+        EXTERN _tls_index : DWORD
+        EXTERN t_runtime_thread_locals : DWORD
+
+        mov r11d, [_tls_index]
+        mov RegTrash, gs:[OFFSET__TEB__ThreadLocalStoragePointer]
+        mov RegTrash, [RegTrash + r11 * 8]
+        mov r11d, SECTIONREL t_runtime_thread_locals
+        lea Reg, [RegTrash + r11]
+
+        endm
+
+OFFSETOF__ee_alloc_context EQU OFFSETOF__RuntimeThreadLocals__ee_alloc_context
+
+; Pushes a TransitionBlock on the stack without saving the argument registers. See
+; the PROLOG_WITH_TRANSITION_BLOCK macro for the stack layout.
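+; The TransitionBlock pointer left in 'target' is what the C++ slow paths
+; wrap in a DynamicHelperFrame so the stack can be walked, e.g. (sketch
+; mirroring gchelpers.cpp later in this change):
+;
+;     DynamicHelperFrame frame(pTransitionBlock, 0);
+;     frame.Push(CURRENT_THREAD);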
+PUSH_COOP_PINVOKE_FRAME macro target + + __PWTB_StackAlloc = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES + 8 ; alignment to make the stack 16b aligned + __PWTB_TransitionBlock = __PWTB_StackAlloc + + PUSH_CALLEE_SAVED_REGISTERS + alloc_stack __PWTB_StackAlloc + END_PROLOGUE + lea target, [rsp + __PWTB_TransitionBlock] + + endm + +POP_COOP_PINVOKE_FRAME macro + + add rsp, __PWTB_StackAlloc + POP_CALLEE_SAVED_REGISTERS + + endm + +;; GC type flags +GC_ALLOC_FINALIZE equ 1 diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.h b/src/coreclr/vm/amd64/AsmMacros_Shared.h index 87920d58b2ac65..7141b33b8f819a 100644 --- a/src/coreclr/vm/amd64/AsmMacros_Shared.h +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.h @@ -3,6 +3,6 @@ // This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible -#include "unixasmmacros.inc" #include "asmconstants.h" +#include "unixasmmacros.inc" diff --git a/src/coreclr/vm/amd64/AsmMacros_Shared.inc b/src/coreclr/vm/amd64/AsmMacros_Shared.inc index 649ee1485fe5bb..ba247ce0044154 100644 --- a/src/coreclr/vm/amd64/AsmMacros_Shared.inc +++ b/src/coreclr/vm/amd64/AsmMacros_Shared.inc @@ -19,3 +19,6 @@ endif ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP EXTERN g_write_watch_table : QWORD endif + +EXTERN RhpGcAlloc : PROC +EXTERN RhExceptionHandling_FailedAllocation : PROC diff --git a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm index bb73c266d0eb46..97ead4f00581bb 100644 --- a/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm +++ b/src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm @@ -26,9 +26,8 @@ include asmconstants.inc ; whenever they change. ; ; At anyone time, the memory pointed to by JIT_WriteBarrier will contain one -; of these functions. See StompWriteBarrierResize and StompWriteBarrierEphemeral -; in VM\AMD64\JITInterfaceAMD64.cpp and InitJITHelpers1 in VM\JITInterfaceGen.cpp -; for more info. +; of these functions. See StompWriteBarrierResize, StompWriteBarrierEphemeral +; and InitJITWriteBarrierHelpers in VM\AMD64\JITInterfaceAMD64.cpp for more info. ; ; READ THIS!!!!!! ; it is imperative that the addresses of the values that we overwrite diff --git a/src/coreclr/vm/amd64/JitHelpers_Slow.asm b/src/coreclr/vm/amd64/JitHelpers_Slow.asm index d14143cf125de3..d269472be1980d 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Slow.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Slow.asm @@ -39,18 +39,8 @@ EXTERN g_GCShadow:QWORD EXTERN g_GCShadowEnd:QWORD endif -JIT_NEW equ ?JIT_New@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@@Z -g_pStringClass equ ?g_pStringClass@@3PEAVMethodTable@@EA -FramedAllocateString equ ?FramedAllocateString@@YAPEAVStringObject@@K@Z -JIT_NewArr1 equ ?JIT_NewArr1@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@_J@Z - INVALIDGCVALUE equ 0CCCCCCCDh -extern JIT_NEW:proc -extern g_pStringClass:QWORD -extern FramedAllocateString:proc -extern JIT_NewArr1:proc - ifdef _DEBUG ; Version for when we're sure to be in the GC, checks whether or not the card ; needs to be updated @@ -161,195 +151,5 @@ endif LEAF_END_MARKED JIT_WriteBarrier_Debug, _TEXT endif - -extern g_global_alloc_lock:dword -extern g_global_alloc_context:qword - -LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT - - mov r8d, [rcx + OFFSET__MethodTable__m_BaseSize] - - ; m_BaseSize is guaranteed to be a multiple of 8. 
- - inc [g_global_alloc_lock] - jnz JIT_NEW - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], rcx - mov [g_global_alloc_lock], -1 - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - jmp JIT_NEW -LEAF_END JIT_TrialAllocSFastSP, _TEXT - -LEAF_ENTRY AllocateStringFastUP, _TEXT - - ; We were passed the number of characters in ECX - - ; we need to load the method table for string from the global - - mov r11, [g_pStringClass] - - ; Instead of doing elaborate overflow checks, we just limit the number of elements - ; to (LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) or less. - ; This will avoid all overflow problems, as well as making sure - ; big string objects are correctly allocated in the big object heap. - - cmp ecx, (ASM_LARGE_OBJECT_SIZE - 256)/2 - jae FramedAllocateString - - ; Calculate the final size to allocate. - ; We need to calculate baseSize + cnt*2, then round that up by adding 7 and anding ~7. - - lea r8d, [STRING_BASE_SIZE + ecx*2 + 7] - and r8d, -8 - - inc [g_global_alloc_lock] - jnz FramedAllocateString - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], r11 - mov [g_global_alloc_lock], -1 - - mov [rax + OFFSETOF__StringObject__m_StringLength], ecx - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - jmp FramedAllocateString -LEAF_END AllocateStringFastUP, _TEXT - -; HCIMPL2(Object*, JIT_NewArr1VC_UP, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT - - ; We were passed a (shared) method table in RCX, which contains the element type. - - ; The element count is in RDX - - ; NOTE: if this code is ported for CORINFO_HELP_NEWSFAST_ALIGN8, it will need - ; to emulate the double-specific behavior of JIT_TrialAlloc::GenAllocArray. - - ; Do a conservative check here. This is to avoid overflow while doing the calculations. We don't - ; have to worry about "large" objects, since the allocation quantum is never big enough for - ; LARGE_OBJECT_SIZE. - - ; For Value Classes, this needs to be 2^16 - slack (2^32 / max component size), - ; The slack includes the size for the array header and round-up ; for alignment. Use 256 for the - ; slack value out of laziness. - - ; In both cases we do a final overflow check after adding to the alloc_ptr. 
- - cmp rdx, (65535 - 256) - jae JIT_NewArr1 - - movzx r8d, word ptr [rcx + OFFSETOF__MethodTable__m_dwFlags] ; component size is low 16 bits - imul r8d, edx ; signed mul, but won't overflow due to length restriction above - add r8d, dword ptr [rcx + OFFSET__MethodTable__m_BaseSize] - - ; round the size to a multiple of 8 - - add r8d, 7 - and r8d, -8 - - inc [g_global_alloc_lock] - jnz JIT_NewArr1 - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - jc AllocFailed - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], rcx - mov [g_global_alloc_lock], -1 - - mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - jmp JIT_NewArr1 -LEAF_END JIT_NewArr1VC_UP, _TEXT - - -; HCIMPL2(Object*, JIT_NewArr1OBJ_UP, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT - - ; We were passed a (shared) method table in RCX, which contains the element type. - - ; The element count is in RDX - - ; NOTE: if this code is ported for CORINFO_HELP_NEWSFAST_ALIGN8, it will need - ; to emulate the double-specific behavior of JIT_TrialAlloc::GenAllocArray. - - ; Verifies that LARGE_OBJECT_SIZE fits in 32-bit. This allows us to do array size - ; arithmetic using 32-bit registers. - .erre ASM_LARGE_OBJECT_SIZE lt 100000000h - - cmp rdx, (ASM_LARGE_OBJECT_SIZE - 256)/8 ; sizeof(void*) - jae OversizedArray - - ; In this case we know the element size is sizeof(void *), or 8 for x64 - ; This helps us in two ways - we can shift instead of multiplying, and - ; there's no need to align the size either - - mov r8d, dword ptr [rcx + OFFSET__MethodTable__m_BaseSize] - lea r8d, [r8d + edx * 8] - - ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed - ; to be a multiple of 8. 
- - inc [g_global_alloc_lock] - jnz JIT_NewArr1 - - mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__m_CombinedLimit] ; m_CombinedLimit - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr - mov [rax], rcx - mov [g_global_alloc_lock], -1 - - mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx - - ret - - AllocFailed: - mov [g_global_alloc_lock], -1 - - OversizedArray: - jmp JIT_NewArr1 -LEAF_END JIT_NewArr1OBJ_UP, _TEXT - - end diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 9f4e3543dfaad6..299eb1af7c6229 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -113,13 +113,15 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pFrame == offsetof(Thread, m_pFrame)); #define Thread_m_pFrame OFFSETOF__Thread__m_pFrame +#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context)); #define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) + offsetof(gc_alloc_context, alloc_ptr)); -#define OFFSETOF__ee_alloc_context__m_CombinedLimit 0x0 -ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__m_CombinedLimit == offsetof(ee_alloc_context, m_CombinedLimit)); +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); #define OFFSETOF__ThreadExceptionState__m_pCurrentTracker 0x000 ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadExceptionState__m_pCurrentTracker @@ -167,9 +169,11 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); -#define OFFSET__MethodTable__m_BaseSize 0x04 -ASMCONSTANTS_C_ASSERT(OFFSET__MethodTable__m_BaseSize - == offsetof(MethodTable, m_BaseSize)); +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); #define OFFSETOF__MethodTable__m_wNumInterfaces 0x0E ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_wNumInterfaces @@ -186,10 +190,6 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pEEClass #define METHODTABLE_OFFSET_VTABLE DBG_FRE(0x48, 0x40) ASMCONSTANTS_C_ASSERT(METHODTABLE_OFFSET_VTABLE == sizeof(MethodTable)); -#define OFFSETOF__MethodTable__m_ElementType DBG_FRE(0x38, 0x30) -ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_ElementType - == offsetof(MethodTable, m_ElementTypeHnd)); - #define OFFSETOF__MethodTable__m_pPerInstInfo DBG_FRE(0x38, 0x30) ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pPerInstInfo == offsetof(MethodTable, m_pPerInstInfo)); @@ -198,6 +198,11 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pPerInstInfo ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pInterfaceMap == offsetof(MethodTable, m_pInterfaceMap)); +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); + +#define OFFSETOF__Array__m_Length 0x8 
+ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); #define MethodTable_VtableSlotsPerChunk 8 ASMCONSTANTS_C_ASSERT(MethodTable_VtableSlotsPerChunk == VTABLE_SLOTS_PER_CHUNK) @@ -461,10 +466,6 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__FaultingExceptionFrame__m_SSP ASMCONSTANTS_C_ASSERT(OFFSETOF__PtrArray__m_NumComponents == offsetof(PtrArray, m_NumComponents)); -#define OFFSETOF__PtrArray__m_Array 0x10 -ASMCONSTANTS_C_ASSERT(OFFSETOF__PtrArray__m_Array - == offsetof(PtrArray, m_Array)); - #ifndef TARGET_UNIX #define OFFSET__TEB__ThreadLocalStoragePointer 0x58 ASMCONSTANTS_C_ASSERT(OFFSET__TEB__ThreadLocalStoragePointer == offsetof(TEB, ThreadLocalStoragePointer)); @@ -489,19 +490,16 @@ ASMCONSTANTS_RUNTIME_ASSERT(DELEGATE_FIELD_OFFSET__METHOD_AUX == Object::GetOffs CoreLibBinder::GetFieldOffset(FIELD__DELEGATE__METHOD_PTR_AUX)); -#define ASM_LARGE_OBJECT_SIZE 85000 -ASMCONSTANTS_C_ASSERT(ASM_LARGE_OBJECT_SIZE == LARGE_OBJECT_SIZE); +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); -#define OFFSETOF__ArrayBase__m_NumComponents 8 -ASMCONSTANTS_C_ASSERT(OFFSETOF__ArrayBase__m_NumComponents - == offsetof(ArrayBase, m_NumComponents)); +#define STRING_COMPONENT_SIZE 2 -#define STRING_BASE_SIZE 0x16 -ASMCONSTANTS_RUNTIME_ASSERT(STRING_BASE_SIZE == StringObject::GetBaseSize()); +#define STRING_BASE_SIZE 0x16 +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR)); -#define OFFSETOF__StringObject__m_StringLength 0x8 -ASMCONSTANTS_C_ASSERT(OFFSETOF__StringObject__m_StringLength - == offsetof(StringObject, m_StringLength)); +#define SZARRAY_BASE_SIZE 0x18 +ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(DWORD)); // For JIT_PInvokeBegin and JIT_PInvokeEnd helpers #define OFFSETOF__InlinedCallFrame__m_Datum 0x10 diff --git a/src/coreclr/vm/arm/AllocSlow.S b/src/coreclr/vm/arm/AllocSlow.S new file mode 100644 index 00000000000000..0052ea77dbcee9 --- /dev/null +++ b/src/coreclr/vm/arm/AllocSlow.S @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
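+//
+// Slow-path allocation helpers for arm32. As on the other architectures in
+// this change, these entries set up a cooperative-mode transition frame and
+// defer to the shared slow-path allocators (RhpNewObject / RhpGcAllocMaybeFrozen).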
+ +#include "unixasmmacros.inc" +#include "asmconstants.h" + +.syntax unified +.thumb + +// +// Object* RhpNew(MethodTable *pMT) +// +// Allocate non-array object, slow path +// +LEAF_ENTRY RhpNew, _TEXT + mov r1, #0 + b C_FUNC(RhpNewObject) +LEAF_END RhpNew, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT) +// +// Allocate non-array object, may be on frozen heap +// +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME r2 + + mov r1, #0 + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + bx lr +NESTED_END RhpNewMaybeFrozen, _TEXT + +// +// Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +// +// Allocate array object, may be on frozen heap +// +NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME r2 + + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + bx lr +NESTED_END RhpNewArrayMaybeFrozen, _TEXT + +// +// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +// +NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME r2 + + bl C_FUNC(RhExceptionHandling_FailedAllocation_Helper) + + POP_COOP_PINVOKE_FRAME + bx lr +NESTED_END RhExceptionHandling_FailedAllocation, _TEXT diff --git a/src/coreclr/vm/arm/asmconstants.h b/src/coreclr/vm/arm/asmconstants.h index 369241a0d85023..8e72ae93d6f85d 100644 --- a/src/coreclr/vm/arm/asmconstants.h +++ b/src/coreclr/vm/arm/asmconstants.h @@ -62,27 +62,41 @@ ASMCONSTANTS_C_ASSERT(LazyMachState_captureSp == offsetof(LazyMachState, capture #define LazyMachState_captureIp (LazyMachState_captureSp+4) ASMCONSTANTS_C_ASSERT(LazyMachState_captureIp == offsetof(LazyMachState, captureIp)) -#define MethodTable__m_BaseSize 0x04 -ASMCONSTANTS_C_ASSERT(MethodTable__m_BaseSize == offsetof(MethodTable, m_BaseSize)); +#define OFFSETOF__MethodTable__m_dwFlags 0x00 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); -#define MethodTable__m_dwFlags 0x0 -ASMCONSTANTS_C_ASSERT(MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); + +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); + +#define OFFSETOF__Array__m_Length 0x4 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); + +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); + +#define STRING_COMPONENT_SIZE 2 + +#define STRING_BASE_SIZE 0xE +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR)); + +#define SZARRAY_BASE_SIZE 0xC +ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD)); + +#define ASM_MIN_OBJECT_SIZE 0xC +ASMCONSTANTS_C_ASSERT(ASM_MIN_OBJECT_SIZE == MIN_OBJECT_SIZE); #define MethodTable__enum_flag_ContainsGCPointers 0x01000000 ASMCONSTANTS_C_ASSERT(MethodTable__enum_flag_ContainsGCPointers == MethodTable::enum_flag_ContainsGCPointers); -#define MethodTable__m_ElementType DBG_FRE(0x24, 0x20) -ASMCONSTANTS_C_ASSERT(MethodTable__m_ElementType == offsetof(MethodTable, m_ElementTypeHnd)); - #define SIZEOF__MethodTable DBG_FRE(0x2c, 0x28) ASMCONSTANTS_C_ASSERT(SIZEOF__MethodTable == 
sizeof(MethodTable)); -#define ArrayBase__m_NumComponents 0x4 -ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); - -#define PtrArray__m_Array 0x8 -ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array)); - #define TypeHandle_CanCast 0x1 // TypeHandle::CanCast #define SIZEOF__Frame 0x8 @@ -131,6 +145,16 @@ ASMCONSTANTS_C_ASSERT(Thread__m_fPreemptiveGCDisabled == offsetof(Thread, m_fPre ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame)); #define Thread_m_pFrame Thread__m_pFrame +#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context)); + +#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) + + offsetof(gc_alloc_context, alloc_ptr)); + +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); + #define ASM__VTABLE_SLOTS_PER_CHUNK 8 ASMCONSTANTS_C_ASSERT(ASM__VTABLE_SLOTS_PER_CHUNK == VTABLE_SLOTS_PER_CHUNK) diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index 5edbc415bb37d1..9a91389cc830ea 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -1659,29 +1659,11 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); -void InitJITHelpers1() +void InitJITWriteBarrierHelpers() { STANDARD_VM_CONTRACT; - - // Allocation helpers, faster but non-logging. - if (!(TrackAllocationsEnabled() - || LoggingOn(LF_GCALLOC, LL_INFO10) -#ifdef _DEBUG - || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0) -#endif // _DEBUG - )) - { - _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); - - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); - } } - VOID ResetCurrentContext() { LIMITED_METHOD_CONTRACT; diff --git a/src/coreclr/vm/arm64/AllocSlow.S b/src/coreclr/vm/arm64/AllocSlow.S new file mode 100644 index 00000000000000..469c63c3966ce5 --- /dev/null +++ b/src/coreclr/vm/arm64/AllocSlow.S @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "asmconstants.h" +#include "unixasmmacros.inc" + +// +// Object* RhpNew(MethodTable *pMT) +// +// Allocate non-array object, slow path. +// +LEAF_ENTRY RhpNew, _TEXT + + mov x1, 0 + b C_FUNC(RhpNewObject) + +LEAF_END RhpNew, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT) +// +// Allocate non-array object, may be on frozen heap. +// +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME x2 + + mov x1, 0 + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NESTED_END RhpNewMaybeFrozen, _TEXT + +// +// Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size) +// +// Allocate array object, may be on frozen heap. 
+//
+NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME x2
+
+    bl C_FUNC(RhpGcAllocMaybeFrozen)
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+NESTED_END RhpNewArrayMaybeFrozen, _TEXT
+
+//
+// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow)
+//
+NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME x2
+
+    bl C_FUNC(RhExceptionHandling_FailedAllocation_Helper)
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+NESTED_END RhExceptionHandling_FailedAllocation, _TEXT
diff --git a/src/coreclr/vm/arm64/AllocSlow.asm b/src/coreclr/vm/arm64/AllocSlow.asm
new file mode 100644
index 00000000000000..79e13f7a6c0777
--- /dev/null
+++ b/src/coreclr/vm/arm64/AllocSlow.asm
@@ -0,0 +1,73 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+
+#include "ksarm64.h"
+#include "asmconstants.h"
+#include "asmmacros.h"
+
+    IMPORT RhpNewObject
+    IMPORT RhpGcAllocMaybeFrozen
+    IMPORT RhExceptionHandling_FailedAllocation_Helper
+
+    TEXTAREA
+
+;
+; Object* RhpNew(MethodTable *pMT)
+;
+; Allocate non-array object, slow path.
+;
+    LEAF_ENTRY RhpNew
+
+    mov x1, #0
+    b RhpNewObject
+
+    LEAF_END
+
+;
+; Object* RhpNewMaybeFrozen(MethodTable *pMT)
+;
+; Allocate non-array object, may be on frozen heap.
+;
+    NESTED_ENTRY RhpNewMaybeFrozen
+
+    PUSH_COOP_PINVOKE_FRAME x2
+
+    mov x1, 0
+    bl RhpGcAllocMaybeFrozen
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+    NESTED_END
+
+;
+; Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size)
+;
+; Allocate array object, may be on frozen heap.
+;
+    NESTED_ENTRY RhpNewArrayMaybeFrozen
+
+    PUSH_COOP_PINVOKE_FRAME x2
+
+    bl RhpGcAllocMaybeFrozen
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+    NESTED_END
+
+;
+; void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow)
+;
+    NESTED_ENTRY RhExceptionHandling_FailedAllocation
+
+    PUSH_COOP_PINVOKE_FRAME x2
+
+    bl RhExceptionHandling_FailedAllocation_Helper
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+    NESTED_END RhExceptionHandling_FailedAllocation
+
+    END
\ No newline at end of file
diff --git a/src/coreclr/vm/arm64/AsmMacros_Shared.h b/src/coreclr/vm/arm64/AsmMacros_Shared.h
index 053145540810e4..04249f3464cd47 100644
--- a/src/coreclr/vm/arm64/AsmMacros_Shared.h
+++ b/src/coreclr/vm/arm64/AsmMacros_Shared.h
@@ -22,6 +22,9 @@
     IMPORT g_write_watch_table
 #endif
 
+    IMPORT RhpGcAlloc
+    IMPORT RhExceptionHandling_FailedAllocation
+
 ;;-----------------------------------------------------------------------------
 ;; Macro for loading a 64-bit constant by a minimal number of instructions
 ;; Since the asssembles doesn't support 64 bit arithmetics in expressions,
diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h
index ade900bbf84e3d..0711dc779f8b9c 100644
--- a/src/coreclr/vm/arm64/asmconstants.h
+++ b/src/coreclr/vm/arm64/asmconstants.h
@@ -48,6 +48,16 @@ ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame));
 #define Thread_m_pFrame Thread__m_pFrame
 #define Thread_m_fPreemptiveGCDisabled Thread__m_fPreemptiveGCDisabled
 
+#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context));
+
+#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8
+ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) +
+                      offsetof(gc_alloc_context, alloc_ptr));
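+
+// Note: the assembly fast paths compare the bump pointer against
+// combined_limit rather than the raw GC alloc_limit; m_CombinedLimit is the
+// more conservative of ee_alloc_context's bounds (the GC limit combined with
+// any allocation-sampling limit), so a single compare suffices.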
+ +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); + #define METHODDESC_REGISTER x12 #define SIZEOF__ArgumentRegisters 0x40 @@ -153,27 +163,38 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InstantiatedMethodDesc__m_pPerInstInfo == offsetof(InstantiatedMethodDesc, m_pPerInstInfo)); //========================================= -#define MethodTable__m_dwFlags 0x0 -ASMCONSTANTS_C_ASSERT(MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); +#define OFFSETOF__MethodTable__m_dwFlags 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); -#define MethodTable__m_BaseSize 0x04 -ASMCONSTANTS_C_ASSERT(MethodTable__m_BaseSize == offsetof(MethodTable, m_BaseSize)); +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); #define OFFSETOF__MethodTable__m_pPerInstInfo DBG_FRE(0x38, 0x30) ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_pPerInstInfo == offsetof(MethodTable, m_pPerInstInfo)); -#define MethodTable__m_ElementType DBG_FRE(0x38, 0x30) -ASMCONSTANTS_C_ASSERT(MethodTable__m_ElementType == offsetof(MethodTable, m_ElementTypeHnd)); - -#define ArrayBase__m_NumComponents 0x8 -ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); -#define PtrArray__m_Array 0x10 -ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array)); +#define OFFSETOF__Array__m_Length 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); #define TypeHandle_CanCast 0x1 // TypeHandle::CanCast +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); + +#define STRING_COMPONENT_SIZE 2 + +#define STRING_BASE_SIZE 0x16 +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR)); + +#define SZARRAY_BASE_SIZE 0x18 +ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(DWORD)); + //========================================= diff --git a/src/coreclr/vm/arm64/asmmacros.h b/src/coreclr/vm/arm64/asmmacros.h index ed1792ca971d3f..a11067633ab82e 100644 --- a/src/coreclr/vm/arm64/asmmacros.h +++ b/src/coreclr/vm/arm64/asmmacros.h @@ -91,10 +91,10 @@ __PWTB_ArgumentRegister_FirstArg SETA __PWTB_ArgumentRegisters + 8 MEND ;----------------------------------------------------------------------------- -; Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK and returns to caller. +; Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK. ; MACRO - EPILOG_WITH_TRANSITION_BLOCK_RETURN + EPILOG_WITH_TRANSITION_BLOCK EPILOG_STACK_FREE __PWTB_StackAlloc @@ -104,6 +104,15 @@ __PWTB_ArgumentRegister_FirstArg SETA __PWTB_ArgumentRegisters + 8 EPILOG_RESTORE_REG_PAIR x25, x26, #64 EPILOG_RESTORE_REG_PAIR x27, x28, #80 EPILOG_RESTORE_REG_PAIR fp, lr, #176! + MEND + +;----------------------------------------------------------------------------- +; Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK and returns to caller. 
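+; (EPILOG_WITH_TRANSITION_BLOCK above restores the stack and callee-saved
+; registers without returning; the _RETURN variant below simply composes it
+; with EPILOG_RETURN.)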
+; + MACRO + EPILOG_WITH_TRANSITION_BLOCK_RETURN + + EPILOG_WITH_TRANSITION_BLOCK EPILOG_RETURN MEND @@ -154,6 +163,49 @@ __EndLabelName SETS "$FuncName":CC:"_End" MEND +;----------------------------------------------------------------------------- +; Macros used for shared allocation helpers + + SETALIAS t_runtime_thread_locals, ?t_runtime_thread_locals@@3URuntimeThreadLocals@@A + + MACRO + INLINE_GET_ALLOC_CONTEXT_BASE $destReg, $trashReg + + EXTERN $t_runtime_thread_locals + + INLINE_GET_TLS_VAR $destReg, $trashReg, $t_runtime_thread_locals + MEND + +OFFSETOF__ee_alloc_context EQU OFFSETOF__RuntimeThreadLocals__ee_alloc_context + + MACRO + PUSH_COOP_PINVOKE_FRAME $Target + + PROLOG_SAVE_REG_PAIR fp, lr, #-176! + + ; Spill callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #16 + PROLOG_SAVE_REG_PAIR x21, x22, #32 + PROLOG_SAVE_REG_PAIR x23, x24, #48 + PROLOG_SAVE_REG_PAIR x25, x26, #64 + PROLOG_SAVE_REG_PAIR x27, x28, #80 + + mov $Target, sp + MEND + + MACRO + POP_COOP_PINVOKE_FRAME + + EPILOG_RESTORE_REG_PAIR x19, x20, #16 + EPILOG_RESTORE_REG_PAIR x21, x22, #32 + EPILOG_RESTORE_REG_PAIR x23, x24, #48 + EPILOG_RESTORE_REG_PAIR x25, x26, #64 + EPILOG_RESTORE_REG_PAIR x27, x28, #80 + EPILOG_RESTORE_REG_PAIR fp, lr, #176! + MEND + +#define GC_ALLOC_FINALIZE 1 + ;----------------------------------------------------------------------------- ; Macro used to check (in debug builds only) whether the stack is 16-bytes aligned (a requirement before calling ; out into C++/OS code). Invoke this directly after your prolog (if the stack frame size is fixed) or directly diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 43986504527d82..a06955d24a7012 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -840,38 +840,6 @@ void emitCOMStubCall (ComCallMethodDesc *pCOMMethodRX, ComCallMethodDesc *pCOMMe } #endif // FEATURE_COMINTEROP -#if !defined(DACCESS_COMPILE) - -void InitJITHelpers1() -{ - STANDARD_VM_CONTRACT; - - _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0); - - g_WriteBarrierManager.Initialize(); - - // Allocation helpers, faster but non-logging - if (!((TrackAllocationsEnabled()) || - (LoggingOn(LF_GCALLOC, LL_INFO10)) -#ifdef _DEBUG - || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0) -#endif // _DEBUG - )) - { - if (GCHeapUtilities::UseThreadAllocationContexts()) - { - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); - - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); - } - } -} - -#endif // !defined(DACCESS_COMPILE) - #ifdef TARGET_WINDOWS PTR_CONTEXT GetCONTEXTFromRedirectedStubStackFrame(T_DISPATCHER_CONTEXT * pDispatcherContext) { diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index fb0ff126121562..9f6cc475e4f1e8 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -864,14 +864,15 @@ void EEStartupHelper() // Before setting up the execution manager initialize the first part // of the JIT helpers. 
- InitJITHelpers1(); + InitJITAllocationHelpers(); + InitJITWriteBarrierHelpers(); // Set up the sync block SyncBlockCache::Start(); // This isn't done as part of InitializeGarbageCollector() above because it // requires write barriers to have been set up on x86, which happens as part - // of InitJITHelpers1. + // of InitJITWriteBarrierHelpers. hr = g_pGCHeap->Initialize(); if (FAILED(hr)) { diff --git a/src/coreclr/vm/ecall.h b/src/coreclr/vm/ecall.h index f5c8df1b33f999..b28e42823fb943 100644 --- a/src/coreclr/vm/ecall.h +++ b/src/coreclr/vm/ecall.h @@ -100,7 +100,7 @@ class ECall #endif // DACCESS_COMPILE #define _DYNAMICALLY_ASSIGNED_FCALLS_BASE() \ - DYNAMICALLY_ASSIGNED_FCALL_IMPL(FastAllocateString, FramedAllocateString) \ + DYNAMICALLY_ASSIGNED_FCALL_IMPL(FastAllocateString, RhpNewVariableSizeObject) \ DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorCharArrayManaged, NULL) \ DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorCharArrayStartLengthManaged, NULL) \ DYNAMICALLY_ASSIGNED_FCALL_IMPL(CtorCharCountManaged, NULL) \ diff --git a/src/coreclr/vm/eventtrace.cpp b/src/coreclr/vm/eventtrace.cpp index 238ba6a69514c1..8852935a30349e 100644 --- a/src/coreclr/vm/eventtrace.cpp +++ b/src/coreclr/vm/eventtrace.cpp @@ -3667,27 +3667,6 @@ VOID ETW::MethodLog::StubInitialized(ULONGLONG ullHelperStartAddress, LPCWSTR pH } EX_CATCH { } EX_END_CATCH(SwallowAllExceptions); } -/**********************************************************/ -/* This is called by the runtime when helpers with stubs are initialized */ -/**********************************************************/ -VOID ETW::MethodLog::StubsInitialized(PVOID *pHelperStartAddress, PVOID *pHelperNames, LONG lNoOfHelpers) -{ - WRAPPER_NO_CONTRACT; - - if(ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_DOTNET_Context, - TRACE_LEVEL_INFORMATION, - CLR_JIT_KEYWORD)) - { - for(int i=0; iPush(CURRENT_THREAD); + + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + +#ifdef _DEBUG + if (g_pConfig->FastGCStressLevel()) { + GetThread()->DisableStressHeap(); + } +#endif // _DEBUG + + if (pMT->HasComponentSize()) + { + if (pMT == g_pStringClass) + { + newobj = AllocateString((DWORD)numElements); + } + else + { + _ASSERTE(pMT->IsFullyLoaded()); + _ASSERTE(pMT->IsArray()); + _ASSERTE(!pMT->IsMultiDimArray()); + + if (numElements < 0) + COMPlusThrow(kOverflowException); + + #ifdef HOST_64BIT + // Even though ECMA allows using a native int as the argument to newarr instruction + // (therefore size is INT_PTR), ArrayBase::m_NumComponents is 32-bit, so even on 64-bit + // platforms we can't create an array whose size exceeds 32 bits. 
+ if (numElements > INT_MAX) + EX_THROW(EEMessageException, (kOverflowException, IDS_EE_ARRAY_DIMENSIONS_EXCEEDED)); + #endif + + newobj = AllocateSzArray(pMT, (INT32)numElements, uFlags); + } + } + else + { + newobj = AllocateObject(pMT, uFlags); + } + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + + pFrame->Pop(CURRENT_THREAD); + + return OBJECTREFToObject(newobj); +} + +EXTERN_C Object* RhpGcAllocMaybeFrozen(MethodTable* pMT, uintptr_t numElements, TransitionBlock* pTransitionBlock) +{ + OBJECTREF newobj = NULL; + + MAKE_CURRENT_THREAD_AVAILABLE(); + + DynamicHelperFrame frame(pTransitionBlock, 0); + DynamicHelperFrame * pFrame = &frame; + + pFrame->Push(CURRENT_THREAD); + + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + +#ifdef _DEBUG + if (g_pConfig->FastGCStressLevel()) { + GetThread()->DisableStressHeap(); + } +#endif // _DEBUG + + if (pMT->HasComponentSize()) + { + _ASSERTE(pMT->IsFullyLoaded()); + _ASSERTE(pMT->IsArray()); + _ASSERTE(!pMT->IsMultiDimArray()); + + if (numElements < 0) + COMPlusThrow(kOverflowException); + +#ifdef HOST_64BIT + // Even though ECMA allows using a native int as the argument to newarr instruction + // (therefore size is INT_PTR), ArrayBase::m_NumComponents is 32-bit, so even on 64-bit + // platforms we can't create an array whose size exceeds 32 bits. + if (numElements > INT_MAX) + EX_THROW(EEMessageException, (kOverflowException, IDS_EE_ARRAY_DIMENSIONS_EXCEEDED)); +#endif + + newobj = TryAllocateFrozenSzArray(pMT, (INT32)numElements); + if (newobj == NULL) + newobj = AllocateSzArray(pMT, (INT32)numElements); + } + else + { + newobj = TryAllocateFrozenObject(pMT); + if (newobj == NULL) + newobj = AllocateObject(pMT); + } + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + + pFrame->Pop(CURRENT_THREAD); + + return OBJECTREFToObject(newobj); +} + +EXTERN_C void RhExceptionHandling_FailedAllocation_Helper(MethodTable* pMT, bool isOverflow, TransitionBlock* pTransitionBlock) +{ + MAKE_CURRENT_THREAD_AVAILABLE(); + + DynamicHelperFrame frame(pTransitionBlock, 0); + DynamicHelperFrame * pFrame = &frame; + + pFrame->Push(CURRENT_THREAD); + + INSTALL_MANAGED_EXCEPTION_DISPATCHER; + INSTALL_UNWIND_AND_CONTINUE_HANDLER; + + if (isOverflow) + { + COMPlusThrow(kOverflowException); + } + COMPlusThrow(kOutOfMemoryException); + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER; + + pFrame->Pop(CURRENT_THREAD); +} + // When not using per-thread allocation contexts, we (the EE) need to take care that // no two threads are concurrently modifying the global allocation context. This lock // must be acquired before any sort of operations involving the global allocation context diff --git a/src/coreclr/vm/hosting.cpp b/src/coreclr/vm/hosting.cpp index ed7cc1ee997a07..1c403f2c251ed4 100644 --- a/src/coreclr/vm/hosting.cpp +++ b/src/coreclr/vm/hosting.cpp @@ -115,7 +115,7 @@ BOOL ClrVirtualProtect(LPVOID lpAddress, SIZE_T dwSize, DWORD flNewProtect, PDWO // JIT_PatchedCode. Thus, their pages have the same protection, they live // in the same region (and thus, its size is the same). // - // In EEStartupHelper, when we setup the UEF and then invoke InitJitHelpers1, + // In EEStartupHelper, when we setup the UEF and then invoke InitJITWriteBarrierHelpers, // they perform some optimizations that result in the memory page protection being changed. 
When
 // the UEF is to be invoked, the OS does the check on the UEF's cached details against the current
 // memory pages. This check used to fail when on 64bit retail builds when JIT_PatchedCode was
diff --git a/src/coreclr/vm/i386/AllocSlow.S b/src/coreclr/vm/i386/AllocSlow.S
new file mode 100644
index 00000000000000..635d878994d340
--- /dev/null
+++ b/src/coreclr/vm/i386/AllocSlow.S
@@ -0,0 +1,100 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+.intel_syntax noprefix
+#include "asmconstants.h"
+#include "unixasmmacros.inc"
+
+//
+// Object* __fastcall RhpNew(MethodTable *pMT)
+//
+// Allocate non-array object, slow path.
+//
+LEAF_ENTRY RhpNew, _TEXT
+
+    PUSH_COOP_PINVOKE_FRAME eax
+
+    push eax
+    push 0
+    push 0
+    push ecx
+
+    CHECK_STACK_ALIGNMENT
+    call C_FUNC(RhpGcAlloc)
+
+    add esp, 16
+
+    POP_COOP_PINVOKE_FRAME
+    ret
+
+LEAF_END RhpNew, _TEXT
+
+//
+// Object* __fastcall RhpNewMaybeFrozen(MethodTable *pMT)
+//
+// Allocate non-array object, may be on frozen heap.
+//
+NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME eax
+
+    sub esp, 4
+    push eax
+    push 0
+    push ecx
+
+    CHECK_STACK_ALIGNMENT
+    call C_FUNC(RhpGcAllocMaybeFrozen)
+
+    add esp, 16
+
+    POP_COOP_PINVOKE_FRAME
+    ret
+
+NESTED_END RhpNewMaybeFrozen, _TEXT
+
+//
+// Object* __fastcall RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size)
+//
+// Allocate array object, may be on frozen heap.
+//
+NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME eax
+
+    sub esp, 4
+    push eax
+    push edx
+    push ecx
+
+    CHECK_STACK_ALIGNMENT
+    call C_FUNC(RhpGcAllocMaybeFrozen)
+
+    add esp, 16
+
+    POP_COOP_PINVOKE_FRAME
+    ret
+
+NESTED_END RhpNewArrayMaybeFrozen, _TEXT
+
+//
+// void __fastcall RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow)
+//
+NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME eax
+
+    sub esp, 4
+    push eax
+    push edx
+    push ecx
+
+    CHECK_STACK_ALIGNMENT
+    call C_FUNC(RhExceptionHandling_FailedAllocation_Helper)
+
+    add esp, 16
+
+    POP_COOP_PINVOKE_FRAME
+    ret
+
+NESTED_END RhExceptionHandling_FailedAllocation, _TEXT
diff --git a/src/coreclr/vm/i386/AllocSlow.asm b/src/coreclr/vm/i386/AllocSlow.asm
new file mode 100644
index 00000000000000..fcf5a8a8df1488
--- /dev/null
+++ b/src/coreclr/vm/i386/AllocSlow.asm
@@ -0,0 +1,274 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+
+    .586
+    .model flat
+    option casemap:none
+    .code
+
+include asmconstants.inc
+include asmmacros.inc
+
+EXTERN _RhpGcAllocMaybeFrozen@12 : PROC
+EXTERN _RhExceptionHandling_FailedAllocation_Helper@12 : PROC
+EXTERN @RhpNewObject@8 : PROC
+EXTERN @RhpNewVariableSizeObject@8 : PROC
+
+g_global_alloc_lock EQU _g_global_alloc_lock
+g_global_alloc_context EQU _g_global_alloc_context
+
+EXTERN g_global_alloc_lock : DWORD
+EXTERN g_global_alloc_context : DWORD
+
+;
+; Object* RhpNew(MethodTable *pMT)
+;
+; Allocate non-array object, slow path.
+;
+FASTCALL_FUNC RhpNew, 4
+    xor edx, edx
+    jmp @RhpNewObject@8
+FASTCALL_ENDFUNC
+
+;
+; Object* RhpNewMaybeFrozen(MethodTable *pMT)
+;
+; Allocate non-array object, may be on frozen heap.
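+;
+; (Per the C++ helper in gchelpers.cpp earlier in this change, frozen-heap
+; placement is best-effort:
+;     newobj = TryAllocateFrozenObject(pMT);
+;     if (newobj == NULL)
+;         newobj = AllocateObject(pMT);
+; so callers always fall back to a regular GC-heap object.)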
+;
+FASTCALL_FUNC RhpNewMaybeFrozen, 4
+    PUSH_COOP_PINVOKE_FRAME eax
+
+    push eax
+    push 0
+    push ecx
+    call _RhpGcAllocMaybeFrozen@12
+
+    POP_COOP_PINVOKE_FRAME
+    ret
+FASTCALL_ENDFUNC
+
+;
+; Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size)
+;
+; Allocate array object, may be on frozen heap.
+;
+FASTCALL_FUNC RhpNewArrayMaybeFrozen, 8
+    PUSH_COOP_PINVOKE_FRAME eax
+
+    push eax
+    push edx
+    push ecx
+    call _RhpGcAllocMaybeFrozen@12
+
+    POP_COOP_PINVOKE_FRAME
+    ret
+FASTCALL_ENDFUNC
+
+;
+; void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow)
+;
+RhExceptionHandling_FailedAllocation PROC PUBLIC
+    PUSH_COOP_PINVOKE_FRAME eax
+
+    push eax
+    push edx
+    push ecx
+    call _RhExceptionHandling_FailedAllocation_Helper@12
+
+    POP_COOP_PINVOKE_FRAME
+    ret
+RhExceptionHandling_FailedAllocation ENDP
+
+;
+; Object* RhpNewFast_UP(MethodTable *pMT)
+;
+; Allocate non-array object, uniprocessor version
+;
+FASTCALL_FUNC RhpNewFast_UP, 4
+    inc [g_global_alloc_lock]
+    jnz AllocFailed
+
+    mov eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+    add eax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+    jc AllocFailed_Unlock
+    cmp eax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+    ja AllocFailed_Unlock
+    mov [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax
+
+    ; calc the new object pointer and initialize it
+    sub eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize]
+    mov [eax + OFFSETOF__Object__m_pEEType], ecx
+
+    mov [g_global_alloc_lock], -1
+    ret
+
+AllocFailed_Unlock:
+    mov [g_global_alloc_lock], -1
+
+AllocFailed:
+    xor edx, edx
+    jmp @RhpNewObject@8
+FASTCALL_ENDFUNC
+
+;
+; Shared code for RhNewString_UP, RhpNewArrayFast_UP and RhpNewObjectArrayFast_UP
+;  EAX == string/array size
+;  ECX == MethodTable
+;  EDX == character/element count
+;
+NEW_ARRAY_FAST_PROLOG_UP MACRO
+    inc [g_global_alloc_lock]
+    jnz @RhpNewVariableSizeObject@8
+
+    push ecx
+    push edx
+ENDM
+
+NEW_ARRAY_FAST_UP MACRO
+    LOCAL AllocContextOverflow
+
+    ; ECX == MethodTable
+    ; EAX == allocation size
+    ; EDX == string length
+
+    mov ecx, eax
+    add eax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr]
+    jc AllocContextOverflow
+    cmp eax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit]
+    ja AllocContextOverflow
+
+    ; ECX == allocation size
+    ; EAX == new alloc ptr
+
+    ; set the new alloc pointer
+    mov [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], eax
+
+    ; calc the new object pointer
+    sub eax, ecx
+
+    ; Restore the element count and put it in edx
+    pop edx
+    ; Restore the MethodTable and put it in ecx
+    pop ecx
+
+    ; set the new object's MethodTable pointer and element count
+    mov [eax + OFFSETOF__Object__m_pEEType], ecx
+    mov [eax + OFFSETOF__Array__m_Length], edx
+    mov [g_global_alloc_lock], -1
+    ret
+
+AllocContextOverflow:
+    ; Restore the element count and put it in edx
+    pop edx
+    ; Restore the MethodTable and put it in ecx
+    pop ecx
+
+    mov [g_global_alloc_lock], -1
+    jmp @RhpNewVariableSizeObject@8
+ENDM
+
+;
+; Object* RhNewString_UP(MethodTable *pMT, DWORD stringLength)
+;
+; Allocate a string, uniprocessor version
+;
+FASTCALL_FUNC RhNewString_UP, 8
+    ;; Make sure computing the aligned overall allocation size won't overflow
+    cmp edx, MAX_STRING_LENGTH
+    ja StringSizeOverflow
+
+    ; Compute overall allocation size (align(base size + (element size * elements), 4)).
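+    ; (Example: a 10-character string needs 10*2 + 0xE = 0x22 bytes, rounded
+    ; up to 0x24; STRING_BASE_SIZE is defined as 0xE for x86 in asmconstants.h.)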
+ lea eax, [(edx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 3)] + and eax, -4 + + NEW_ARRAY_FAST_PROLOG_UP + NEW_ARRAY_FAST_UP + +StringSizeOverflow: + ;; We get here if the size of the final string object can't be represented as an unsigned + ;; 32-bit value. We're going to tail-call to a managed helper that will throw + ;; an OOM exception that the caller of this allocator understands. + + ;; ecx holds MethodTable pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation +FASTCALL_ENDFUNC + +; +; Object* RhpNewArrayFast_UP(MethodTable *pMT, INT_PTR elementCount) +; +; Allocate one dimensional, zero based array (SZARRAY), uniprocessor version +; +FASTCALL_FUNC RhpNewArrayFast_UP, 8 + NEW_ARRAY_FAST_PROLOG_UP + + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is <= 0x10000, no overflow is possible because the component size is + ; <= 0xffff, and thus the product is <= 0xffff0000, and the base size for the worst case + ; (32 dimensional MdArray) is less than 0xffff. + movzx eax, word ptr [ecx + OFFSETOF__MethodTable__m_usComponentSize] + cmp edx,010000h + ja ArraySizeBig + mul edx + lea eax, [eax + SZARRAY_BASE_SIZE + 3] +ArrayAlignSize: + and eax, -4 + + NEW_ARRAY_FAST_UP + +ArraySizeBig: + ; Compute overall allocation size (align(base size + (element size * elements), 4)). + ; if the element count is negative, it's an overflow, otherwise it's out of memory + cmp edx, 0 + jl ArraySizeOverflow + mul edx + jc ArrayOutOfMemoryNoFrame + add eax, [ecx + OFFSETOF__MethodTable__m_uBaseSize] + jc ArrayOutOfMemoryNoFrame + add eax, 3 + jc ArrayOutOfMemoryNoFrame + jmp ArrayAlignSize + +ArrayOutOfMemoryNoFrame: + add esp, 8 + + ; ecx holds MethodTable pointer already + xor edx, edx ; Indicate that we should throw OOM. + jmp RhExceptionHandling_FailedAllocation + +ArraySizeOverflow: + add esp, 8 + + ; We get here if the size of the final array object can't be represented as an unsigned + ; 32-bit value. We're going to tail-call to a managed helper that will throw + ; an overflow exception that the caller of this allocator understands. 
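+    ; (edx is the isOverflow argument of RhExceptionHandling_FailedAllocation:
+    ; 1 selects OverflowException, 0 selects OutOfMemoryException, matching
+    ; RhExceptionHandling_FailedAllocation_Helper in gchelpers.cpp.)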
+
+    ; ecx holds MethodTable pointer already
+    mov edx, 1 ; Indicate that we should throw OverflowException
+    jmp RhExceptionHandling_FailedAllocation
+FASTCALL_ENDFUNC
+
+;
+; Object* RhpNewObjectArrayFast_UP(MethodTable *pMT, INT_PTR elementCount)
+;
+; Allocate one dimensional, zero based array (SZARRAY) of objects (pointer sized elements),
+; uniprocessor version
+;
+FASTCALL_FUNC RhpNewObjectArrayFast_UP, 8
+    ; Delegate overflow handling to the generic helper conservatively
+
+    cmp edx, (40000000h / 4) ; sizeof(void*)
+    jae @RhpNewVariableSizeObject@8
+
+    ; In this case we know the element size is sizeof(void *), or 4 for x86
+    ; This helps us in two ways - we can shift instead of multiplying, and
+    ; there's no need to align the size either
+
+    lea eax, [edx * 4 + SZARRAY_BASE_SIZE]
+
+    NEW_ARRAY_FAST_PROLOG_UP
+    NEW_ARRAY_FAST_UP
+FASTCALL_ENDFUNC
+
+
+    end
diff --git a/src/coreclr/vm/i386/AsmMacros.inc b/src/coreclr/vm/i386/AsmMacros.inc
index 5acea99ddf6b7c..9098bd413928c2 100644
--- a/src/coreclr/vm/i386/AsmMacros.inc
+++ b/src/coreclr/vm/i386/AsmMacros.inc
@@ -30,8 +30,8 @@ t_CurrentThreadInfo TEXTEQU <?t_CurrentThreadInfo@@3UThreadLocalInfo@@A>
 
 INLINE_GETTHREAD macro destReg, trashReg
     ASSUME fs : NOTHING
-    EXTERN __tls_index:DWORD
-    EXTERN t_CurrentThreadInfo:DWORD
+    EXTERN __tls_index : DWORD
+    EXTERN t_CurrentThreadInfo : DWORD
 
     mov destReg, [__tls_index]
     mov trashReg, fs:[__tls_array]
@@ -60,7 +60,65 @@ LEAF_END_MARKED macro functionName
     nop
     endm
 
+FASTCALL_FUNC macro FuncName,cbArgs
+FuncNameReal EQU @&FuncName&@&cbArgs
+FuncNameReal proc public
+endm
+
+FASTCALL_ENDFUNC macro
+FuncNameReal endp
+endm
+
 PATCH_LABEL macro labelName
 labelName:
     PUBLIC labelName
     endm
+
+t_runtime_thread_locals TEXTEQU <?t_runtime_thread_locals@@3URuntimeThreadLocals@@A>
+
+; Inlined version of GetThreadEEAllocContext. Trashes trashReg
+INLINE_GET_ALLOC_CONTEXT_BASE macro destReg, trashReg
+    ASSUME fs : NOTHING
+
+    EXTERN __tls_index : DWORD
+    EXTERN t_runtime_thread_locals : DWORD
+
+    mov destReg, [__tls_index]
+    mov trashReg, fs:[__tls_array]
+    mov destReg, [trashReg + destReg * 4]
+    add destReg, SECTIONREL t_runtime_thread_locals
+endm
+
+OFFSETOF__ee_alloc_context EQU OFFSETOF__RuntimeThreadLocals__ee_alloc_context
+
+PUSH_COOP_PINVOKE_FRAME macro target
+    ; push ebp-frame
+    push ebp
+    mov ebp, esp
+
+    ; save CalleeSavedRegisters
+    push ebx
+    push esi
+    push edi
+
+    ; reserve space for ArgumentRegisters
+    sub esp, 8
+
+    mov target, esp
+endm
+
+POP_COOP_PINVOKE_FRAME macro
+    ; skip over ArgumentRegisters space
+    add esp, 8
+
+    ; pop CalleeSavedRegisters
+    pop edi
+    pop esi
+    pop ebx
+    pop ebp
+endm
+
+;; GC type flags
+GC_ALLOC_FINALIZE equ 1
+GC_ALLOC_ALIGN8_BIAS equ 4
+GC_ALLOC_ALIGN8 equ 8
diff --git a/src/coreclr/vm/i386/AsmMacros_Shared.h b/src/coreclr/vm/i386/AsmMacros_Shared.h
index 87920d58b2ac65..c96101dcc8f79c 100644
--- a/src/coreclr/vm/i386/AsmMacros_Shared.h
+++ b/src/coreclr/vm/i386/AsmMacros_Shared.h
@@ -3,6 +3,5 @@
 
 // This file is used to allow sharing of assembly code between NativeAOT and CoreCLR, which have different conventions about how to ensure that constants offsets are accessible
 
-#include "unixasmmacros.inc"
 #include "asmconstants.h"
-
+#include "unixasmmacros.inc"
diff --git a/src/coreclr/vm/i386/AsmMacros_Shared.inc b/src/coreclr/vm/i386/AsmMacros_Shared.inc
index 968bcf2674ca5e..dd9de658e38c94 100644
--- a/src/coreclr/vm/i386/AsmMacros_Shared.inc
+++ b/src/coreclr/vm/i386/AsmMacros_Shared.inc
@@ -17,3 +17,8 @@ EXTERN G_HIGHEST_ADDRESS : DWORD
 EXTERN G_EPHEMERAL_LOW : DWORD
 EXTERN G_EPHEMERAL_HIGH : DWORD
 EXTERN G_CARD_TABLE : DWORD
+
+RhpGcAlloc TEXTEQU
<_RhpGcAlloc@16> + +EXTERN RhpGcAlloc : PROC +EXTERN RhExceptionHandling_FailedAllocation : PROC diff --git a/src/coreclr/vm/i386/asmconstants.h b/src/coreclr/vm/i386/asmconstants.h index c594922a195224..a85d6f79e95239 100644 --- a/src/coreclr/vm/i386/asmconstants.h +++ b/src/coreclr/vm/i386/asmconstants.h @@ -178,6 +178,19 @@ ASMCONSTANTS_C_ASSERT(Thread_m_pFrame == offsetof(Thread, m_pFrame)) ASMCONSTANTS_C_ASSERT(Thread::TS_Hijacked == TS_Hijacked_ASM) #endif +#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context)); + +#ifdef TARGET_WINDOWS +#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 +#else +#define OFFSETOF__ee_alloc_context__alloc_ptr 0x4 +#endif +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) + + offsetof(gc_alloc_context, alloc_ptr)); + +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); // from clr/src/vm/appdomain.hpp @@ -201,6 +214,33 @@ ASMCONSTANTS_C_ASSERT(SIZEOF_MethodTable == sizeof(MethodTable)) #define SIZEOF_InterfaceInfo_t 0x4 ASMCONSTANTS_C_ASSERT(SIZEOF_InterfaceInfo_t == sizeof(InterfaceInfo_t)) +#define OFFSETOF__MethodTable__m_dwFlags 0x00 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags + == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); + +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); + +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); + +#define OFFSETOF__Array__m_Length 0x4 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); + +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); + +#define STRING_COMPONENT_SIZE 2 + +#define STRING_BASE_SIZE 0xE +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR)); + +#define SZARRAY_BASE_SIZE 0xC +ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD)); + #ifdef FEATURE_COMINTEROP #ifndef FEATURE_EH_FUNCLETS diff --git a/src/coreclr/vm/i386/asmhelpers.asm b/src/coreclr/vm/i386/asmhelpers.asm index 740b3de93a79db..88c801b35cc2f3 100644 --- a/src/coreclr/vm/i386/asmhelpers.asm +++ b/src/coreclr/vm/i386/asmhelpers.asm @@ -15,6 +15,7 @@ .model flat include asmconstants.inc +include asmmacros.inc assume fs: nothing option casemap:none @@ -99,15 +100,6 @@ UNREFERENCED macro arg unref equ size arg endm -FASTCALL_FUNC macro FuncName,cbArgs -FuncNameReal EQU @&FuncName&@&cbArgs -FuncNameReal proc public -endm - -FASTCALL_ENDFUNC macro -FuncNameReal endp -endm - ifndef FEATURE_EH_FUNCLETS ifdef FEATURE_COMINTEROP ifdef _DEBUG diff --git a/src/coreclr/vm/i386/cgenx86.cpp b/src/coreclr/vm/i386/cgenx86.cpp index c2c5208abdc4c8..ad536bdd9ca3f2 100644 --- a/src/coreclr/vm/i386/cgenx86.cpp +++ b/src/coreclr/vm/i386/cgenx86.cpp @@ -813,13 +813,11 @@ void TailCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloa } #endif // !UNIX_X86_ABI -#ifdef FEATURE_READYTORUN void DynamicHelperFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { 
WRAPPER_NO_CONTRACT; UpdateRegDisplayHelper(pRD, 0); } -#endif // FEATURE_READYTORUN //------------------------------------------------------------------------ // This is declared as returning WORD instead of PRD_TYPE because of diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index 75549b5b8998da..78fa7bf97fb00f 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -28,38 +28,6 @@ #define WRITE_BARRIER_CHECK 1 #endif -// To test with MON_DEBUG off, comment out the following line. DO NOT simply define -// to be 0 as the checks are for #ifdef not #if 0. -// -#ifdef _DEBUG -#define MON_DEBUG 1 -#endif - -class JIT_TrialAlloc -{ -public: - enum Flags - { - NORMAL = 0x0, - MP_ALLOCATOR = 0x1, - SIZE_IN_EAX = 0x2, - OBJ_ARRAY = 0x4, - ALIGN8 = 0x8, // insert a dummy object to ensure 8 byte alignment (until the next GC) - ALIGN8OBJ = 0x10, - }; - - static void *GenAllocSFast(Flags flags); - static void *GenBox(Flags flags); - static void *GenAllocArray(Flags flags); - static void *GenAllocString(Flags flags); - -private: - static void EmitAlignmentRoundup(CPUSTUBLINKER *psl,X86Reg regTestAlign, X86Reg regToAdj, Flags flags); - static void EmitDummyObject(CPUSTUBLINKER *psl, X86Reg regTestAlign, Flags flags); - static void EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *noAlloc, Flags flags); - static void EmitNoAllocCode(CPUSTUBLINKER *psl, Flags flags); -}; - extern "C" LONG g_global_alloc_lock; extern "C" void STDCALL JIT_WriteBarrierReg_PreGrow();// JIThelp.asm/JIThelp.s @@ -110,506 +78,6 @@ void STDCALL JIT_TailCallHelper(Thread * pThread) } #endif // FEATURE_HIJACK - // emit code that adds MIN_OBJECT_SIZE to reg if reg is unaligned thus making it aligned -void JIT_TrialAlloc::EmitAlignmentRoundup(CPUSTUBLINKER *psl, X86Reg testAlignReg, X86Reg adjReg, Flags flags) -{ - STANDARD_VM_CONTRACT; - - _ASSERTE((MIN_OBJECT_SIZE & 7) == 4); // want to change alignment - - CodeLabel *AlreadyAligned = psl->NewCodeLabel(); - - // test reg, 7 - psl->Emit16(0xC0F7 | (static_cast(testAlignReg) << 8)); - psl->Emit32(0x7); - - // jz alreadyAligned - if (flags & ALIGN8OBJ) - { - psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJNZ); - } - else - { - psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJZ); - } - - psl->X86EmitAddReg(adjReg, MIN_OBJECT_SIZE); - // AlreadyAligned: - psl->EmitLabel(AlreadyAligned); -} - - // if 'reg' is unaligned, then set the dummy object at EAX and increment EAX past - // the dummy object -void JIT_TrialAlloc::EmitDummyObject(CPUSTUBLINKER *psl, X86Reg alignTestReg, Flags flags) -{ - STANDARD_VM_CONTRACT; - - CodeLabel *AlreadyAligned = psl->NewCodeLabel(); - - // test reg, 7 - psl->Emit16(0xC0F7 | (static_cast(alignTestReg) << 8)); - psl->Emit32(0x7); - - // jz alreadyAligned - if (flags & ALIGN8OBJ) - { - psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJNZ); - } - else - { - psl->X86EmitCondJump(AlreadyAligned, X86CondCode::kJZ); - } - - // Make the fake object - // mov EDX, [g_pObjectClass] - psl->Emit16(0x158B); - psl->Emit32((int)(size_t)&g_pObjectClass); - - // mov [EAX], EDX - psl->X86EmitOffsetModRM(0x89, kEDX, kEAX, 0); - - // add EAX, MIN_OBJECT_SIZE - psl->X86EmitAddReg(kEAX, MIN_OBJECT_SIZE); - - // AlreadyAligned: - psl->EmitLabel(AlreadyAligned); -} - -void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *noAlloc, Flags flags) -{ - STANDARD_VM_CONTRACT; - - // Upon entry here, ecx contains the method we are to try allocate memory 
for - // Upon exit, eax contains the allocated memory, edx is trashed, and ecx undisturbed - - if (flags & MP_ALLOCATOR) - { - if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ)) - { - if (flags & ALIGN8OBJ) - { - // mov eax, [ecx]MethodTable.m_BaseSize - psl->X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize)); - } - - psl->X86EmitPushReg(kEBX); // we need a spare register - } - else - { - // mov eax, [ecx]MethodTable.m_BaseSize - psl->X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize)); - } - - assert( ((flags & ALIGN8)==0 || // EAX loaded by else statement - (flags & SIZE_IN_EAX) || // EAX already comes filled out - (flags & ALIGN8OBJ) ) // EAX loaded in the if (flags & ALIGN8OBJ) statement - && "EAX should contain size for allocation and it doesnt!!!"); - - // Fetch current thread into EDX, preserving EAX and ECX - psl->X86EmitCurrentThreadAllocContextFetch(kEDX, (1 << kEAX) | (1 << kECX)); - - // Try the allocation. - - - if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ)) - { - // MOV EBX, [edx]alloc_context.m_GCAllocContext.alloc_ptr - psl->X86EmitOffsetModRM(0x8B, kEBX, kEDX, ee_alloc_context::getAllocPtrFieldOffset()); - // add EAX, EBX - psl->Emit16(0xC303); - if (flags & ALIGN8) - EmitAlignmentRoundup(psl, kEBX, kEAX, flags); // bump EAX up size by 12 if EBX unaligned (so that we are aligned) - } - else - { - // add eax, [edx]alloc_context.m_GCAllocContext.alloc_ptr - psl->X86EmitOffsetModRM(0x03, kEAX, kEDX, ee_alloc_context::getAllocPtrFieldOffset()); - } - - // cmp eax, [edx]alloc_context.m_CombinedLimit - psl->X86EmitOffsetModRM(0x3b, kEAX, kEDX, ee_alloc_context::getCombinedLimitFieldOffset()); - - // ja noAlloc - psl->X86EmitCondJump(noAlloc, X86CondCode::kJA); - - // Fill in the allocation and get out. - - // mov [edx]alloc_context.m_GCAllocContext.alloc_ptr, eax - psl->X86EmitIndexRegStore(kEDX, ee_alloc_context::getAllocPtrFieldOffset(), kEAX); - - if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ)) - { - // mov EAX, EBX - psl->Emit16(0xC38B); - // pop EBX - psl->X86EmitPopReg(kEBX); - - if (flags & ALIGN8) - EmitDummyObject(psl, kEAX, flags); - } - else - { - // sub eax, [ecx]MethodTable.m_BaseSize - psl->X86EmitOffsetModRM(0x2b, kEAX, kECX, offsetof(MethodTable, m_BaseSize)); - } - - // mov dword ptr [eax], ecx - psl->X86EmitIndexRegStore(kEAX, 0, kECX); - } - else - { - // Take the GC lock (there is no lock prefix required - we will use JIT_TrialAllocSFastMP on an MP System). - // inc dword ptr [g_global_alloc_lock] - psl->Emit16(0x05ff); - psl->Emit32((int)(size_t)&g_global_alloc_lock); - - // jnz NoLock - psl->X86EmitCondJump(noLock, X86CondCode::kJNZ); - - if (flags & SIZE_IN_EAX) - { - // mov edx, eax - psl->Emit16(0xd08b); - } - else - { - // mov edx, [ecx]MethodTable.m_BaseSize - psl->X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable, m_BaseSize)); - } - - // mov eax, dword ptr [g_global_alloc_context.m_GCAllocContext.alloc_ptr] - psl->Emit8(0xA1); - psl->Emit32((int)(size_t)&g_global_alloc_context + ee_alloc_context::getAllocPtrFieldOffset()); - - // Try the allocation. 
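// For reference, both the per-thread branch above and this global-lock branch emit
// the same trial allocation, equivalent to the following C-level sketch (ignoring the
// ALIGN8 dummy-object handling; TryBumpAlloc is an illustrative name, not a runtime
// function, and getCombinedLimit() never exceeds the real alloc_limit):
//
//     Object* TryBumpAlloc(ee_alloc_context* ctx, MethodTable* pMT, SIZE_T size)
//     {
//         BYTE* ptr = ctx->m_GCAllocContext.alloc_ptr;
//         if (size > static_cast<SIZE_T>(ctx->getCombinedLimit() - ptr))
//             return NULL;                               // jumps to noAlloc
//         ctx->m_GCAllocContext.alloc_ptr = ptr + size;  // bump the pointer
//         Object* obj = reinterpret_cast<Object*>(ptr);
//         obj->SetMethodTable(pMT);                      // the closing "mov [eax], ecx"
//         return obj;
//     }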
- // add edx, eax - psl->Emit16(0xd003); - - if (flags & (ALIGN8 | ALIGN8OBJ)) - EmitAlignmentRoundup(psl, kEAX, kEDX, flags); // bump up EDX size by 12 if EAX unaligned (so that we are aligned) - - // cmp edx, dword ptr [g_global_alloc_context.m_CombinedLimit] - psl->Emit16(0x153b); - psl->Emit32((int)(size_t)&g_global_alloc_context + ee_alloc_context::getCombinedLimitFieldOffset()); - - // ja noAlloc - psl->X86EmitCondJump(noAlloc, X86CondCode::kJA); - - // Fill in the allocation and get out. - // mov dword ptr [g_global_alloc_context.m_GCAllocContext.alloc_ptr], edx - psl->Emit16(0x1589); - psl->Emit32((int)(size_t)&g_global_alloc_context + ee_alloc_context::getAllocPtrFieldOffset()); - - if (flags & (ALIGN8 | ALIGN8OBJ)) - EmitDummyObject(psl, kEAX, flags); - - // mov dword ptr [eax], ecx - psl->X86EmitIndexRegStore(kEAX, 0, kECX); - - // mov dword ptr [g_global_alloc_lock], 0FFFFFFFFh - psl->Emit16(0x05C7); - psl->Emit32((int)(size_t)&g_global_alloc_lock); - psl->Emit32(0xFFFFFFFF); - } - - -#ifdef INCREMENTAL_MEMCLR - // We're planning to get rid of this anyhow according to Patrick - _ASSERTE(!"NYI"); -#endif // INCREMENTAL_MEMCLR -} - -void JIT_TrialAlloc::EmitNoAllocCode(CPUSTUBLINKER *psl, Flags flags) -{ - STANDARD_VM_CONTRACT; - - if (flags & MP_ALLOCATOR) - { - if (flags & (ALIGN8|SIZE_IN_EAX)) - psl->X86EmitPopReg(kEBX); - } - else - { - // mov dword ptr [g_global_alloc_lock], 0FFFFFFFFh - psl->Emit16(0x05c7); - psl->Emit32((int)(size_t)&g_global_alloc_lock); - psl->Emit32(0xFFFFFFFF); - } -} - -FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_); - -void *JIT_TrialAlloc::GenAllocSFast(Flags flags) -{ - STANDARD_VM_CONTRACT; - - CPUSTUBLINKER sl; - - CodeLabel *noLock = sl.NewCodeLabel(); - CodeLabel *noAlloc = sl.NewCodeLabel(); - - // Emit the main body of the trial allocator, be it SP or MP - EmitCore(&sl, noLock, noAlloc, flags); - - // Here we are at the end of the success case - just emit a ret - sl.X86EmitReturn(0); - - // Come here in case of no space - sl.EmitLabel(noAlloc); - - // Release the lock in the uniprocessor case - EmitNoAllocCode(&sl, flags); - - // Come here in case of failure to get the lock - sl.EmitLabel(noLock); - - // Jump to the framed helper - sl.X86EmitNearJump(sl.NewExternalCodeLabel((LPVOID)JIT_New)); - - Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap(), NEWSTUB_FL_NONE, "AllocSFast"); - - return (void *)pStub->GetEntryPoint(); -} - -void *JIT_TrialAlloc::GenAllocArray(Flags flags) -{ - STANDARD_VM_CONTRACT; - - CPUSTUBLINKER sl; - - CodeLabel *noLock = sl.NewCodeLabel(); - CodeLabel *noAlloc = sl.NewCodeLabel(); - - // We were passed a (shared) method table in RCX, which contains the element type. - - // If this is the allocator for use from unmanaged code, ECX contains the - // element type descriptor, or the CorElementType. - - // We need to save ECX for later - - // push ecx - sl.X86EmitPushReg(kECX); - - // The element count is in EDX - we need to save it for later. - - // push edx - sl.X86EmitPushReg(kEDX); - - // Do a conservative check here. This is to avoid doing overflow checks within this function. We'll - // still have to do a size check before running through the body of EmitCore. The way we do the check - // against the allocation quantum there requires that we not overflow when adding the size to the - // current allocation context pointer. There is exactly LARGE_OBJECT_SIZE of headroom there, so do that - // check before we EmitCore. 
- // - // For reference types, we can just pick the correct value of maxElems and skip the second check. - // - // By the way, we use 258 as a "slack" value to ensure that we don't overflow because of the size of the - // array header or alignment. - sl.Emit16(0xfa81); - - unsigned maxElems = 0xffff - 256; - - if (flags & OBJ_ARRAY) - { - //Since we know that the array elements are sizeof(OBJECTREF), set maxElems exactly here (use the - //same slack from above. - maxElems = min(maxElems, (LARGE_OBJECT_SIZE/sizeof(OBJECTREF)) - 256); - } - sl.Emit32(maxElems); - - - // jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet - sl.X86EmitCondJump(noLock, X86CondCode::kJAE); - - if (flags & OBJ_ARRAY) - { - // In this case we know the element size is sizeof(void *), or 4 for x86 - // This helps us in two ways - we can shift instead of multiplying, and - // there's no need to align the size either - - _ASSERTE(sizeof(void *) == 4); - - // mov eax, [ecx]MethodTable.m_BaseSize - sl.X86EmitIndexRegLoad(kEAX, kECX, offsetof(MethodTable, m_BaseSize)); - - // lea eax, [eax+edx*4] - sl.X86EmitOp(0x8d, kEAX, kEAX, 0, kEDX, 4); - } - else - { - // movzx eax, [ECX]MethodTable.m_dwFlags /* component size */ - sl.Emit8(0x0f); - sl.X86EmitOffsetModRM(0xb7, kEAX, kECX, offsetof(MethodTable, m_dwFlags /* component size */)); - - // mul eax, edx - sl.Emit16(0xe2f7); - - // add eax, [ecx]MethodTable.m_BaseSize - sl.X86EmitOffsetModRM(0x03, kEAX, kECX, offsetof(MethodTable, m_BaseSize)); - - // Since this is an array of value classes, we need an extra compare here to make sure we're still - // less than LARGE_OBJECT_SIZE. This is the last bit of arithmetic before we compare against the - // allocation context, so do it here. - - // cmp eax, LARGE_OBJECT_SIZE - // ja noLock - sl.Emit8(0x3d); - sl.Emit32(LARGE_OBJECT_SIZE); - sl.X86EmitCondJump(noLock, X86CondCode::kJA); - } - -#if DATA_ALIGNMENT == 4 - if (flags & OBJ_ARRAY) - { - // No need for rounding in this case - element size is 4, and m_BaseSize is guaranteed - // to be a multiple of 4. 
- } - else -#endif // DATA_ALIGNMENT == 4 - { - // round the size to a multiple of 4 - - // add eax, 3 - sl.X86EmitAddReg(kEAX, (DATA_ALIGNMENT-1)); - - // and eax, ~3 - sl.Emit16(0xe083); - sl.Emit8(~(DATA_ALIGNMENT-1)); - } - - flags = (Flags)(flags | SIZE_IN_EAX); - - // Emit the main body of the trial allocator, be it SP or MP - EmitCore(&sl, noLock, noAlloc, flags); - - // Here we are at the end of the success case - store element count - // and possibly the element type descriptor and return - - // pop edx - element count - sl.X86EmitPopReg(kEDX); - - // pop ecx - array method table - sl.X86EmitPopReg(kECX); - - // mov dword ptr [eax]ArrayBase.m_NumComponents, edx - sl.X86EmitIndexRegStore(kEAX, offsetof(ArrayBase,m_NumComponents), kEDX); - - // no stack parameters - sl.X86EmitReturn(0); - - // Come here in case of no space - sl.EmitLabel(noAlloc); - - // Release the lock in the uniprocessor case - EmitNoAllocCode(&sl, flags); - - // Come here in case of failure to get the lock - sl.EmitLabel(noLock); - - // pop edx - element count - sl.X86EmitPopReg(kEDX); - - // pop ecx - array method table - sl.X86EmitPopReg(kECX); - - // Jump to the framed helper - CodeLabel * target = sl.NewExternalCodeLabel((LPVOID)JIT_NewArr1); - _ASSERTE(target->e.m_pExternalAddress); - sl.X86EmitNearJump(target); - - Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap(), NEWSTUB_FL_NONE, "AllocArray"); - - return (void *)pStub->GetEntryPoint(); -} - - -void *JIT_TrialAlloc::GenAllocString(Flags flags) -{ - STANDARD_VM_CONTRACT; - - CPUSTUBLINKER sl; - - CodeLabel *noLock = sl.NewCodeLabel(); - CodeLabel *noAlloc = sl.NewCodeLabel(); - - // We were passed the number of characters in ECX - - // push ecx - sl.X86EmitPushReg(kECX); - - // mov eax, ecx - sl.Emit16(0xc18b); - - // we need to load the method table for string from the global - - // mov ecx, [g_pStringClass] - sl.Emit16(0x0d8b); - sl.Emit32((int)(size_t)&g_pStringClass); - - // Instead of doing elaborate overflow checks, we just limit the number of elements - // to (LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) or less. - // This will avoid all overflow problems, as well as making sure - // big string objects are correctly allocated in the big object heap. - - _ASSERTE(sizeof(WCHAR) == 2); - - // cmp edx,(LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) - sl.Emit16(0xf881); - sl.Emit32((LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR)); - - // jae noLock - seems tempting to jump to noAlloc, but we haven't taken the lock yet - sl.X86EmitCondJump(noLock, X86CondCode::kJAE); - - // Calculate the final size to allocate. - // We need to calculate baseSize + cnt*2, then round that up by adding 3 and anding ~3. 
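// As a C-level cross-check of the lea/and pair emitted below (cch is an illustrative
// name for the character count that was copied into eax above):
//
//     SIZE_T size = (StringObject::GetBaseSize() + (DATA_ALIGNMENT - 1) + cch * sizeof(WCHAR))
//                   & ~(SIZE_T)(DATA_ALIGNMENT - 1);
//
// The single lea folds the base size, the alignment slack, and the scale-by-2 of the
// character count into one instruction; the and then truncates to a DATA_ALIGNMENT
// boundary, which together implement the round-up described above.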
- - // lea eax, [basesize+(alignment-1)+eax*2] - sl.Emit16(0x048d); - sl.Emit8(0x45); - sl.Emit32(StringObject::GetBaseSize() + (DATA_ALIGNMENT-1)); - - // and eax, ~3 - sl.Emit16(0xe083); - sl.Emit8(~(DATA_ALIGNMENT-1)); - - flags = (Flags)(flags | SIZE_IN_EAX); - - // Emit the main body of the trial allocator, be it SP or MP - EmitCore(&sl, noLock, noAlloc, flags); - - // Here we are at the end of the success case - store element count - // and possibly the element type descriptor and return - - // pop ecx - element count - sl.X86EmitPopReg(kECX); - - // mov dword ptr [eax]ArrayBase.m_StringLength, ecx - sl.X86EmitIndexRegStore(kEAX, offsetof(StringObject,m_StringLength), kECX); - - // no stack parameters - sl.X86EmitReturn(0); - - // Come here in case of no space - sl.EmitLabel(noAlloc); - - // Release the lock in the uniprocessor case - EmitNoAllocCode(&sl, flags); - - // Come here in case of failure to get the lock - sl.EmitLabel(noLock); - - // pop ecx - element count - sl.X86EmitPopReg(kECX); - - // Jump to the framed helper - CodeLabel * target = sl.NewExternalCodeLabel((LPVOID)FramedAllocateString); - sl.X86EmitNearJump(target); - - Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap(), NEWSTUB_FL_NONE, "AllocString"); - - return (void *)pStub->GetEntryPoint(); -} - #define NUM_WRITE_BARRIERS 6 static const BYTE c_rgWriteBarrierRegs[NUM_WRITE_BARRIERS] = { @@ -646,58 +114,10 @@ static const void * const c_rgDebugWriteBarriers[NUM_WRITE_BARRIERS] = { // EE infrastructure to be in place. /*********************************************************************/ #pragma warning (disable : 4731) -void InitJITHelpers1() +void InitJITWriteBarrierHelpers() { STANDARD_VM_CONTRACT; -#define ETW_NUM_JIT_HELPERS 10 - static const LPCWSTR pHelperNames[ETW_NUM_JIT_HELPERS] = { - W("@NewObject"), - W("@NewObjectAlign8"), - W("@NewArray1Object"), - W("@NewArray1ValueType"), - W("@NewArray1ObjectAlign8"), - W("@StaticBaseObject"), - W("@StaticBaseNonObject"), - W("@StaticBaseObjectNoCCtor"), - W("@StaticBaseNonObjectNoCCtor") - }; - - PVOID pMethodAddresses[ETW_NUM_JIT_HELPERS]={0}; - - _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0); - - JIT_TrialAlloc::Flags flags = GCHeapUtilities::UseThreadAllocationContexts() ? - JIT_TrialAlloc::MP_ALLOCATOR : JIT_TrialAlloc::NORMAL; - - if (!(TrackAllocationsEnabled() - || LoggingOn(LF_GCALLOC, LL_INFO10) -#ifdef _DEBUG - || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0) -#endif - ) - ) - { - // Replace the slow helpers with faster version - - pMethodAddresses[0] = JIT_TrialAlloc::GenAllocSFast(flags); - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, pMethodAddresses[0]); - pMethodAddresses[1] = JIT_TrialAlloc::GenAllocSFast((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::ALIGN8 | JIT_TrialAlloc::ALIGN8OBJ)); - SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, pMethodAddresses[1]); - pMethodAddresses[2] = JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::OBJ_ARRAY)); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, pMethodAddresses[2]); - pMethodAddresses[3] = JIT_TrialAlloc::GenAllocArray(flags); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, pMethodAddresses[3]); - pMethodAddresses[4] = JIT_TrialAlloc::GenAllocArray((JIT_TrialAlloc::Flags)(flags|JIT_TrialAlloc::ALIGN8)); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_ALIGN8, pMethodAddresses[4]); - - // If allocation logging is on, then we divert calls to FastAllocateString to an Ecall method, not this - // generated method. 
Find this workaround in Ecall::Init() in ecall.cpp. - ECall::DynamicallyAssignFCallImpl((PCODE) JIT_TrialAlloc::GenAllocString(flags), ECall::FastAllocateString); - } - - ETW::MethodLog::StubsInitialized(pMethodAddresses, (PVOID *)pHelperNames, ETW_NUM_JIT_HELPERS); - // All write barrier helpers should fit into one page. // If you hit this assert on retail build, there is most likely problem with BBT script. _ASSERTE_ALL_BUILDS((BYTE*)JIT_WriteBarrierGroup_End - (BYTE*)JIT_WriteBarrierGroup < (ptrdiff_t)GetOsPageSize()); diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index bbb9173759f6d4..ad519187e118a2 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -475,42 +475,6 @@ class X86NearJump : public InstructionFormat }; -//----------------------------------------------------------------------- -// InstructionFormat for conditional jump. Set the variationCode -// to members of X86CondCode. -//----------------------------------------------------------------------- -class X86CondJump : public InstructionFormat -{ - public: - X86CondJump(UINT allowedSizes) : InstructionFormat(allowedSizes) - { - LIMITED_METHOD_CONTRACT; - } - - virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode) - { - LIMITED_METHOD_CONTRACT - return (refsize == k8 ? 2 : 6); - } - - virtual VOID EmitInstruction(UINT refsize, int64_t fixedUpReference, BYTE *pOutBufferRX, BYTE *pOutBufferRW, UINT variationCode, BYTE *pDataBuffer) - { - LIMITED_METHOD_CONTRACT - if (refsize == k8) - { - pOutBufferRW[0] = static_cast(0x70 | variationCode); - *((int8_t*)(pOutBufferRW+1)) = (int8_t)fixedUpReference; - } - else - { - pOutBufferRW[0] = 0x0f; - pOutBufferRW[1] = static_cast(0x80 | variationCode); - *((int32_t*)(pOutBufferRW+2)) = (int32_t)fixedUpReference; - } - } -}; - - //----------------------------------------------------------------------- // InstructionFormat for near call. 
//----------------------------------------------------------------------- @@ -770,7 +734,6 @@ static BYTE gX64LeaRIP[sizeof(X64LeaRIP)]; #endif static BYTE gX86NearJump[sizeof(X86NearJump)]; -static BYTE gX86CondJump[sizeof(X86CondJump)]; static BYTE gX86Call[sizeof(X86Call)]; static BYTE gX86PushImm32[sizeof(X86PushImm32)]; @@ -784,7 +747,6 @@ static BYTE gX86PushImm32[sizeof(X86PushImm32)]; } CONTRACTL_END; new (gX86NearJump) X86NearJump(); - new (gX86CondJump) X86CondJump( InstructionFormat::k8|InstructionFormat::k32); new (gX86Call) X86Call(); new (gX86PushImm32) X86PushImm32(InstructionFormat::k32); @@ -823,22 +785,6 @@ VOID StubLinkerCPU::X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg) Emit8(static_cast(0xC0 | (srcReg << 3) | destReg)); } -//--------------------------------------------------------------- - -VOID StubLinkerCPU::X86EmitMovSPReg(X86Reg srcReg) -{ - STANDARD_VM_CONTRACT; - const X86Reg kESP = (X86Reg)4; - X86EmitMovRegReg(kESP, srcReg); -} - -VOID StubLinkerCPU::X86EmitMovRegSP(X86Reg destReg) -{ - STANDARD_VM_CONTRACT; - const X86Reg kESP = (X86Reg)4; - X86EmitMovRegReg(destReg, kESP); -} - #ifdef TARGET_X86 //--------------------------------------------------------------- // Emits: @@ -878,32 +824,6 @@ VOID StubLinkerCPU::X86EmitPushImm32(UINT32 value) } -//--------------------------------------------------------------- -// Emits: -// PUSH -//--------------------------------------------------------------- -VOID StubLinkerCPU::X86EmitPushImm32(CodeLabel &target) -{ - STANDARD_VM_CONTRACT; - - EmitLabelRef(&target, reinterpret_cast(gX86PushImm32), 0); -} - - -//--------------------------------------------------------------- -// Emits: -// PUSH -//--------------------------------------------------------------- -VOID StubLinkerCPU::X86EmitPushImm8(BYTE value) -{ - STANDARD_VM_CONTRACT; - - Emit8(0x6a); - Emit8(value); - Push(sizeof(void*)); -} - - //--------------------------------------------------------------- // Emits: // PUSH @@ -953,106 +873,6 @@ VOID StubLinkerCPU::X86EmitJumpReg(X86Reg reg) Emit8(static_cast(0xe0) | static_cast(reg)); } -//--------------------------------------------------------------- -// Emits: -// CMP ,imm32 -//--------------------------------------------------------------- -VOID StubLinkerCPU::X86EmitCmpRegImm32(X86Reg reg, INT32 imm32) -{ - CONTRACTL - { - STANDARD_VM_CHECK; - PRECONDITION((int) reg < NumX86Regs); - } - CONTRACTL_END; - -#ifdef TARGET_AMD64 - BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; - - if (reg >= kR8) - { - rex |= REX_OPCODE_REG_EXT; - reg = X86RegFromAMD64Reg(reg); - } - Emit8(rex); -#endif - - if (FitsInI1(imm32)) { - Emit8(0x83); - Emit8(static_cast(0xF8 | reg)); - Emit8((INT8)imm32); - } else { - Emit8(0x81); - Emit8(static_cast(0xF8 | reg)); - Emit32(imm32); - } -} - -#ifdef TARGET_AMD64 -//--------------------------------------------------------------- -// Emits: -// CMP [reg+offs], imm32 -// CMP [reg], imm32 -//--------------------------------------------------------------- -VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32) -{ - STANDARD_VM_CONTRACT; - - BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; - - if (reg >= kR8) - { - rex |= REX_OPCODE_REG_EXT; - reg = X86RegFromAMD64Reg(reg); - } - Emit8(rex); - - X64EmitCmp32RegIndexImm32(reg, offs, imm32); -} - -VOID StubLinkerCPU:: X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32) -#else // TARGET_AMD64 -VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32) -#endif // TARGET_AMD64 -{ - 
CONTRACTL
-    {
-        STANDARD_VM_CHECK;
-        PRECONDITION((int) reg < NumX86Regs);
-    }
-    CONTRACTL_END;
-
-    //
-    // The binary representation of "cmp [mem], imm32" is :
-    //
-    //   1000-00sw mod11-1r/m
-    //
-
-    unsigned wBit = (FitsInI1(imm32) ? 0 : 1);
-    Emit8(static_cast<BYTE>(0x80 | wBit));
-
-    unsigned modBits;
-    if (offs == 0)
-        modBits = 0;
-    else if (FitsInI1(offs))
-        modBits = 1;
-    else
-        modBits = 2;
-
-    Emit8(static_cast<BYTE>((modBits << 6) | 0x38 | reg));
-
-    if (offs)
-    {
-        if (FitsInI1(offs))
-            Emit8((INT8)offs);
-        else
-            Emit32(offs);
-    }
-
-    if (FitsInI1(imm32))
-        Emit8((INT8)imm32);
-    else
-        Emit32(imm32);
-}

 //---------------------------------------------------------------
 // Emits:
@@ -1066,94 +886,6 @@ VOID StubLinkerCPU::X86EmitNearJump(CodeLabel *target)
 }


-//---------------------------------------------------------------
-// Emits:
-//    Jcc <ofs8> or
-//    Jcc <ofs32>
-//---------------------------------------------------------------
-VOID StubLinkerCPU::X86EmitCondJump(CodeLabel *target, X86CondCode::cc condcode)
-{
-    STANDARD_VM_CONTRACT;
-    EmitLabelRef(target, reinterpret_cast<X86CondJump&>(gX86CondJump), condcode);
-}
-
-
-//---------------------------------------------------------------
-// Emits:
-//    call <ofs32>
-//---------------------------------------------------------------
-VOID StubLinkerCPU::X86EmitCall(CodeLabel *target, int iArgBytes)
-{
-    STANDARD_VM_CONTRACT;
-
-    EmitLabelRef(target, reinterpret_cast<X86Call&>(gX86Call), 0);
-
-    INDEBUG(Emit8(0x90));   // Emit a nop after the call in debug so that
-                            // we know that this is a call that can directly call
-                            // managed code
-#ifndef TARGET_AMD64
-    Pop(iArgBytes);
-#endif // !TARGET_AMD64
-}
-
-
-//---------------------------------------------------------------
-// Emits:
-//    ret n
-//---------------------------------------------------------------
-VOID StubLinkerCPU::X86EmitReturn(WORD wArgBytes)
-{
-    CONTRACTL
-    {
-        STANDARD_VM_CHECK;
-#if defined(TARGET_AMD64) || defined(UNIX_X86_ABI)
-        PRECONDITION(wArgBytes == 0);
-#endif
-
-    }
-    CONTRACTL_END;
-
-    if (wArgBytes == 0)
-        Emit8(0xc3);
-    else
-    {
-        Emit8(0xc2);
-        Emit16(wArgBytes);
-    }
-
-#ifdef TARGET_X86
-    Pop(wArgBytes);
-#endif
-}
-
-#ifdef TARGET_X86
-VOID StubLinkerCPU::X86EmitPushRegs(unsigned regSet)
-{
-    STANDARD_VM_CONTRACT;
-
-    for (X86Reg r = kEAX; regSet > 0; r = (X86Reg)(r+1))
-        if (regSet & (1U<<r))
-        {
-            X86EmitPushReg(r);
-            regSet &= ~(1U<<r);
-        }
-}
-
-VOID StubLinkerCPU::X86EmitPopRegs(unsigned regSet)
-{
-    STANDARD_VM_CONTRACT;
-
-    for (X86Reg r = (X86Reg)(NumX86Regs-1); regSet > 0; r = (X86Reg)(r-1))
-        if (regSet & (1U<<r))
-        {
-            X86EmitPopReg(r);
-            regSet &= ~(1U<<r);
-        }
-}
-#endif // TARGET_X86
-
 //---------------------------------------------------------------
 // Emits:
 //    push dword ptr [<reg> + <ofs>]
@@ -1204,52 +936,6 @@ VOID StubLinkerCPU::X86EmitIndexPush(X86Reg srcreg, int32_t ofs)
     Push(sizeof(void*));
 }

-//---------------------------------------------------------------
-// Emits:
-//    push dword ptr [<baseReg> + <indexReg>*<scale> + <ofs>]
-//---------------------------------------------------------------
-VOID StubLinkerCPU::X86EmitBaseIndexPush(
-        X86Reg baseReg,
-        X86Reg indexReg,
-        int32_t scale,
-        int32_t ofs)
-{
-    STANDARD_VM_CONTRACT;
-
-    X86EmitOffsetModRmSIB(0xff, (X86Reg)0x6, baseReg, indexReg, scale, ofs);
-    Push(sizeof(void*));
-}
-
-//---------------------------------------------------------------
-// Emits:
-//    push dword ptr [ESP + <ofs>]
-//---------------------------------------------------------------
-VOID StubLinkerCPU::X86EmitSPIndexPush(int32_t ofs)
-{
-    STANDARD_VM_CONTRACT;
-
-    int8_t ofs8 = (int8_t) ofs;
-    if (ofs == (int32_t) ofs8)
-    {
-        // The offset can be expressed in a byte (can use the byte
-        // form of the push esp instruction)
-
-        BYTE code[] = {0xff, 0x74, 0x24, (BYTE)ofs8};
-        EmitBytes(code, sizeof(code));
-    }
-    else
-    {
-        // The offset requires 4 bytes (need to use the long form
-        // of the push esp instruction)
-
-        BYTE code[] = {0xff, 0xb4, 0x24, 0x0, 0x0, 0x0,
0x0}; - *(int32_t *)(&code[3]) = ofs; - EmitBytes(code, sizeof(code)); - } - - Push(sizeof(void*)); -} - //--------------------------------------------------------------- // Emits: @@ -1267,84 +953,6 @@ VOID StubLinkerCPU::X86EmitIndexPop(X86Reg srcreg, int32_t ofs) Pop(sizeof(void*)); } -//--------------------------------------------------------------- -// Emits: -// sub esp, IMM -//--------------------------------------------------------------- -VOID StubLinkerCPU::X86EmitSubEsp(INT32 imm32) -{ - STANDARD_VM_CONTRACT; - - if (imm32 < 0x1000-100) - { - // As long as the esp size is less than 1 page plus a small - // safety fudge factor, we can just bump esp. - X86EmitSubEspWorker(imm32); - } - else - { - // Otherwise, must touch at least one byte for each page. - while (imm32 >= 0x1000) - { - - X86EmitSubEspWorker(0x1000-4); - X86EmitPushReg(kEAX); - - imm32 -= 0x1000; - } - if (imm32 < 500) - { - X86EmitSubEspWorker(imm32); - } - else - { - // If the remainder is large, touch the last byte - again, - // as a fudge factor. - X86EmitSubEspWorker(imm32-4); - X86EmitPushReg(kEAX); - } - } -} - - -//--------------------------------------------------------------- -// Emits: -// sub esp, IMM -//--------------------------------------------------------------- -VOID StubLinkerCPU::X86EmitSubEspWorker(INT32 imm32) -{ - CONTRACTL - { - STANDARD_VM_CHECK; - - // On Win32, stacks must be faulted in one page at a time. - PRECONDITION(imm32 < 0x1000); - } - CONTRACTL_END; - - if (!imm32) - { - // nop - } - else - { - X86_64BitOperands(); - - if (FitsInI1(imm32)) - { - Emit16(0xec83); - Emit8((INT8)imm32); - } - else - { - Emit16(0xec81); - Emit32(imm32); - } - - Push(imm32); - } -} - //--------------------------------------------------------------- // Emits: @@ -1411,61 +1019,6 @@ VOID StubLinkerCPU::X86EmitAddReg(X86Reg reg, INT32 imm32) } } -//--------------------------------------------------------------- -// Emits: add destReg, srcReg -//--------------------------------------------------------------- - -VOID StubLinkerCPU::X86EmitAddRegReg(X86Reg destReg, X86Reg srcReg) -{ - STANDARD_VM_CONTRACT; - - X86EmitR2ROp(0x01, srcReg, destReg); -} - - - - -VOID StubLinkerCPU::X86EmitSubReg(X86Reg reg, INT32 imm32) -{ - CONTRACTL - { - STANDARD_VM_CHECK; - PRECONDITION((int) reg < NumX86Regs); - } - CONTRACTL_END; - -#ifdef TARGET_AMD64 - BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; - - if (reg >= kR8) - { - rex |= REX_OPCODE_REG_EXT; - reg = X86RegFromAMD64Reg(reg); - } - Emit8(rex); -#endif - - if (FitsInI1(imm32)) { - Emit8(0x83); - Emit8(static_cast(0xE8 | reg)); - Emit8(static_cast(imm32)); - } else { - Emit8(0x81); - Emit8(static_cast(0xE8 | reg)); - Emit32(imm32); - } -} - -//--------------------------------------------------------------- -// Emits: sub destReg, srcReg -//--------------------------------------------------------------- - -VOID StubLinkerCPU::X86EmitSubRegReg(X86Reg destReg, X86Reg srcReg) -{ - STANDARD_VM_CONTRACT; - - X86EmitR2ROp(0x29, srcReg, destReg); -} #if defined(TARGET_AMD64) @@ -2143,70 +1696,6 @@ static const X86Reg c_argRegs[] = { #endif #ifdef TARGET_X86 - -#ifdef TARGET_UNIX -namespace -{ - gc_alloc_context* STDCALL GetAllocContextHelper() - { - return &t_runtime_thread_locals.alloc_context.m_GCAllocContext; - } -} -#endif - -VOID StubLinkerCPU::X86EmitCurrentThreadAllocContextFetch(X86Reg dstreg, unsigned preservedRegSet) -{ - CONTRACTL - { - STANDARD_VM_CHECK; - - // It doesn't make sense to have the destination register be preserved - 
PRECONDITION((preservedRegSet & (1 << dstreg)) == 0); - AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers - } - CONTRACTL_END; - -#ifdef TARGET_UNIX - - X86EmitPushRegs(preservedRegSet & ((1 << kEAX) | (1 << kEDX) | (1 << kECX))); - - // call GetThread - X86EmitCall(NewExternalCodeLabel((LPVOID)GetAllocContextHelper), 0); - - // mov dstreg, eax - X86EmitMovRegReg(dstreg, kEAX); - - X86EmitPopRegs(preservedRegSet & ((1 << kEAX) | (1 << kEDX) | (1 << kECX))); - -#ifdef _DEBUG - // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg. - preservedRegSet |= 1 << dstreg; - if (!(preservedRegSet & (1 << kEAX))) - X86EmitDebugTrashReg(kEAX); - if (!(preservedRegSet & (1 << kEDX))) - X86EmitDebugTrashReg(kEDX); - if (!(preservedRegSet & (1 << kECX))) - X86EmitDebugTrashReg(kECX); -#endif // _DEBUG - -#else // TARGET_UNIX - - BYTE code[] = { 0x64,0x8b,0x05 }; // mov dstreg, dword ptr fs:[IMM32] - static const int regByteIndex = 2; - - code[regByteIndex] |= (dstreg << 3); - - EmitBytes(code, sizeof(code)); - Emit32(offsetof(TEB, ThreadLocalStoragePointer)); - - X86EmitIndexRegLoad(dstreg, dstreg, sizeof(void *) * _tls_index); - - _ASSERTE(Thread::GetOffsetOfThreadStatic(&t_runtime_thread_locals.alloc_context) < INT_MAX); - X86EmitAddReg(dstreg, (int32_t)Thread::GetOffsetOfThreadStatic(&t_runtime_thread_locals.alloc_context)); - -#endif // TARGET_UNIX -} - // This method unboxes the THIS pointer and then calls pRealMD // If it's shared code for a method in a generic value class, then also extract the vtable pointer // and pass it as an extra argument. Thus this stub generator really covers both diff --git a/src/coreclr/vm/i386/stublinkerx86.h b/src/coreclr/vm/i386/stublinkerx86.h index ee158190ec11e0..bc90a14d52c956 100644 --- a/src/coreclr/vm/i386/stublinkerx86.h +++ b/src/coreclr/vm/i386/stublinkerx86.h @@ -106,45 +106,6 @@ enum X86Reg : UCHAR // to add a cast and think about what exactly they are doing. const int kESP_Unsafe = 4; -//---------------------------------------------------------------------- -// Encodes X86 conditional jumps. The numbers are chosen to match -// Intel's opcode encoding. -//---------------------------------------------------------------------- -class X86CondCode { - public: - enum cc { - kJA = 0x7, - kJAE = 0x3, - kJB = 0x2, - kJBE = 0x6, - kJC = 0x2, - kJE = 0x4, - kJZ = 0x4, - kJG = 0xf, - kJGE = 0xd, - kJL = 0xc, - kJLE = 0xe, - kJNA = 0x6, - kJNAE = 0x2, - kJNB = 0x3, - kJNBE = 0x7, - kJNC = 0x3, - kJNE = 0x5, - kJNG = 0xe, - kJNGE = 0xc, - kJNL = 0xd, - kJNLE = 0xf, - kJNO = 0x1, - kJNP = 0xb, - kJNS = 0x9, - kJNZ = 0x5, - kJO = 0x0, - kJP = 0xa, - kJPE = 0xa, - kJPO = 0xb, - kJS = 0x8, - }; -}; //---------------------------------------------------------------------- // StubLinker with extensions for generating X86 code. 
@@ -162,28 +123,15 @@ class StubLinkerCPU : public StubLinker #endif VOID X86EmitAddReg(X86Reg reg, INT32 imm32); - VOID X86EmitAddRegReg(X86Reg destreg, X86Reg srcReg); - VOID X86EmitSubReg(X86Reg reg, INT32 imm32); - VOID X86EmitSubRegReg(X86Reg destreg, X86Reg srcReg); VOID X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg); - VOID X86EmitMovSPReg(X86Reg srcReg); - VOID X86EmitMovRegSP(X86Reg destReg); VOID X86EmitPushReg(X86Reg reg); VOID X86EmitPopReg(X86Reg reg); - VOID X86EmitPushRegs(unsigned regSet); - VOID X86EmitPopRegs(unsigned regSet); VOID X86EmitPushImm32(UINT value); - VOID X86EmitPushImm32(CodeLabel &pTarget); - VOID X86EmitPushImm8(BYTE value); VOID X86EmitPushImmPtr(LPVOID value BIT64_ARG(X86Reg tmpReg = kR10)); - VOID X86EmitCmpRegImm32(X86Reg reg, INT32 imm32); // cmp reg, imm32 - VOID X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32); // cmp [reg+offs], imm32 #ifdef TARGET_AMD64 - VOID X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32); // cmp dword ptr [reg+offs], imm32 - VOID X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg); VOID X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, int32_t ofs = 0); VOID X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, int32_t ofs = 0); @@ -205,21 +153,13 @@ class StubLinkerCPU : public StubLinker VOID X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, int32_t scale, int32_t ofs); VOID X86EmitNearJump(CodeLabel *pTarget); - VOID X86EmitCondJump(CodeLabel *pTarget, X86CondCode::cc condcode); - VOID X86EmitCall(CodeLabel *target, int iArgBytes); - VOID X86EmitReturn(WORD wArgBytes); - - VOID X86EmitCurrentThreadAllocContextFetch(X86Reg dstreg, unsigned preservedRegSet); VOID X86EmitIndexRegLoad(X86Reg dstreg, X86Reg srcreg, int32_t ofs = 0); VOID X86EmitIndexRegStore(X86Reg dstreg, int32_t ofs, X86Reg srcreg); VOID X86EmitIndexPush(X86Reg srcreg, int32_t ofs); - VOID X86EmitBaseIndexPush(X86Reg baseReg, X86Reg indexReg, int32_t scale, int32_t ofs); VOID X86EmitIndexPop(X86Reg srcreg, int32_t ofs); - VOID X86EmitSPIndexPush(int32_t ofs); - VOID X86EmitSubEsp(INT32 imm32); VOID X86EmitAddEsp(INT32 imm32); VOID X86EmitEspOffset(BYTE opcode, X86Reg altreg, @@ -319,9 +259,6 @@ class StubLinkerCPU : public StubLinker VOID X86EmitDebugTrashReg(X86Reg reg); #endif - private: - VOID X86EmitSubEspWorker(INT32 imm32); - public: static void Init(); diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 296069a9d170c4..369d21a2c3e984 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -541,192 +541,12 @@ extern "C" BOOL QCALLTYPE IsInstanceOf_NoCacheLookup(CORINFO_CLASS_HANDLE type, return result; } -//======================================================================== -// -// ALLOCATION HELPERS -// -//======================================================================== - -#include - -//************************************************************* -// Allocation fast path for typical objects -// -HCIMPL1_RAW(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_) -{ - CONTRACTL { - THROWS; - DISABLED(GC_TRIGGERS); - MODE_COOPERATIVE; - } CONTRACTL_END; - - _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - ee_alloc_context *eeAllocContext = &t_runtime_thread_locals.alloc_context; - gc_alloc_context *allocContext = &eeAllocContext->m_GCAllocContext; - - TypeHandle typeHandle(typeHnd_); - _ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables - MethodTable *methodTable = 
typeHandle.AsMethodTable(); - - SIZE_T size = methodTable->GetBaseSize(); - _ASSERTE(size % DATA_ALIGNMENT == 0); - - BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); - if (size > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) - { - // Tail call to the slow helper - return HCCALL1(JIT_New, typeHnd_); - } - - allocContext->alloc_ptr = allocPtr + size; - - _ASSERTE(allocPtr != nullptr); - Object *object = reinterpret_cast(allocPtr); - _ASSERTE(object->HasEmptySyncBlockInfo()); - object->SetMethodTable(methodTable); - - return object; -} -HCIMPLEND_RAW - -#include - -/*************************************************************/ -HCIMPL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_) -{ - FCALL_CONTRACT; - - OBJECTREF newobj = NULL; - HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame - - TypeHandle typeHnd(typeHnd_); - - _ASSERTE(!typeHnd.IsTypeDesc()); // heap objects must have method tables - MethodTable *pMT = typeHnd.AsMethodTable(); - -#ifdef _DEBUG - if (g_pConfig->FastGCStressLevel()) { - GetThread()->DisableStressHeap(); - } -#endif // _DEBUG - - newobj = AllocateObject(pMT); - - HELPER_METHOD_FRAME_END(); - return(OBJECTREFToObject(newobj)); -} -HCIMPLEND - -/*************************************************************/ -HCIMPL1(Object*, JIT_NewMaybeFrozen, CORINFO_CLASS_HANDLE typeHnd_) -{ - FCALL_CONTRACT; - - OBJECTREF newobj = NULL; - HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame - - TypeHandle typeHnd(typeHnd_); - - _ASSERTE(!typeHnd.IsTypeDesc()); // heap objects must have method tables - MethodTable* pMT = typeHnd.AsMethodTable(); - -#ifdef _DEBUG - if (g_pConfig->FastGCStressLevel()) { - GetThread()->DisableStressHeap(); - } -#endif // _DEBUG - - newobj = TryAllocateFrozenObject(pMT); - if (newobj == NULL) - { - // Fallback to normal heap allocation. - newobj = AllocateObject(pMT); - } - - HELPER_METHOD_FRAME_END(); - return(OBJECTREFToObject(newobj)); -} -HCIMPLEND - - //======================================================================== // // STRING HELPERS // //======================================================================== -#include - -//************************************************************* -// Allocation fast path for typical objects -// -HCIMPL1_RAW(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength) -{ - CONTRACTL { - THROWS; - DISABLED(GC_TRIGGERS); - MODE_COOPERATIVE; - } CONTRACTL_END; - - _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - - // Instead of doing elaborate overflow checks, we just limit the number of elements. This will avoid all overflow - // problems, as well as making sure big string objects are correctly allocated in the big object heap. 
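// Worked out with the usual LARGE_OBJECT_SIZE of 85000 bytes, this caps the fast path
// at (85000 - 256) / 2 = 42372 characters; longer strings fall through to
// FramedAllocateString below, which can place them on the large object heap.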
- if (stringLength >= (LARGE_OBJECT_SIZE - 256) / sizeof(WCHAR)) - { - // Tail call to the slow helper - return HCCALL1(FramedAllocateString, stringLength); - } - - ee_alloc_context *eeAllocContext = &t_runtime_thread_locals.alloc_context; - gc_alloc_context *allocContext = &eeAllocContext->m_GCAllocContext; - - SIZE_T totalSize = StringObject::GetSize(stringLength); - - // The method table's base size includes space for a terminating null character - _ASSERTE(totalSize >= g_pStringClass->GetBaseSize()); - _ASSERTE((totalSize - g_pStringClass->GetBaseSize()) / sizeof(WCHAR) == stringLength); - - SIZE_T alignedTotalSize = ALIGN_UP(totalSize, DATA_ALIGNMENT); - _ASSERTE(alignedTotalSize >= totalSize); - totalSize = alignedTotalSize; - - BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); - if (totalSize > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) - { - // Tail call to the slow helper - return HCCALL1(FramedAllocateString, stringLength); - } - allocContext->alloc_ptr = allocPtr + totalSize; - - _ASSERTE(allocPtr != nullptr); - StringObject *stringObject = reinterpret_cast(allocPtr); - stringObject->SetMethodTable(g_pStringClass); - stringObject->SetStringLength(stringLength); - _ASSERTE(stringObject->GetBuffer()[stringLength] == W('\0')); - - return stringObject; -} -HCIMPLEND_RAW - -#include - -HCIMPL1(StringObject*, FramedAllocateString, DWORD stringLength) -{ - FCALL_CONTRACT; - - STRINGREF result = NULL; - HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame - - result = AllocateString(stringLength); - - HELPER_METHOD_FRAME_END(); - return((StringObject*) OBJECTREFToObject(result)); -} -HCIMPLEND - /*********************************************************************/ STRINGREF* ConstructStringLiteral(CORINFO_MODULE_HANDLE scopeHnd, mdToken metaTok, void** ppPinnedString) { @@ -737,221 +557,6 @@ STRINGREF* ConstructStringLiteral(CORINFO_MODULE_HANDLE scopeHnd, mdToken metaTo return module->ResolveStringRef(metaTok, ppPinnedString); } -//======================================================================== -// -// ARRAY HELPERS -// -//======================================================================== - -#include - -//************************************************************* -// Array allocation fast path for arrays of value type elements -// -HCIMPL2_RAW(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -{ - CONTRACTL { - THROWS; - DISABLED(GC_TRIGGERS); - MODE_COOPERATIVE; - } CONTRACTL_END; - - _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - - // Do a conservative check here. This is to avoid overflow while doing the calculations. We don't - // have to worry about "large" objects, since the allocation quantum is never big enough for - // LARGE_OBJECT_SIZE. - // - // For Value Classes, this needs to be 2^16 - slack (2^32 / max component size), - // The slack includes the size for the array header and round-up ; for alignment. Use 256 for the - // slack value out of laziness. 
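// Why 2^16 is the right order of magnitude: the component size multiplied in below is
// a 16-bit field (at most 0xFFFF), so keeping componentCount under 2^16 - 256 bounds
// componentCount * componentSize below 2^32 with headroom for the base size and the
// alignment round-up; the size arithmetic that follows therefore cannot overflow even
// on 32-bit targets where SIZE_T is 32 bits.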
- SIZE_T componentCount = static_cast(size); - if (componentCount >= static_cast(65535 - 256)) - { - // Tail call to the slow helper - return HCCALL2(JIT_NewArr1, arrayMT, size); - } - - ee_alloc_context* eeAllocContext = &t_runtime_thread_locals.alloc_context; - gc_alloc_context* allocContext = &eeAllocContext->m_GCAllocContext; - - MethodTable *pArrayMT = (MethodTable *)arrayMT; - - _ASSERTE(pArrayMT->HasComponentSize()); - SIZE_T componentSize = pArrayMT->RawGetComponentSize(); - SIZE_T totalSize = componentCount * componentSize; - _ASSERTE(totalSize / componentSize == componentCount); - - SIZE_T baseSize = pArrayMT->GetBaseSize(); - totalSize += baseSize; - _ASSERTE(totalSize >= baseSize); - - SIZE_T alignedTotalSize = ALIGN_UP(totalSize, DATA_ALIGNMENT); - _ASSERTE(alignedTotalSize >= totalSize); - totalSize = alignedTotalSize; - - BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); - if (totalSize > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) - { - // Tail call to the slow helper - return HCCALL2(JIT_NewArr1, arrayMT, size); - } - allocContext->alloc_ptr = allocPtr + totalSize; - - _ASSERTE(allocPtr != nullptr); - ArrayBase *array = reinterpret_cast(allocPtr); - array->SetMethodTable(pArrayMT); - _ASSERTE(static_cast(componentCount) == componentCount); - array->m_NumComponents = static_cast(componentCount); - - return array; -} -HCIMPLEND_RAW - -//************************************************************* -// Array allocation fast path for arrays of object elements -// -HCIMPL2_RAW(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -{ - CONTRACTL { - THROWS; - DISABLED(GC_TRIGGERS); - MODE_COOPERATIVE; - } CONTRACTL_END; - - _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - - // Make sure that the total size cannot reach LARGE_OBJECT_SIZE, which also allows us to avoid overflow checks. The - // "256" slack is to cover the array header size and round-up, using a constant value here out of laziness. 
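// Two things follow from this bound for object arrays: totalSize stays below
// LARGE_OBJECT_SIZE, so the allocation always fits a normal allocation context, and
// no ALIGN_UP is needed afterwards, since the base size is pointer-aligned and each
// element is sizeof(void*); the assert on totalSize below checks exactly that.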
- SIZE_T componentCount = static_cast(size); - if (componentCount >= static_cast((LARGE_OBJECT_SIZE - 256) / sizeof(void *))) - { - // Tail call to the slow helper - return HCCALL2(JIT_NewArr1, arrayMT, size); - } - - SIZE_T totalSize = componentCount * sizeof(void *); - _ASSERTE(totalSize / sizeof(void *) == componentCount); - - MethodTable *pArrayMT = (MethodTable *)arrayMT; - - SIZE_T baseSize = pArrayMT->GetBaseSize(); - totalSize += baseSize; - _ASSERTE(totalSize >= baseSize); - - _ASSERTE(ALIGN_UP(totalSize, DATA_ALIGNMENT) == totalSize); - - ee_alloc_context* eeAllocContext = &t_runtime_thread_locals.alloc_context; - gc_alloc_context* allocContext = &eeAllocContext->m_GCAllocContext; - BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); - if (totalSize > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) - { - // Tail call to the slow helper - return HCCALL2(JIT_NewArr1, arrayMT, size); - } - allocContext->alloc_ptr = allocPtr + totalSize; - - _ASSERTE(allocPtr != nullptr); - ArrayBase *array = reinterpret_cast(allocPtr); - array->SetMethodTable(pArrayMT); - _ASSERTE(static_cast(componentCount) == componentCount); - array->m_NumComponents = static_cast(componentCount); - - return array; -} -HCIMPLEND_RAW - -#include - -/*************************************************************/ -HCIMPL2(Object*, JIT_NewArr1, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -{ - FCALL_CONTRACT; - - OBJECTREF newArray = NULL; - - HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame - - MethodTable *pArrayMT = (MethodTable *)arrayMT; - - _ASSERTE(pArrayMT->IsFullyLoaded()); - _ASSERTE(pArrayMT->IsArray()); - _ASSERTE(!pArrayMT->IsMultiDimArray()); - - if (size < 0) - COMPlusThrow(kOverflowException); - -#ifdef HOST_64BIT - // Even though ECMA allows using a native int as the argument to newarr instruction - // (therefore size is INT_PTR), ArrayBase::m_NumComponents is 32-bit, so even on 64-bit - // platforms we can't create an array whose size exceeds 32 bits. - if (size > INT_MAX) - EX_THROW(EEMessageException, (kOverflowException, IDS_EE_ARRAY_DIMENSIONS_EXCEEDED)); -#endif - -#ifdef _DEBUG - if (g_pConfig->FastGCStressLevel()) { - GetThread()->DisableStressHeap(); - } -#endif // _DEBUG - - newArray = AllocateSzArray(pArrayMT, (INT32)size); - HELPER_METHOD_FRAME_END(); - - return(OBJECTREFToObject(newArray)); -} -HCIMPLEND - - -/*************************************************************/ -HCIMPL2(Object*, JIT_NewArr1MaybeFrozen, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -{ - FCALL_CONTRACT; - - OBJECTREF newArray = NULL; - - HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame - - MethodTable* pArrayMT = (MethodTable*)arrayMT; - - _ASSERTE(pArrayMT->IsFullyLoaded()); - _ASSERTE(pArrayMT->IsArray()); - _ASSERTE(!pArrayMT->IsMultiDimArray()); - - if (size < 0) - COMPlusThrow(kOverflowException); - -#ifdef HOST_64BIT - // Even though ECMA allows using a native int as the argument to newarr instruction - // (therefore size is INT_PTR), ArrayBase::m_NumComponents is 32-bit, so even on 64-bit - // platforms we can't create an array whose size exceeds 32 bits. 
- if (size > INT_MAX) - EX_THROW(EEMessageException, (kOverflowException, IDS_EE_ARRAY_DIMENSIONS_EXCEEDED)); -#endif - -#ifdef _DEBUG - if (g_pConfig->FastGCStressLevel()) { - GetThread()->DisableStressHeap(); - } -#endif // _DEBUG - - newArray = TryAllocateFrozenSzArray(pArrayMT, (INT32)size); - if (newArray == NULL) - { - // Fallback to default heap allocation - newArray = AllocateSzArray(pArrayMT, (INT32)size); - } - _ASSERTE(newArray != NULL); - - HELPER_METHOD_FRAME_END(); - - return(OBJECTREFToObject(newArray)); -} -HCIMPLEND - -#include //======================================================================== // diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index b8eae789628ff6..5af4f7670ba39e 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -5687,12 +5687,12 @@ CorInfoHelpFunc CEEInfo::getNewHelperStatic(MethodTable * pMT, bool * pHasSideEf } else #ifdef FEATURE_64BIT_ALIGNMENT - // @ARMTODO: Force all 8-byte alignment requiring allocations down one slow path. As performance - // measurements dictate we can spread these out to faster, more specialized helpers later. if (pMT->RequiresAlign8()) { - // Use slow helper - _ASSERTE(helper == CORINFO_HELP_NEWFAST); + if (pMT->IsValueType()) + helper = CORINFO_HELP_NEWSFAST_ALIGN8_VC; + else + helper = CORINFO_HELP_NEWSFAST_ALIGN8; } else #endif @@ -5767,17 +5767,19 @@ CorInfoHelpFunc CEEInfo::getNewArrHelperStatic(TypeHandle clsHnd) else { // These cases always must use the slow helper - if ( -#ifdef FEATURE_64BIT_ALIGNMENT - thElemType.RequiresAlign8() || -#endif - (elemType == ELEMENT_TYPE_VOID) || + if ((elemType == ELEMENT_TYPE_VOID) || LoggingOn(LF_GCALLOC, LL_INFO10) || TrackAllocationsEnabled()) { // Use the slow helper result = CORINFO_HELP_NEWARR_1_DIRECT; } +#ifdef FEATURE_64BIT_ALIGNMENT + else if (thElemType.RequiresAlign8()) + { + result = CORINFO_HELP_NEWARR_1_ALIGN8; + } +#endif else { // Yea, we can do it the fast way! 
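The net effect of the two jitinterface.cpp hunks above: now that fast align-8 helpers exist, getNewHelperStatic maps 8-byte-aligned types to CORINFO_HELP_NEWSFAST_ALIGN8 / _ALIGN8_VC instead of forcing them down the generic slow path, and getNewArrHelperStatic reserves the slow helper for the genuinely slow cases only. Condensed to its control flow, the array-side selection now reads like this sketch (a simplified outline, not the full function; elemIsRef is an illustrative parameter standing in for the element-kind test):

    CorInfoHelpFunc getNewArrHelperOutline(TypeHandle thElemType, CorElementType elemType,
                                           bool elemIsRef /* illustrative */)
    {
        // These cases always must use the slow helper.
        if (elemType == ELEMENT_TYPE_VOID ||
            LoggingOn(LF_GCALLOC, LL_INFO10) ||
            TrackAllocationsEnabled())
        {
            return CORINFO_HELP_NEWARR_1_DIRECT;
        }
    #ifdef FEATURE_64BIT_ALIGNMENT
        // Previously folded into the slow case; now gets its own fast helper.
        if (thElemType.RequiresAlign8())
            return CORINFO_HELP_NEWARR_1_ALIGN8;
    #endif
        // The fast way, split by element kind.
        return elemIsRef ? CORINFO_HELP_NEWARR_1_OBJ
                         : CORINFO_HELP_NEWARR_1_VC;
    }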
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 0080db0a1c40e8..2b2ebf411b71ad 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -65,7 +65,9 @@ bool SigInfoFlagsAreValid (CORINFO_SIG_INFO *sig) } -void InitJITHelpers1(); +void InitJITAllocationHelpers(); + +void InitJITWriteBarrierHelpers(); PCODE UnsafeJitFunction(PrepareCodeConfig* config, COR_ILMETHOD_DECODER* header, @@ -143,15 +145,31 @@ EXTERN_C FCDECL1(void*, JIT_GetDynamicGCStaticBaseNoCtor_Portable, DynamicStatic EXTERN_C FCDECL1(void*, JIT_GetDynamicNonGCStaticBaseNoCtor, DynamicStaticsInfo* pStaticsInfo); EXTERN_C FCDECL1(void*, JIT_GetDynamicNonGCStaticBaseNoCtor_Portable, DynamicStaticsInfo* pStaticsInfo); -extern FCDECL1(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_); -extern FCDECL1(Object*, JIT_New, CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C FCDECL1(Object*, RhpNewFast, CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C FCDECL2(Object*, RhpNewArrayFast, CORINFO_CLASS_HANDLE typeHnd_, INT_PTR size); +EXTERN_C FCDECL2(Object*, RhpNewObjectArrayFast, CORINFO_CLASS_HANDLE typeHnd_, INT_PTR size); +EXTERN_C FCDECL2(Object*, RhNewString, CORINFO_CLASS_HANDLE typeHnd_, DWORD stringLength); + +#if defined(FEATURE_64BIT_ALIGNMENT) +EXTERN_C FCDECL1(Object*, RhpNewFastAlign8, CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C FCDECL1(Object*, RhpNewFastMisalign, CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C FCDECL2(Object*, RhpNewArrayFastAlign8, CORINFO_CLASS_HANDLE typeHnd_, INT_PTR size); +#endif + +#if defined(TARGET_WINDOWS) && (defined(TARGET_AMD64) || defined(TARGET_X86)) +EXTERN_C FCDECL1(Object*, RhpNewFast_UP, CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C FCDECL2(Object*, RhpNewArrayFast_UP, CORINFO_CLASS_HANDLE typeHnd_, INT_PTR size); +EXTERN_C FCDECL2(Object*, RhpNewObjectArrayFast_UP, CORINFO_CLASS_HANDLE typeHnd_, INT_PTR size); +EXTERN_C FCDECL2(Object*, RhNewString_UP, CORINFO_CLASS_HANDLE typeHnd_, DWORD stringLength); +#endif -extern FCDECL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength); -extern FCDECL1(StringObject*, FramedAllocateString, DWORD stringLength); +EXTERN_C FCDECL1(Object*, RhpNew, CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C FCDECL2(Object*, RhpNewVariableSizeObject, CORINFO_CLASS_HANDLE typeHnd_, INT_PTR size); +EXTERN_C FCDECL1(Object*, RhpNewMaybeFrozen, CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C FCDECL2(Object*, RhpNewArrayMaybeFrozen, CORINFO_CLASS_HANDLE typeHnd_, INT_PTR size); -extern FCDECL2(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); -extern FCDECL2(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); -extern FCDECL2(Object*, JIT_NewArr1, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); +EXTERN_C FCDECL1(Object*, AllocateStringFast, DWORD stringLength); +EXTERN_C FCDECL1(Object*, AllocateStringSlow, DWORD stringLength); EXTERN_C FCDECL2(void, JITutil_MonReliableEnter, Object* obj, BYTE* pbLockTaken); EXTERN_C FCDECL3(void, JITutil_MonTryEnter, Object* obj, INT32 timeOut, BYTE* pbLockTaken); diff --git a/src/coreclr/vm/jitinterfacegen.cpp b/src/coreclr/vm/jitinterfacegen.cpp index cae7b5b9f6668f..b55418ef6e06b1 100644 --- a/src/coreclr/vm/jitinterfacegen.cpp +++ b/src/coreclr/vm/jitinterfacegen.cpp @@ -1,14 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// =========================================================================== -// File: JITinterfaceGen.CPP -// -// This contains the AMD64 version of InitJITHelpers1(). -// -// =========================================================================== - - #include "common.h" #include "clrtypes.h" #include "jitinterface.h" @@ -19,33 +11,12 @@ #include "ecall.h" #include "writebarriermanager.h" -#ifdef HOST_64BIT - -// These are the single-processor-optimized versions of the allocation helpers. -EXTERN_C Object* JIT_TrialAllocSFastSP(CORINFO_CLASS_HANDLE typeHnd_); -EXTERN_C Object* AllocateStringFastUP (CLR_I4 cch); - -EXTERN_C Object* JIT_NewArr1OBJ_UP (CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); -EXTERN_C Object* JIT_NewArr1VC_UP (CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); - -#endif // HOST_64BIT - -/*********************************************************************/ -// Initialize the part of the JIT helpers that require very little of -// EE infrastructure to be in place. -/*********************************************************************/ -#ifndef TARGET_X86 - -void InitJITHelpers1() +void InitJITAllocationHelpers() { STANDARD_VM_CONTRACT; _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0); -#if defined(TARGET_AMD64) - - g_WriteBarrierManager.Initialize(); - // Allocation helpers, faster but non-logging if (!((TrackAllocationsEnabled()) || (LoggingOn(LF_GCALLOC, LL_INFO10)) @@ -54,40 +25,36 @@ void InitJITHelpers1() #endif // _DEBUG )) { -#ifdef TARGET_UNIX - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); - - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); -#else // TARGET_UNIX - // if (multi-proc || server GC) + // if (multi-proc || server GC || non-Windows) if (GCHeapUtilities::UseThreadAllocationContexts()) { - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_NEWSFAST, RhpNewFast); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, RhpNewArrayFast); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, RhpNewObjectArrayFast); - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); +#if defined(FEATURE_64BIT_ALIGNMENT) + SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, RhpNewFastAlign8); + SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8_VC, RhpNewFastMisalign); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_ALIGN8, RhpNewArrayFastAlign8); +#endif + + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(RhNewString), ECall::FastAllocateString); } else { +#if defined(TARGET_WINDOWS) && (defined(TARGET_AMD64) || defined(TARGET_X86)) // Replace the 1p slow allocation helpers with faster version // // When we're running Workstation GC on a single proc box we don't have // InlineGetThread versions because there is no need to call GetThread - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_TrialAllocSFastSP); - SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_TrialAllocSFastSP); - 
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_UP); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_UP); - - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastUP), ECall::FastAllocateString); + SetJitHelperFunction(CORINFO_HELP_NEWSFAST, RhpNewFast_UP); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, RhpNewArrayFast_UP); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, RhpNewObjectArrayFast_UP); + + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(RhNewString_UP), ECall::FastAllocateString); +#else + _ASSERTE(!"Expected to use ThreadAllocationContexts"); +#endif } -#endif // TARGET_UNIX } -#endif // TARGET_AMD64 } - -#endif // !TARGET_X86 diff --git a/src/coreclr/vm/loongarch64/AllocSlow.S b/src/coreclr/vm/loongarch64/AllocSlow.S new file mode 100644 index 00000000000000..cd72e5b09f2e35 --- /dev/null +++ b/src/coreclr/vm/loongarch64/AllocSlow.S @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "asmconstants.h" +#include "unixasmmacros.inc" + +// +// Object* RhpNew(MethodTable *pMT) +// +// Allocate non-array object, slow path. +// +LEAF_ENTRY RhpNew, _TEXT + + ori $a1, $zero, 0 + b C_FUNC(RhpNewObject) + +LEAF_END RhpNew, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT) +// +// Allocate non-array object, may be on frozen heap. +// +NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME $a2 + + ori $a1, $zero, 0 + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NESTED_END RhpNewMaybeFrozen, _TEXT + +// +// Object* RhpNewMaybeFrozen(MethodTable *pMT, INT_PTR size) +// +// Allocate array object, may be on frozen heap. 
+// +NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME $a2 + + bl C_FUNC(RhpGcAllocMaybeFrozen) + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NESTED_END RhpNewArrayMaybeFrozen, _TEXT + +// +// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow) +// +NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME $a2 + + bl C_FUNC(RhExceptionHandling_FailedAllocation_Helper) + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NESTED_END RhExceptionHandling_FailedAllocation, _TEXT diff --git a/src/coreclr/vm/loongarch64/asmconstants.h b/src/coreclr/vm/loongarch64/asmconstants.h index 8a7ffecd191734..b4db363e9dc60c 100644 --- a/src/coreclr/vm/loongarch64/asmconstants.h +++ b/src/coreclr/vm/loongarch64/asmconstants.h @@ -49,6 +49,16 @@ ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame)); #define Thread_m_pFrame Thread__m_pFrame #define Thread_m_fPreemptiveGCDisabled Thread__m_fPreemptiveGCDisabled +#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context)); + +#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) + + offsetof(gc_alloc_context, alloc_ptr)); + +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit)); + #define METHODDESC_REGISTER t2 #define SIZEOF__ArgumentRegisters 0x40 @@ -111,23 +121,34 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); //========================================= -#define MethodTable__m_dwFlags 0x0 -ASMCONSTANTS_C_ASSERT(MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); +#define OFFSETOF__MethodTable__m_dwFlags 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags)); -#define MethodTable__m_BaseSize 0x04 -ASMCONSTANTS_C_ASSERT(MethodTable__m_BaseSize == offsetof(MethodTable, m_BaseSize)); +#define OFFSETOF__MethodTable__m_usComponentSize 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags)); -#define MethodTable__m_ElementType DBG_FRE(0x38, 0x30) -ASMCONSTANTS_C_ASSERT(MethodTable__m_ElementType == offsetof(MethodTable, m_ElementTypeHnd)); +#define OFFSETOF__MethodTable__m_uBaseSize 0x04 +ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize)); -#define ArrayBase__m_NumComponents 0x8 -ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents)); +#define OFFSETOF__Object__m_pEEType 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab)); -#define PtrArray__m_Array 0x10 -ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array)); +#define OFFSETOF__Array__m_Length 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents)); #define TypeHandle_CanCast 0x1 // TypeHandle::CanCast +#define MAX_STRING_LENGTH 0x3FFFFFDF +ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength); + +#define STRING_COMPONENT_SIZE 2 + +#define STRING_BASE_SIZE 0x16 +ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR)); + +#define SZARRAY_BASE_SIZE 0x18 +ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == 
+
 //=========================================
 
 #ifdef FEATURE_COMINTEROP
diff --git a/src/coreclr/vm/loongarch64/stubs.cpp b/src/coreclr/vm/loongarch64/stubs.cpp
index 6cd8cc4430d1ae..8513abf0e09f7a 100644
--- a/src/coreclr/vm/loongarch64/stubs.cpp
+++ b/src/coreclr/vm/loongarch64/stubs.cpp
@@ -874,31 +874,10 @@ static void UpdateWriteBarrierState(bool skipEphemeralCheck)
     }
 }
 
-void InitJITHelpers1()
+void InitJITWriteBarrierHelpers()
 {
     STANDARD_VM_CONTRACT;
 
-    _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0);
-
-    // Allocation helpers, faster but non-logging
-    if (!((TrackAllocationsEnabled()) ||
-        (LoggingOn(LF_GCALLOC, LL_INFO10))
-#ifdef _DEBUG
-        || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0)
-#endif // _DEBUG
-        ))
-    {
-        if (GCHeapUtilities::UseThreadAllocationContexts())
-        {
-            SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
-            SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
-            SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
-            SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
-
-            ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
-        }
-    }
-
     UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap());
 }
diff --git a/src/coreclr/vm/object.h b/src/coreclr/vm/object.h
index b0386ef5d5b706..0b97c65e53df4a 100644
--- a/src/coreclr/vm/object.h
+++ b/src/coreclr/vm/object.h
@@ -127,6 +127,8 @@ struct RCW;
 //
 class Object
 {
+    friend class CheckAsmOffsets;
+
   protected:
     PTR_MethodTable m_pMethTab;
 
@@ -533,8 +535,6 @@ class ArrayBase : public Object
     friend OBJECTREF AllocateSzArray(MethodTable *pArrayMT, INT32 length, GC_ALLOC_FLAGS flags);
     friend OBJECTREF TryAllocateFrozenSzArray(MethodTable* pArrayMT, INT32 length);
     friend OBJECTREF AllocateArrayEx(MethodTable *pArrayMT, INT32 *pArgs, DWORD dwNumArgs, GC_ALLOC_FLAGS flags);
-    friend FCDECL2(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size);
-    friend FCDECL2(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size);
     friend class JIT_TrialAlloc;
     friend class CheckAsmOffsets;
     friend struct _DacGlobals;
diff --git a/src/coreclr/vm/riscv64/AllocSlow.S b/src/coreclr/vm/riscv64/AllocSlow.S
new file mode 100644
index 00000000000000..d9aabe10eda67a
--- /dev/null
+++ b/src/coreclr/vm/riscv64/AllocSlow.S
@@ -0,0 +1,64 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "asmconstants.h"
+#include "unixasmmacros.inc"
+
+//
+// Object* RhpNew(MethodTable *pMT)
+//
+// Allocate non-array object, slow path.
+//
+LEAF_ENTRY RhpNew, _TEXT
+
+    li  a1, 0
+    tail  C_FUNC(RhpNewObject)
+
+LEAF_END RhpNew, _TEXT
+
+//
+// Object* RhpNewMaybeFrozen(MethodTable *pMT)
+//
+// Allocate non-array object, may be on frozen heap.
+//
+NESTED_ENTRY RhpNewMaybeFrozen, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME a2
+
+    li  a1, 0
+    call  C_FUNC(RhpGcAllocMaybeFrozen)
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+NESTED_END RhpNewMaybeFrozen, _TEXT
+
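Unlike RhpNew, the MaybeFrozen stubs cannot simply tail-call: they push a cooperative pinvoke frame first so the GC can walk the stack while the C++ side runs, and PUSH_COOP_PINVOKE_FRAME leaves the frame address in the third argument register (a2 here). In rough C++ terms, with the frame handling reduced to comments and the RhpGcAllocMaybeFrozen signature assumed from how the registers are loaded:

    #include <cstdint>

    struct MethodTable;
    struct Object;

    // Assumed from the register usage: MethodTable* in a0, element count in
    // a1 (zero for non-array objects), transition frame in a2.
    extern "C" Object* RhpGcAllocMaybeFrozen(MethodTable* pMT, intptr_t numElements,
                                             void* pTransitionFrame);

    extern "C" Object* RhpNewMaybeFrozen_Sketch(MethodTable* pMT)
    {
        void* frame = nullptr;  // PUSH_COOP_PINVOKE_FRAME: spill callee-saved
                                // registers and link a frame the GC can unwind
        Object* obj = RhpGcAllocMaybeFrozen(pMT, 0, frame);
        // POP_COOP_PINVOKE_FRAME before returning
        return obj;
    }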
+//
+// Object* RhpNewArrayMaybeFrozen(MethodTable *pMT, INT_PTR size)
+//
+// Allocate array object, may be on frozen heap.
+//
+NESTED_ENTRY RhpNewArrayMaybeFrozen, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME a2
+
+    call  C_FUNC(RhpGcAllocMaybeFrozen)
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+NESTED_END RhpNewArrayMaybeFrozen, _TEXT
+
+//
+// void RhExceptionHandling_FailedAllocation(MethodTable *pMT, bool isOverflow)
+//
+NESTED_ENTRY RhExceptionHandling_FailedAllocation, _TEXT, NoHandler
+
+    PUSH_COOP_PINVOKE_FRAME a2
+
+    call  C_FUNC(RhExceptionHandling_FailedAllocation_Helper)
+
+    POP_COOP_PINVOKE_FRAME
+    EPILOG_RETURN
+
+NESTED_END RhExceptionHandling_FailedAllocation, _TEXT
diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h
index 937372b0bd39c3..c62c697d6d9f07 100644
--- a/src/coreclr/vm/riscv64/asmconstants.h
+++ b/src/coreclr/vm/riscv64/asmconstants.h
@@ -44,6 +44,16 @@ ASMCONSTANTS_C_ASSERT(Thread__m_pFrame == offsetof(Thread, m_pFrame));
 #define Thread_m_pFrame Thread__m_pFrame
 #define Thread_m_fPreemptiveGCDisabled Thread__m_fPreemptiveGCDisabled
 
+#define OFFSETOF__RuntimeThreadLocals__ee_alloc_context 0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__RuntimeThreadLocals__ee_alloc_context == offsetof(RuntimeThreadLocals, alloc_context));
+
+#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8
+ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, m_GCAllocContext) +
+                      offsetof(gc_alloc_context, alloc_ptr));
+
+#define OFFSETOF__ee_alloc_context__combined_limit 0x0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, m_CombinedLimit));
+
 #define METHODDESC_REGISTER t2
 
 #define SIZEOF__ArgumentRegisters 0x40
@@ -107,23 +117,34 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT));
 
 //=========================================
-#define MethodTable__m_dwFlags 0x0
-ASMCONSTANTS_C_ASSERT(MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags));
+#define OFFSETOF__MethodTable__m_dwFlags 0x0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_dwFlags == offsetof(MethodTable, m_dwFlags));
 
-#define MethodTable__m_BaseSize 0x04
-ASMCONSTANTS_C_ASSERT(MethodTable__m_BaseSize == offsetof(MethodTable, m_BaseSize));
+#define OFFSETOF__MethodTable__m_usComponentSize 0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_usComponentSize == offsetof(MethodTable, m_dwFlags));
 
-#define MethodTable__m_ElementType DBG_FRE(0x38, 0x30)
-ASMCONSTANTS_C_ASSERT(MethodTable__m_ElementType == offsetof(MethodTable, m_ElementTypeHnd));
+#define OFFSETOF__MethodTable__m_uBaseSize 0x04
+ASMCONSTANTS_C_ASSERT(OFFSETOF__MethodTable__m_uBaseSize == offsetof(MethodTable, m_BaseSize));
 
-#define ArrayBase__m_NumComponents 0x8
-ASMCONSTANTS_C_ASSERT(ArrayBase__m_NumComponents == offsetof(ArrayBase, m_NumComponents));
+#define OFFSETOF__Object__m_pEEType 0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__Object__m_pEEType == offsetof(Object, m_pMethTab));
 
-#define PtrArray__m_Array 0x10
-ASMCONSTANTS_C_ASSERT(PtrArray__m_Array == offsetof(PtrArray, m_Array));
+#define OFFSETOF__Array__m_Length 0x8
+ASMCONSTANTS_C_ASSERT(OFFSETOF__Array__m_Length == offsetof(ArrayBase, m_NumComponents));
 
 #define TypeHandle_CanCast 0x1 // TypeHandle::CanCast
 
+#define MAX_STRING_LENGTH 0x3FFFFFDF
+ASMCONSTANTS_C_ASSERT(MAX_STRING_LENGTH == CORINFO_String_MaxLength);
+
+#define STRING_COMPONENT_SIZE 2
+
+#define STRING_BASE_SIZE 0x16
+ASMCONSTANTS_C_ASSERT(STRING_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(WCHAR));
+
+#define SZARRAY_BASE_SIZE 0x18
+ASMCONSTANTS_C_ASSERT(SZARRAY_BASE_SIZE == OBJECT_BASESIZE + sizeof(DWORD) + sizeof(DWORD));
+
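The hard-coded base sizes in both asmconstants.h files follow from 64-bit object layout, where OBJECT_BASESIZE is 0x10 (object header word plus MethodTable pointer). The arithmetic, restated as standalone static_asserts with the 0x10 written out as an assumption rather than pulled from the runtime headers:

    #include <cstddef>
    #include <cstdint>

    // 64-bit: 8-byte object header word + 8-byte MethodTable*.
    constexpr std::size_t kObjectBaseSize = 0x10;

    // String: base + 4-byte length + 2-byte implicit null terminator = 0x16.
    static_assert(kObjectBaseSize + sizeof(std::uint32_t) + sizeof(char16_t) == 0x16,
                  "STRING_BASE_SIZE");

    // SZ array: base + 4-byte length + 4 bytes padding to pointer alignment = 0x18.
    static_assert(kObjectBaseSize + sizeof(std::uint32_t) + sizeof(std::uint32_t) == 0x18,
                  "SZARRAY_BASE_SIZE");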
 //=========================================
diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp
index 5b2b1c75952be4..8a4e95473bfd1a 100644
--- a/src/coreclr/vm/riscv64/stubs.cpp
+++ b/src/coreclr/vm/riscv64/stubs.cpp
@@ -791,31 +791,10 @@ static void UpdateWriteBarrierState(bool skipEphemeralCheck)
     }
 }
 
-void InitJITHelpers1()
+void InitJITWriteBarrierHelpers()
 {
     STANDARD_VM_CONTRACT;
 
-    _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0);
-
-    // Allocation helpers, faster but non-logging
-    if (!((TrackAllocationsEnabled()) ||
-        (LoggingOn(LF_GCALLOC, LL_INFO10))
-#ifdef _DEBUG
-        || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0)
-#endif // _DEBUG
-        ))
-    {
-        if (GCHeapUtilities::UseThreadAllocationContexts())
-        {
-            SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
-            SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
-            SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
-            SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
-
-            ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
-        }
-    }
-
     UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap());
 }
diff --git a/src/coreclr/vm/writebarriermanager.cpp b/src/coreclr/vm/writebarriermanager.cpp
index c9aac96ada37d5..7ce35a9faca245 100644
--- a/src/coreclr/vm/writebarriermanager.cpp
+++ b/src/coreclr/vm/writebarriermanager.cpp
@@ -1135,3 +1135,10 @@ int SwitchToNonWriteWatchBarrier(bool isRuntimeSuspended)
     return SWB_PASS;
 }
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
+void InitJITWriteBarrierHelpers()
+{
+    STANDARD_VM_CONTRACT;
+
+    g_WriteBarrierManager.Initialize();
+}
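With the allocation helpers now wired statically through jithelpers.h, write-barrier setup is the only work left from the old InitJITHelpers1, which the rename makes explicit. A short sketch of how the per-architecture bodies compare after this change; the startup call-site name below is hypothetical, standing in for wherever the runtime previously invoked InitJITHelpers1:

    void InitJITWriteBarrierHelpers();

    // amd64 (writebarriermanager.cpp above): g_WriteBarrierManager.Initialize();
    // loongarch64/riscv64 (stubs.cpp above): UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap());
    void EEStartup_Sketch()
    {
        InitJITWriteBarrierHelpers();
    }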