From c2f7fecde86367d1e0f06653410c99a655ea1eba Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 19 May 2025 16:16:15 -0700 Subject: [PATCH 1/3] Reapply "Create a single copy of stub templates (#114462)" (#115665) This reverts commit f7fc17859c961a3b2b58a8502a69fd37cc5e2b6b. --- src/coreclr/clrdefinitions.cmake | 4 + src/coreclr/inc/executableallocator.h | 15 + src/coreclr/inc/loaderheap.h | 24 +- src/coreclr/minipal/Unix/doublemapping.cpp | 323 ++++++++++++++++++ src/coreclr/minipal/Windows/doublemapping.cpp | 20 ++ src/coreclr/minipal/minipal.h | 35 ++ .../nativeaot/Runtime/unix/PalRedhawkUnix.cpp | 2 +- src/coreclr/utilcode/executableallocator.cpp | 76 ++++- .../utilcode/interleavedloaderheap.cpp | 98 +++++- src/coreclr/vm/amd64/thunktemplates.S | 146 ++++++++ src/coreclr/vm/arm64/thunktemplates.S | 111 ++++++ src/coreclr/vm/callcounting.cpp | 27 +- src/coreclr/vm/callcounting.h | 2 +- src/coreclr/vm/loaderallocator.cpp | 9 +- src/coreclr/vm/precode.cpp | 53 ++- src/coreclr/vm/precode.h | 9 +- 16 files changed, 904 insertions(+), 50 deletions(-) diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index efb6ab0738a1a5..cb3b645ff0e58d 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -218,6 +218,10 @@ if (FEATURE_STUBPRECODE_DYNAMIC_HELPERS) add_definitions(-DFEATURE_STUBPRECODE_DYNAMIC_HELPERS) endif() +if (CLR_CMAKE_TARGET_APPLE) + add_definitions(-DFEATURE_MAP_THUNKS_FROM_IMAGE) +endif() + # Use this function to enable building with a specific target OS and architecture set of defines # This is known to work for the set of defines used by the JIT and gcinfo, it is not likely correct for # other components of the runtime diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h index 11caf3a6857d2d..973b950ad369bc 100644 --- a/src/coreclr/inc/executableallocator.h +++ b/src/coreclr/inc/executableallocator.h @@ -182,6 +182,9 @@ class ExecutableAllocator // Return true if double mapping is enabled. static bool IsDoubleMappingEnabled(); + // Release memory allocated via DoubleMapping for either templates or normal double mapped data + void ReleaseWorker(void* pRX, bool releaseTemplate); + // Initialize the allocator instance bool Initialize(); @@ -262,6 +265,18 @@ class ExecutableAllocator // Unmap the RW mapping at the specified address void UnmapRW(void* pRW); + + // Allocate thunks from a template. pTemplate is the return value from CreateTemplate + void* AllocateThunksFromTemplate(void *pTemplate, size_t templateSize); + + // Free a set of thunks allocated from templates. pThunks must have been returned from AllocateThunksFromTemplate + void FreeThunksFromTemplate(void *pThunks, size_t templateSize); + + // Create a template + // If templateInImage is not null, it will attempt to use it as the template, otherwise it will create an temporary in memory file to serve as the template + // Some OS/Architectures may/may not be able to work with this, so this api is permitted to return NULL, and callers should have an alternate approach using + // the codePageGenerator directly. 
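+    // A NULL return is not an error condition: UnlockedInterleavedLoaderHeap treats a NULL Template in its
+    // InterleavedLoaderHeapConfig as "no template available" and falls back to committing pages and running
+    // the codePageGenerator itself.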
+ void* CreateTemplate(void* templateInImage, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)); }; #define ExecutableWriterHolder ExecutableWriterHolderNoLog diff --git a/src/coreclr/inc/loaderheap.h b/src/coreclr/inc/loaderheap.h index 782f93cedc6264..d3040e0b4aa448 100644 --- a/src/coreclr/inc/loaderheap.h +++ b/src/coreclr/inc/loaderheap.h @@ -455,10 +455,19 @@ class UnlockedLoaderHeap : public UnlockedLoaderHeapBase static void WeGotAFaultNowWhat(UnlockedLoaderHeap *pHeap); }; +struct InterleavedLoaderHeapConfig +{ + uint32_t StubSize; + void* Template; + void (*CodePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size); +}; + +void InitializeLoaderHeapConfig(InterleavedLoaderHeapConfig *pConfig, size_t stubSize, void* templateInImage, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)); + //=============================================================================== // This is the base class for InterleavedLoaderHeap It's used as a simple // allocator for stubs in a scheme where each stub is a small fixed size, and is paired -// with memory which is GetOSStubPageSize() bytes away. In addition there is an +// with memory which is GetStubCodePageSize() bytes away. In addition there is an // ability to free is via a "backout" mechanism that is not considered to have good performance. // //=============================================================================== @@ -492,16 +501,13 @@ class UnlockedInterleavedLoaderHeap : public UnlockedLoaderHeapBase InterleavedStubFreeListNode *m_pFreeListHead; -public: -public: - void (*m_codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size); + const InterleavedLoaderHeapConfig *m_pConfig; #ifndef DACCESS_COMPILE protected: UnlockedInterleavedLoaderHeap( RangeList *pRangeList, - void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size), - DWORD dwGranularity); + const InterleavedLoaderHeapConfig *pConfig); virtual ~UnlockedInterleavedLoaderHeap(); #endif @@ -1039,13 +1045,11 @@ class InterleavedLoaderHeap : public UnlockedInterleavedLoaderHeap public: InterleavedLoaderHeap(RangeList *pRangeList, BOOL fUnlocked, - void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size), - DWORD dwGranularity + const InterleavedLoaderHeapConfig *pConfig ) : UnlockedInterleavedLoaderHeap( pRangeList, - codePageGenerator, - dwGranularity), + pConfig), m_CriticalSection(fUnlocked ? 
NULL : CreateLoaderHeapLock()) { WRAPPER_NO_CONTRACT; diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp index b866da9f93e6f1..4a2516bea58484 100644 --- a/src/coreclr/minipal/Unix/doublemapping.cpp +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,11 @@ #include "minipal.h" #include "minipal/cpufeatures.h" +#ifndef TARGET_APPLE +#include +#include +#endif // TARGET_APPLE + #ifdef TARGET_APPLE #include @@ -253,3 +259,320 @@ bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) { return munmap(pStart, size) != -1; } + +#ifndef TARGET_APPLE +#define MAX_TEMPLATE_THUNK_TYPES 3 // Maximum number of times the CreateTemplate api can be called +struct TemplateThunkMappingData +{ + int fdImage; + off_t offsetInFileOfStartOfSection; + void* addrOfStartOfSection; // Always NULL if the template mapping data could not be initialized + void* addrOfEndOfSection; + bool imageTemplates; + int templatesCreated; + off_t nonImageTemplateCurrent; +}; + +struct InitializeTemplateThunkLocals +{ + void* pTemplate; + Dl_info info; + TemplateThunkMappingData data; +}; + +static TemplateThunkMappingData *s_pThunkData = NULL; + +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE + +static Elf32_Word Elf32_WordMin(Elf32_Word left, Elf32_Word right) +{ + return left < right ? left : right; +} + +static int InitializeTemplateThunkMappingDataPhdrCallback(struct dl_phdr_info *info, size_t size, void *dataPtr) +{ + InitializeTemplateThunkLocals *locals = (InitializeTemplateThunkLocals*)dataPtr; + + if ((void*)info->dlpi_addr == locals->info.dli_fbase) + { + for (size_t j = 0; j < info->dlpi_phnum; j++) + { + uint8_t* baseSectionAddr = (uint8_t*)locals->info.dli_fbase + info->dlpi_phdr[j].p_vaddr; + if (locals->pTemplate < baseSectionAddr) + { + // Address is before the virtual address of this section begins + continue; + } + + // Since this is all in support of mapping code from the file, we need to ensure that the region we find + // is actually present in the file. + Elf32_Word sizeOfSectionWhichCanBeMapped = Elf32_WordMin(info->dlpi_phdr[j].p_filesz, info->dlpi_phdr[j].p_memsz); + + uint8_t* endAddressAllowedForTemplate = baseSectionAddr + sizeOfSectionWhichCanBeMapped; + if (locals->pTemplate >= endAddressAllowedForTemplate) + { + // Template is after the virtual address of this section ends (or the mappable region of the file) + continue; + } + + // At this point, we have found the template section. Attempt to open the file, and record the various offsets for future use + + if (strlen(info->dlpi_name) == 0) + { + // This image cannot be directly referenced without capturing the argv[0] parameter + return -1; + } + + int fdImage = open(info->dlpi_name, O_RDONLY); + if (fdImage == -1) + { + return -1; // Opening the image didn't work + } + + locals->data.fdImage = fdImage; + locals->data.offsetInFileOfStartOfSection = info->dlpi_phdr[j].p_offset; + locals->data.addrOfStartOfSection = baseSectionAddr; + locals->data.addrOfEndOfSection = baseSectionAddr + sizeOfSectionWhichCanBeMapped; + locals->data.imageTemplates = true; + return 1; // We have found the result. Abort further processing. 
+ } + } + + // This isn't the interesting .so + return 0; +} +#endif // FEATURE_MAP_THUNKS_FROM_IMAGE + +TemplateThunkMappingData *InitializeTemplateThunkMappingData(void* pTemplate) +{ + InitializeTemplateThunkLocals locals; + locals.pTemplate = pTemplate; + locals.data.fdImage = 0; + locals.data.offsetInFileOfStartOfSection = 0; + locals.data.addrOfStartOfSection = NULL; + locals.data.addrOfEndOfSection = NULL; + locals.data.imageTemplates = false; + locals.data.nonImageTemplateCurrent = 0; + locals.data.templatesCreated = 0; + +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE + if (dladdr(pTemplate, &locals.info) != 0) + { + dl_iterate_phdr(InitializeTemplateThunkMappingDataPhdrCallback, &locals); + } +#endif // FEATURE_MAP_THUNKS_FROM_IMAGE + + if (locals.data.addrOfStartOfSection == NULL) + { + // This is the detail of thunk data which indicates if we were able to compute the template mapping data from the image. + +#ifdef TARGET_FREEBSD + int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU); +#elif defined(TARGET_LINUX) || defined(TARGET_ANDROID) + int fd = memfd_create("doublemapper-template", MFD_CLOEXEC); +#else + int fd = -1; + +#ifndef TARGET_ANDROID + // Bionic doesn't have shm_{open,unlink} + // POSIX fallback + if (fd == -1) + { + char name[24]; + sprintf(name, "/shm-dotnet-template-%d", getpid()); + name[sizeof(name) - 1] = '\0'; + shm_unlink(name); + fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600); + shm_unlink(name); + } +#endif // !TARGET_ANDROID +#endif + if (fd != -1) + { + off_t maxFileSize = MAX_TEMPLATE_THUNK_TYPES * 0x10000; // The largest page size we support currently is 64KB. + if (ftruncate(fd, maxFileSize) == -1) // Reserve a decent size chunk of logical memory for these things. + { + close(fd); + } + else + { + locals.data.fdImage = fd; + locals.data.offsetInFileOfStartOfSection = 0; + // We simulate the template thunk mapping data existing in mapped ram, by declaring that it exists at at + // an address which is not NULL, and which is naturally aligned on the largest page size supported by any + // architecture we support (0x10000). We do this, as the generalized logic here is designed around remapping + // already mapped memory, and by doing this we are able to share that logic. 
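+                // As an illustration (the numbers are hypothetical): the first non-image template is written at
+                // nonImageTemplateCurrent == 0, so CreateTemplate reports it at 0x10000, and a later
+                // AllocateThunksFromTemplate call recovers its file offset as
+                // offsetInFileOfStartOfSection + (pTemplate - addrOfStartOfSection) = 0 + (0x10000 - 0x10000) = 0,
+                // i.e. exactly where the generated code was stored in fdImage.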
+ locals.data.addrOfStartOfSection = (void*)0x10000; + locals.data.addrOfEndOfSection = ((uint8_t*)locals.data.addrOfStartOfSection) + maxFileSize; + locals.data.imageTemplates = false; + } + } + } + + + TemplateThunkMappingData *pAllocatedData = (TemplateThunkMappingData*)malloc(sizeof(TemplateThunkMappingData)); + *pAllocatedData = locals.data; + TemplateThunkMappingData *pExpectedNull = NULL; + if (__atomic_compare_exchange_n (&s_pThunkData, &pExpectedNull, pAllocatedData, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED)) + { + return pAllocatedData; + } + else + { + free(pAllocatedData); + return __atomic_load_n(&s_pThunkData, __ATOMIC_ACQUIRE); + } +} +#endif + +bool VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress() +{ +#ifdef TARGET_APPLE + return false; +#else + return true; +#endif +} + +void* VMToOSInterface::CreateTemplate(void* pImageTemplate, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ +#ifdef TARGET_APPLE + return pImageTemplate; +#elif defined(TARGET_X86) + return NULL; // X86 doesn't support high performance relative addressing, which makes the template system not work +#else + if (pImageTemplate == NULL) + return NULL; + + TemplateThunkMappingData* pThunkData = __atomic_load_n(&s_pThunkData, __ATOMIC_ACQUIRE); + if (s_pThunkData == NULL) + { + pThunkData = InitializeTemplateThunkMappingData(pImageTemplate); + } + + // Unable to create template mapping region + if (pThunkData->addrOfStartOfSection == NULL) + { + return NULL; + } + + int templatesCreated = __atomic_add_fetch(&pThunkData->templatesCreated, 1, __ATOMIC_SEQ_CST); + assert(templatesCreated <= MAX_TEMPLATE_THUNK_TYPES); + + if (!pThunkData->imageTemplates) + { + // Need to allocate a memory mapped region to fill in the data + off_t locationInFileToStoreGeneratedCode = __atomic_fetch_add((off_t*)&pThunkData->nonImageTemplateCurrent, (off_t)templateSize, __ATOMIC_SEQ_CST); + void* mappedMemory = mmap(NULL, templateSize, PROT_READ | PROT_WRITE, MAP_SHARED, pThunkData->fdImage, locationInFileToStoreGeneratedCode); + if (mappedMemory != MAP_FAILED) + { + codePageGenerator((uint8_t*)mappedMemory, (uint8_t*)mappedMemory, templateSize); + munmap(mappedMemory, templateSize); + return ((uint8_t*)pThunkData->addrOfStartOfSection) + locationInFileToStoreGeneratedCode; + } + else + { + return NULL; + } + } + else + { + return pImageTemplate; + } +#endif +} + +void* VMToOSInterface::AllocateThunksFromTemplate(void* pTemplate, size_t templateSize, void* pStartSpecification) +{ +#ifdef TARGET_APPLE + vm_address_t addr, taddr; + vm_prot_t prot, max_prot; + kern_return_t ret; + + // Allocate two contiguous ranges of memory: the first range will contain the stubs + // and the second range will contain their data. 
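+    // (The first templateSize bytes become the executable stub page remapped from pTemplate below, while the
+    // second templateSize bytes remain ordinary read/write memory that the stubs address at a fixed
+    // templateSize offset.)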
+ do + { + ret = vm_allocate(mach_task_self(), &addr, templateSize * 2, VM_FLAGS_ANYWHERE); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) + { + return NULL; + } + + do + { + ret = vm_remap( + mach_task_self(), &addr, templateSize, 0, VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + mach_task_self(), (vm_address_t)pTemplate, FALSE, &prot, &max_prot, VM_INHERIT_SHARE); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) + { + do + { + ret = vm_deallocate(mach_task_self(), addr, templateSize * 2); + } while (ret == KERN_ABORTED); + + return NULL; + } + return (void*)addr; +#else + TemplateThunkMappingData* pThunkData = __atomic_load_n(&s_pThunkData, __ATOMIC_ACQUIRE); + if (s_pThunkData == NULL) + { + pThunkData = InitializeTemplateThunkMappingData(pTemplate); + } + + if (pThunkData->addrOfStartOfSection == NULL) + { + // This is the detail of thunk data which indicates if we were able to compute the template mapping data + return NULL; + } + + if (pTemplate < pThunkData->addrOfStartOfSection) + { + return NULL; + } + + uint8_t* endOfTemplate = ((uint8_t*)pTemplate + templateSize); + if (endOfTemplate > pThunkData->addrOfEndOfSection) + return NULL; + + size_t sectionOffset = (uint8_t*)pTemplate - (uint8_t*)pThunkData->addrOfStartOfSection; + off_t fileOffset = pThunkData->offsetInFileOfStartOfSection + sectionOffset; + + void *pStart = mmap(pStartSpecification, templateSize * 2, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | (pStartSpecification != NULL ? MAP_FIXED : 0), -1, 0); + if (pStart == MAP_FAILED) + { + return NULL; + } + + void *pStartCode = mmap(pStart, templateSize, PROT_READ | PROT_EXEC, MAP_PRIVATE | MAP_FIXED, pThunkData->fdImage, fileOffset); + if (pStart != pStartCode) + { + munmap(pStart, templateSize * 2); + return NULL; + } + + return pStart; +#endif +} + +bool VMToOSInterface::FreeThunksFromTemplate(void* thunks, size_t templateSize) +{ +#ifdef TARGET_APPLE + kern_return_t ret; + + do + { + ret = vm_deallocate(mach_task_self(), (vm_address_t)thunks, templateSize * 2); + } while (ret == KERN_ABORTED); + + return ret == KERN_SUCCESS ? 
true : false; +#else + munmap(thunks, templateSize * 2); + return true; +#endif +} diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp index 9e8ddfed8e964d..f5f25f2bec92cc 100644 --- a/src/coreclr/minipal/Windows/doublemapping.cpp +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -210,3 +210,23 @@ bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) { return UnmapViewOfFile(pStart); } + +void* VMToOSInterface::CreateTemplate(void* pImageTemplate, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ + return NULL; +} + +bool VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress() +{ + return false; +} + +void* VMToOSInterface::AllocateThunksFromTemplate(void* pTemplate, size_t templateSize, void* pStart) +{ + return NULL; +} + +bool VMToOSInterface::FreeThunksFromTemplate(void* thunks, size_t templateSize) +{ + return false; +} diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h index afecd9ce74dc72..01f497e60e6d7e 100644 --- a/src/coreclr/minipal/minipal.h +++ b/src/coreclr/minipal/minipal.h @@ -75,6 +75,41 @@ class VMToOSInterface // Return: // true if it succeeded, false if it failed static bool ReleaseRWMapping(void* pStart, size_t size); + + // Create a template for use by AllocateThunksFromTemplate + // Parameters: + // pImageTemplate - Address of start of template in the image for coreclr. (All addresses passed to the api in a process must be from the same module, if any call uses a pImageTemplate, all calls MUST) + // templateSize - Size of the template + // codePageGenerator - If the system is unable to use pImageTemplate, use this parameter to generate the code page instead + // + // Return: + // NULL if creating the template fails + // Non-NULL, a pointer to the template + static void* CreateTemplate(void* pImageTemplate, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)); + + // Indicate if the AllocateThunksFromTemplate function respects the pStart address passed to AllocateThunksFromTemplate on this platform + // Return: + // true if the parameter is respected, false if not + static bool AllocateThunksFromTemplateRespectsStartAddress(); + + // Allocate thunks from template + // Parameters: + // pTemplate - Value returned from CreateTemplate + // templateSize - Size of the templates block in the image + // pStart - Where to allocate (Specify NULL if no particular address is required). If non-null, this must be an address returned by ReserveDoubleMappedMemory + // + // Return: + // NULL if the allocation fails + // Non-NULL, a pointer to the allocated region. 
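+    //
+    // Sketch of the intended call sequence (illustrative only):
+    //   void* tpl = VMToOSInterface::CreateTemplate(pImageTemplate, size, generator);
+    //   void* thunks = VMToOSInterface::AllocateThunksFromTemplate(tpl, size, NULL); // code page + data page
+    //   ...
+    //   VMToOSInterface::FreeThunksFromTemplate(thunks, size);                       // releases both pages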
+ static void* AllocateThunksFromTemplate(void* pTemplate, size_t templateSize, void* pStart); + + // Free thunks allocated from template + // Parameters: + // pThunks - Address previously returned by AllocateThunksFromTemplate + // templateSize - Size of the templates block in the image + // Return: + // true if it succeeded, false if it failed + static bool FreeThunksFromTemplate(void* thunks, size_t templateSize); }; #if defined(HOST_64BIT) && defined(FEATURE_CACHED_INTERFACE_DISPATCH) diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index a928e7018da25f..94ad25ceab8bdb 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -526,7 +526,7 @@ REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(HANDL vm_prot_t prot, max_prot; kern_return_t ret; - // Allocate two contiguous ranges of memory: the first range will contain the trampolines + // Allocate two contiguous ranges of memory: the first range will contain the stubs // and the second range will contain their data. do { diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp index d145ab03987a08..0242377072238c 100644 --- a/src/coreclr/utilcode/executableallocator.cpp +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -503,6 +503,11 @@ void* ExecutableAllocator::Commit(void* pStart, size_t size, bool isExecutable) } void ExecutableAllocator::Release(void* pRX) +{ + ReleaseWorker(pRX, false /* this is the standard Release of normally allocated memory */); +} + +void ExecutableAllocator::ReleaseWorker(void* pRX, bool releaseTemplate) { LIMITED_METHOD_CONTRACT; @@ -548,9 +553,19 @@ void ExecutableAllocator::Release(void* pRX) cachedMappingThatOverlaps = FindOverlappingCachedMapping(pBlock); } - if (!VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size)) + if (releaseTemplate) { - g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the double mapped memory failed")); + if (!VMToOSInterface::FreeThunksFromTemplate(pRX, pBlock->size / 2)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the template mapped memory failed")); + } + } + else + { + if (!VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the double mapped memory failed")); + } } // Put the released block into the free block list pBlock->baseRX = NULL; @@ -962,3 +977,60 @@ void ExecutableAllocator::UnmapRW(void* pRW) g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); } } + +void* ExecutableAllocator::AllocateThunksFromTemplate(void *pTemplate, size_t templateSize) +{ + if (IsDoubleMappingEnabled() && VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(templateSize * 2, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void* result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, templateSize * 2, 0, 0); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + void *pTemplateAddressAllocated = VMToOSInterface::AllocateThunksFromTemplate(pTemplate, templateSize, block->baseRX); + + if (pTemplateAddressAllocated 
== NULL) + { + ReleaseWorker(block->baseRX, false); + } + + return pTemplateAddressAllocated; + } + else + { + return VMToOSInterface::AllocateThunksFromTemplate(pTemplate, templateSize, NULL); + } +} + +void ExecutableAllocator::FreeThunksFromTemplate(void *pThunks, size_t templateSize) +{ + if (IsDoubleMappingEnabled() && VMToOSInterface::AllocateThunksFromTemplateRespectsStartAddress()) + { + ReleaseWorker(pThunks, true /* This is a release of template allocated memory */); + } + else + { + VMToOSInterface::FreeThunksFromTemplate(pThunks, templateSize); + } +} + +void* ExecutableAllocator::CreateTemplate(void* templateInImage, size_t templateSize, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ + return VMToOSInterface::CreateTemplate(templateInImage, templateSize, codePageGenerator); +} diff --git a/src/coreclr/utilcode/interleavedloaderheap.cpp b/src/coreclr/utilcode/interleavedloaderheap.cpp index d908ea20c194db..082e337caebda1 100644 --- a/src/coreclr/utilcode/interleavedloaderheap.cpp +++ b/src/coreclr/utilcode/interleavedloaderheap.cpp @@ -33,10 +33,13 @@ namespace UnlockedInterleavedLoaderHeap::UnlockedInterleavedLoaderHeap( RangeList *pRangeList, - void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size), - DWORD dwGranularity) : + const InterleavedLoaderHeapConfig *pConfig) : UnlockedLoaderHeapBase(LoaderHeapImplementationKind::Interleaved), - m_pFreeListHead(NULL) + m_pEndReservedRegion(NULL), + m_dwGranularity(pConfig->StubSize), + m_pRangeList(pRangeList), + m_pFreeListHead(NULL), + m_pConfig(pConfig) { CONTRACTL { @@ -46,15 +49,7 @@ UnlockedInterleavedLoaderHeap::UnlockedInterleavedLoaderHeap( } CONTRACTL_END; - m_pEndReservedRegion = NULL; - - m_pRangeList = pRangeList; - _ASSERTE((GetStubCodePageSize() % GetOsPageSize()) == 0); // Stub code page size MUST be in increments of the page size. 
(Really it must be a power of 2 as well, but this is good enough) - m_dwGranularity = dwGranularity; - - _ASSERTE(codePageGenerator != NULL); - m_codePageGenerator = codePageGenerator; } // ~LoaderHeap is not synchronised (obviously) @@ -80,7 +75,14 @@ UnlockedInterleavedLoaderHeap::~UnlockedInterleavedLoaderHeap() pVirtualAddress = pSearch->pVirtualAddress; pNext = pSearch->pNext; - ExecutableAllocator::Instance()->Release(pVirtualAddress); + if (m_pConfig->Template != NULL) + { + ExecutableAllocator::Instance()->FreeThunksFromTemplate(pVirtualAddress, GetStubCodePageSize()); + } + else + { + ExecutableAllocator::Instance()->Release(pVirtualAddress); + } delete pSearch; } @@ -101,6 +103,7 @@ size_t UnlockedInterleavedLoaderHeap::GetBytesAvailReservedRegion() BOOL UnlockedInterleavedLoaderHeap::CommitPages(void* pData, size_t dwSizeToCommitPart) { + _ASSERTE(m_pConfig->Template == NULL); // This path should only be used for LoaderHeaps which use the standard ExecutableAllocator functions // Commit first set of pages, since it will contain the LoaderHeapBlock { void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommitPart, IsExecutable()); @@ -121,7 +124,7 @@ BOOL UnlockedInterleavedLoaderHeap::CommitPages(void* pData, size_t dwSizeToComm } ExecutableWriterHolder codePageWriterHolder((BYTE*)pData, dwSizeToCommitPart, ExecutableAllocator::DoNotAddToCache); - m_codePageGenerator(codePageWriterHolder.GetRW(), (BYTE*)pData, dwSizeToCommitPart); + m_pConfig->CodePageGenerator(codePageWriterHolder.GetRW(), (BYTE*)pData, dwSizeToCommitPart); FlushInstructionCache(GetCurrentProcess(), pData, dwSizeToCommitPart); return TRUE; @@ -137,6 +140,8 @@ BOOL UnlockedInterleavedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) } CONTRACTL_END; + _ASSERTE(m_pConfig->Template == NULL); // This path should only be used for LoaderHeaps which use the standard ExecutableAllocator functions + size_t dwSizeToReserve; // Round to page size again @@ -222,6 +227,14 @@ BOOL UnlockedInterleavedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) return TRUE; } +void ReleaseAllocatedThunks(BYTE* thunks) +{ + ExecutableAllocator::Instance()->FreeThunksFromTemplate(thunks, GetStubCodePageSize()); +} + +using ThunkMemoryHolder = SpecializedWrapper; + + // Get some more committed pages - either commit some more in the current reserved region, or, if it // has run out, reserve another set of pages. // Returns: FALSE if we can't get any more memory @@ -237,6 +250,57 @@ BOOL UnlockedInterleavedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) } CONTRACTL_END; + if (m_pConfig->Template != NULL) + { + ThunkMemoryHolder newAllocatedThunks = (BYTE*)ExecutableAllocator::Instance()->AllocateThunksFromTemplate(m_pConfig->Template, GetStubCodePageSize()); + if (newAllocatedThunks == NULL) + { + return FALSE; + } + + NewHolder pNewBlock = new (nothrow) LoaderHeapBlock; + if (pNewBlock == NULL) + { + return FALSE; + } + + size_t dwSizeToReserve = GetStubCodePageSize() * 2; + + // Record reserved range in range list, if one is specified + // Do this AFTER the commit - otherwise we'll have bogus ranges included. 
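+        // (In this template path there is no separate commit step: the AllocateThunksFromTemplate call above
+        // has already mapped both the code page and its data page, so the range can be recorded immediately.)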
+ if (m_pRangeList != NULL) + { + if (!m_pRangeList->AddRange((const BYTE *) newAllocatedThunks, + ((const BYTE *) newAllocatedThunks) + dwSizeToReserve, + (void *) this)) + { + return FALSE; + } + } + + m_dwTotalAlloc += dwSizeToReserve; + + pNewBlock.SuppressRelease(); + newAllocatedThunks.SuppressRelease(); + + pNewBlock->dwVirtualSize = dwSizeToReserve; + pNewBlock->pVirtualAddress = newAllocatedThunks; + pNewBlock->pNext = m_pFirstBlock; + pNewBlock->m_fReleaseMemory = TRUE; + + // Add to the linked list + m_pFirstBlock = pNewBlock; + + m_pAllocPtr = (BYTE*)newAllocatedThunks; + m_pPtrToEndOfCommittedRegion = m_pAllocPtr + GetStubCodePageSize(); + m_pEndReservedRegion = m_pAllocPtr + dwSizeToReserve; // For consistency with the non-template path m_pEndReservedRegion is after the end of the data area + m_dwTotalAlloc += GetStubCodePageSize(); + + return TRUE; + } + + // From here, all work is only for the dynamically allocated InterleavedLoaderHeap path + // If we have memory we can use, what are you doing here! _ASSERTE(dwMinSize > (SIZE_T)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr)); @@ -474,5 +538,13 @@ void *UnlockedInterleavedLoaderHeap::UnlockedAllocStub( return pResult; } + +void InitializeLoaderHeapConfig(InterleavedLoaderHeapConfig *pConfig, size_t stubSize, void* templateInImage, void (*codePageGenerator)(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size)) +{ + pConfig->StubSize = (uint32_t)stubSize; + pConfig->Template = ExecutableAllocator::Instance()->CreateTemplate(templateInImage, GetStubCodePageSize(), codePageGenerator); + pConfig->CodePageGenerator = codePageGenerator; +} + #endif // #ifndef DACCESS_COMPILE diff --git a/src/coreclr/vm/amd64/thunktemplates.S b/src/coreclr/vm/amd64/thunktemplates.S index ebb0f6f67f193d..611556da202bb9 100644 --- a/src/coreclr/vm/amd64/thunktemplates.S +++ b/src/coreclr/vm/amd64/thunktemplates.S @@ -5,9 +5,155 @@ #include "unixasmmacros.inc" #include "asmconstants.h" +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE + +#define POINTER_SIZE 0x08 + +#define THUNKS_MAP_SIZE 0x4000 + +#define PAGE_SIZE 0x4000 +#define PAGE_SIZE_LOG2 14 + + +#define DATA_SLOT(stub, field, thunkSize, thunkTemplateName) C_FUNC(thunkTemplateName) + THUNKS_MAP_SIZE + stub##Data__##field + IN_PAGE_INDEX * thunkSize + +// ---------- +// StubPrecode +// ---------- + +#define STUB_PRECODE_CODESIZE 0x18 // 3 instructions, 13 bytes encoded + 11 bytes of padding +#define STUB_PRECODE_DATASIZE 0x18 // 2 qwords + a BYTE +.set STUB_PRECODE_NUM_THUNKS_PER_MAPPING,(THUNKS_MAP_SIZE / STUB_PRECODE_CODESIZE) + +.macro THUNKS_BLOCK_STUB_PRECODE + IN_PAGE_INDEX = 0 + .rept STUB_PRECODE_NUM_THUNKS_PER_MAPPING + + mov r10, [rip + DATA_SLOT(StubPrecode, SecretParam, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate)] + jmp [rip + DATA_SLOT(StubPrecode, Target, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate)] + // The above is 13 bytes + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY StubPrecodeCodeTemplate + THUNKS_BLOCK_STUB_PRECODE +LEAF_END_MARKED StubPrecodeCodeTemplate, _TEXT + +// ---------- +// FixupPrecode +// ---------- + +#define FIXUP_PRECODE_CODESIZE 0x18 +#define FIXUP_PRECODE_DATASIZE 0x18 // 3 qwords +.set FIXUP_PRECODE_NUM_THUNKS_PER_MAPPING,(THUNKS_MAP_SIZE / FIXUP_PRECODE_CODESIZE) + +.macro THUNKS_BLOCK_FIXUP_PRECODE + IN_PAGE_INDEX = 0 + .rept FIXUP_PRECODE_NUM_THUNKS_PER_MAPPING + + jmp [rip + DATA_SLOT(FixupPrecode, Target, 
FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate)] + mov r10, [rip + DATA_SLOT(FixupPrecode, MethodDesc, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate)] + jmp [rip + DATA_SLOT(FixupPrecode, PrecodeFixupThunk, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate)] + // The above is 19 bytes + int 3 + int 3 + int 3 + int 3 + int 3 + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY FixupPrecodeCodeTemplate + THUNKS_BLOCK_FIXUP_PRECODE + // We need 16 bytes of padding to pad this out + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 +LEAF_END_MARKED FixupPrecodeCodeTemplate, _TEXT + +// ---------- +// CallCountingStub +// ---------- + +#define CALLCOUNTING_CODESIZE 0x18 +#define CALLCOUNTING_DATASIZE 0x18 // 3 qwords +.set CALLCOUNTING_NUM_THUNKS_PER_MAPPING, (THUNKS_MAP_SIZE / CALLCOUNTING_CODESIZE) +.macro THUNKS_BLOCK_CALLCOUNTING + IN_PAGE_INDEX = 0 + .rept CALLCOUNTING_NUM_THUNKS_PER_MAPPING + + mov rax,QWORD PTR [rip + DATA_SLOT(CallCountingStub, RemainingCallCountCell, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)] + dec WORD PTR [rax] + je 0f + jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForMethod, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)] + 0: + jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForThresholdReached, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate)] + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY CallCountingStubCodeTemplate + THUNKS_BLOCK_CALLCOUNTING + // We need 16 bytes of padding to pad this out + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 + int 3 +LEAF_END_MARKED CallCountingStubCodeTemplate, _TEXT + +#endif + // STUB_PAGE_SIZE must match the behavior of GetStubCodePageSize() on this architecture/os STUB_PAGE_SIZE = 16384 +#ifdef DATA_SLOT +#undef DATA_SLOT +#endif + #define DATA_SLOT(stub, field) C_FUNC(stub##Code) + STUB_PAGE_SIZE + stub##Data__##field LEAF_ENTRY StubPrecodeCode, _TEXT diff --git a/src/coreclr/vm/arm64/thunktemplates.S b/src/coreclr/vm/arm64/thunktemplates.S index df2abf7c29e0f7..bbbc490854721e 100644 --- a/src/coreclr/vm/arm64/thunktemplates.S +++ b/src/coreclr/vm/arm64/thunktemplates.S @@ -4,6 +4,117 @@ #include "unixasmmacros.inc" #include "asmconstants.h" +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE +#define POINTER_SIZE 0x08 +// Since Arm64 supports 4KB, 16KB and 64KB page sizes, as the templates is only defined for 16KB page size, this cannot be used +// in a general purpose Linux environment. However it CAN be used on Apple platforms, which specify that 16KB is the system standard +// page size. 
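+// (The runtime side of this change guards the same assumption: when a *CodeTemplate symbol is present,
+// StaticInitialize in precode.cpp and callcounting.cpp throws COR_E_EXECUTIONENGINE if the OS page size
+// is not 0x4000.)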
+ +#define THUNKS_MAP_SIZE 0x4000 + +#define PAGE_SIZE 0x4000 +#define PAGE_SIZE_LOG2 14 + + +#define DATA_SLOT(stub, field, thunkSize, thunkTemplateName) C_FUNC(thunkTemplateName) + THUNKS_MAP_SIZE + stub##Data__##field + IN_PAGE_INDEX * thunkSize + +// ---------- +// StubPrecode +// ---------- + +#define STUB_PRECODE_CODESIZE 0x18 // 3 instructions, 4 bytes each (and we also have 12 bytes of padding) +#define STUB_PRECODE_DATASIZE 0x18 // 2 qwords + 1 byte +.set STUB_PRECODE_NUM_THUNKS_PER_MAPPING, (THUNKS_MAP_SIZE / STUB_PRECODE_CODESIZE) + +.macro THUNKS_BLOCK_STUB_PRECODE + IN_PAGE_INDEX = 0 + .rept STUB_PRECODE_NUM_THUNKS_PER_MAPPING + + ldr x10, DATA_SLOT(StubPrecode, Target, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate) + ldr x12, DATA_SLOT(StubPrecode, SecretParam, STUB_PRECODE_CODESIZE, StubPrecodeCodeTemplate) + br x10 + + brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 2 pointers + 1 byte + brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 2 pointers + 1 byte + brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 2 pointers + 1 byte + + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY StubPrecodeCodeTemplate + THUNKS_BLOCK_STUB_PRECODE +LEAF_END_MARKED StubPrecodeCodeTemplate, _TEXT + +// ---------- +// FixupPrecode +// ---------- + +#define FIXUP_PRECODE_CODESIZE 0x18 // 5 instructions, 4 bytes each (and we also have 4 bytes of padding) +#define FIXUP_PRECODE_DATASIZE 0x18 // 3 qwords +.set FIXUP_PRECODE_NUM_THUNKS_PER_MAPPING,(THUNKS_MAP_SIZE / FIXUP_PRECODE_CODESIZE) + +.macro THUNKS_BLOCK_FIXUP_PRECODE + IN_PAGE_INDEX = 0 + .rept FIXUP_PRECODE_NUM_THUNKS_PER_MAPPING + + ldr x11, DATA_SLOT(FixupPrecode, Target, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate) + br x11 + ldr x12, DATA_SLOT(FixupPrecode, MethodDesc, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate) + ldr x11, DATA_SLOT(FixupPrecode, PrecodeFixupThunk, FIXUP_PRECODE_CODESIZE, FixupPrecodeCodeTemplate) + br x11 + brk 0xf000 // Stubs need to be 24-byte in size to allow for the data to be 3 pointers + + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY FixupPrecodeCodeTemplate + THUNKS_BLOCK_FIXUP_PRECODE +LEAF_END_MARKED FixupPrecodeCodeTemplate, _TEXT + +// ---------- +// CallCountingStub +// ---------- + +#define CALLCOUNTING_CODESIZE 0x28 // 5 instructions, 4 bytes each (and we also have 4 bytes of padding) +#define CALLCOUNTING_DATASIZE 0x18 // 3 qwords +.set CALLCOUNTING_NUM_THUNKS_PER_MAPPING, (THUNKS_MAP_SIZE / CALLCOUNTING_CODESIZE) + +.macro THUNKS_BLOCK_CALLCOUNTING + IN_PAGE_INDEX = 0 + .rept CALLCOUNTING_NUM_THUNKS_PER_MAPPING + + ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate) + ldrh w10, [x9] + subs w10, w10, #1 + strh w10, [x9] + beq 0f + ldr x9, DATA_SLOT(CallCountingStub, TargetForMethod, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate) + br x9 +0: + ldr x10, DATA_SLOT(CallCountingStub, TargetForThresholdReached, CALLCOUNTING_CODESIZE, CallCountingStubCodeTemplate) + br x10 + brk 0xf000 // Stubs need to be 40-byte in size to allow for the data to be pointer aligned + + IN_PAGE_INDEX = IN_PAGE_INDEX + 1 + .endr +.endm + + .text + .p2align PAGE_SIZE_LOG2 +LEAF_ENTRY CallCountingStubCodeTemplate + THUNKS_BLOCK_CALLCOUNTING +LEAF_END_MARKED CallCountingStubCodeTemplate, _TEXT +#endif + +#ifdef DATA_SLOT +#undef DATA_SLOT +#endif #define DATA_SLOT(stub, field) . 
- (. - C_FUNC(stub##Code\STUB_PAGE_SIZE)) + \STUB_PAGE_SIZE + stub##Data__##field .irp STUB_PAGE_SIZE, 16384, 32768, 65536 diff --git a/src/coreclr/vm/callcounting.cpp b/src/coreclr/vm/callcounting.cpp index 0f26b7d4090096..f5168fc0f799b1 100644 --- a/src/coreclr/vm/callcounting.cpp +++ b/src/coreclr/vm/callcounting.cpp @@ -293,6 +293,14 @@ void (*CallCountingStub::CallCountingStubCode)(); #ifndef DACCESS_COMPILE +static InterleavedLoaderHeapConfig s_callCountingHeapConfig; + +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE +extern "C" void CallCountingStubCodeTemplate(); +#else +#define CallCountingStubCodeTemplate NULL +#endif + void CallCountingStub::StaticInitialize() { #if defined(TARGET_ARM64) && defined(TARGET_UNIX) @@ -310,14 +318,22 @@ void CallCountingStub::StaticInitialize() EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Unsupported OS page size")); } #undef ENUM_PAGE_SIZE + + if (CallCountingStubCodeTemplate != NULL && pageSize != 0x4000) + { + // This should fail if the template is used on a platform which doesn't support the supported page size for templates + ThrowHR(COR_E_EXECUTIONENGINE); + } #else _ASSERTE((SIZE_T)((BYTE*)CallCountingStubCode_End - (BYTE*)CallCountingStubCode) <= CallCountingStub::CodeSize); #endif + + InitializeLoaderHeapConfig(&s_callCountingHeapConfig, CallCountingStub::CodeSize, (void*)CallCountingStubCodeTemplate, CallCountingStub::GenerateCodePage); } #endif // DACCESS_COMPILE -void CallCountingStub::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T pageSize) +void CallCountingStub::GenerateCodePage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t pageSize) { #ifdef TARGET_X86 int totalCodeSize = (pageSize / CallCountingStub::CodeSize) * CallCountingStub::CodeSize; @@ -328,13 +344,13 @@ void CallCountingStub::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T // Set absolute addresses of the slots in the stub BYTE* pCounterSlot = pageBaseRX + i + pageSize + offsetof(CallCountingStubData, RemainingCallCountCell); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_RemainingCallCountCell_Offset)) = pCounterSlot; + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_RemainingCallCountCell_Offset)) = pCounterSlot; BYTE* pTargetSlot = pageBaseRX + i + pageSize + offsetof(CallCountingStubData, TargetForMethod); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_TargetForMethod_Offset)) = pTargetSlot; + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_TargetForMethod_Offset)) = pTargetSlot; BYTE* pCountReachedZeroSlot = pageBaseRX + i + pageSize + offsetof(CallCountingStubData, TargetForThresholdReached); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_TargetForThresholdReached_Offset)) = pCountReachedZeroSlot; + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_TargetForThresholdReached_Offset)) = pCountReachedZeroSlot; } #else // TARGET_X86 FillStubCodePage(pageBase, (const void*)PCODEToPINSTR((PCODE)CallCountingStubCode), CallCountingStub::CodeSize, pageSize); @@ -354,7 +370,7 @@ NOINLINE InterleavedLoaderHeap *CallCountingManager::CallCountingStubAllocator:: _ASSERTE(m_heap == nullptr); - InterleavedLoaderHeap *heap = new InterleavedLoaderHeap(&m_heapRangeList, true /* fUnlocked */, CallCountingStub::GenerateCodePage, CallCountingStub::CodeSize); + InterleavedLoaderHeap *heap = new InterleavedLoaderHeap(&m_heapRangeList, true /* fUnlocked */, &s_callCountingHeapConfig); m_heap = heap; return heap; } @@ -475,6 +491,7 @@ CallCountingManager::~CallCountingManager() } 
#ifndef DACCESS_COMPILE + void CallCountingManager::StaticInitialize() { WRAPPER_NO_CONTRACT; diff --git a/src/coreclr/vm/callcounting.h b/src/coreclr/vm/callcounting.h index 75a907f4d6ea3c..59071aa51f140b 100644 --- a/src/coreclr/vm/callcounting.h +++ b/src/coreclr/vm/callcounting.h @@ -150,7 +150,7 @@ class CallCountingStub static void StaticInitialize(); #endif // !DACCESS_COMPILE - static void GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size); + static void GenerateCodePage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size); PTR_CallCount GetRemainingCallCountCell() const; PCODE GetTargetForMethod() const; diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 5fe3bb2faf2831..f31d2d068bbfb8 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -1208,8 +1208,7 @@ void LoaderAllocator::Init(BYTE *pExecutableHeapMemory) m_pNewStubPrecodeHeap = new (&m_NewStubPrecodeHeapInstance) InterleavedLoaderHeap( &m_stubPrecodeRangeList, false /* fUnlocked */, - StubPrecode::GenerateCodePage, - StubPrecode::CodeSize); + &s_stubPrecodeHeapConfig); #if defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS) && defined(FEATURE_READYTORUN) if (IsCollectible()) @@ -1219,14 +1218,12 @@ void LoaderAllocator::Init(BYTE *pExecutableHeapMemory) m_pDynamicHelpersStubHeap = new (&m_DynamicHelpersHeapInstance) InterleavedLoaderHeap( &m_dynamicHelpersRangeList, false /* fUnlocked */, - StubPrecode::GenerateCodePage, - StubPrecode::CodeSize); + &s_stubPrecodeHeapConfig); #endif // defined(FEATURE_STUBPRECODE_DYNAMIC_HELPERS) && defined(FEATURE_READYTORUN) m_pFixupPrecodeHeap = new (&m_FixupPrecodeHeapInstance) InterleavedLoaderHeap(&m_fixupPrecodeRangeList, false /* fUnlocked */, - FixupPrecode::GenerateCodePage, - FixupPrecode::CodeSize); + &s_fixupStubPrecodeHeapConfig); // Initialize the EE marshaling data to NULL. 
m_pMarshalingData = NULL; diff --git a/src/coreclr/vm/precode.cpp b/src/coreclr/vm/precode.cpp index e3e3983e8716e1..798e9849de3a6a 100644 --- a/src/coreclr/vm/precode.cpp +++ b/src/coreclr/vm/precode.cpp @@ -15,6 +15,11 @@ #include "perfmap.h" #endif +InterleavedLoaderHeapConfig s_stubPrecodeHeapConfig; +#ifdef HAS_FIXUP_PRECODE +InterleavedLoaderHeapConfig s_fixupStubPrecodeHeapConfig; +#endif + //========================================================================================== // class Precode //========================================================================================== @@ -495,6 +500,12 @@ void (*StubPrecode::StubPrecodeCode)(); void (*StubPrecode::StubPrecodeCode_End)(); #endif +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE +extern "C" void StubPrecodeCodeTemplate(); +#else +#define StubPrecodeCodeTemplate NULL +#endif + void StubPrecode::StaticInitialize() { #if defined(TARGET_ARM64) && defined(TARGET_UNIX) @@ -512,6 +523,13 @@ void StubPrecode::StaticInitialize() default: EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Unsupported OS page size")); } + + if (StubPrecodeCodeTemplate != NULL && pageSize != 0x4000) + { + // This should fail if the template is used on a platform which doesn't support the supported page size for templates + ThrowHR(COR_E_EXECUTIONENGINE); + } + #undef ENUM_PAGE_SIZE #else _ASSERTE((SIZE_T)((BYTE*)StubPrecodeCode_End - (BYTE*)StubPrecodeCode) <= StubPrecode::CodeSize); @@ -524,21 +542,22 @@ void StubPrecode::StaticInitialize() _ASSERTE((*((BYTE*)PCODEToPINSTR((PCODE)StubPrecodeCode) + OFFSETOF_PRECODE_TYPE)) == StubPrecode::Type); #endif + InitializeLoaderHeapConfig(&s_stubPrecodeHeapConfig, StubPrecode::CodeSize, (void*)StubPrecodeCodeTemplate, StubPrecode::GenerateCodePage); } -void StubPrecode::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T pageSize) +void StubPrecode::GenerateCodePage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t pageSize) { #ifdef TARGET_X86 int totalCodeSize = (pageSize / StubPrecode::CodeSize) * StubPrecode::CodeSize; for (int i = 0; i < totalCodeSize; i += StubPrecode::CodeSize) { - memcpy(pageBase + i, (const void*)StubPrecodeCode, (BYTE*)StubPrecodeCode_End - (BYTE*)StubPrecodeCode); + memcpy(pageBase + i, (const void*)StubPrecodeCode, (uint8_t*)StubPrecodeCode_End - (uint8_t*)StubPrecodeCode); - BYTE* pTargetSlot = pageBaseRX + i + pageSize + offsetof(StubPrecodeData, Target); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(StubPrecodeCode_Target_Offset)) = pTargetSlot; + uint8_t* pTargetSlot = pageBaseRX + i + pageSize + offsetof(StubPrecodeData, Target); + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(StubPrecodeCode_Target_Offset)) = pTargetSlot; BYTE* pMethodDescSlot = pageBaseRX + i + pageSize + offsetof(StubPrecodeData, SecretParam); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(StubPrecodeCode_MethodDesc_Offset)) = pMethodDescSlot; + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(StubPrecodeCode_MethodDesc_Offset)) = pMethodDescSlot; } #else // TARGET_X86 FillStubCodePage(pageBase, (const void*)PCODEToPINSTR((PCODE)StubPrecodeCode), StubPrecode::CodeSize, pageSize); @@ -626,6 +645,12 @@ void (*FixupPrecode::FixupPrecodeCode)(); void (*FixupPrecode::FixupPrecodeCode_End)(); #endif +#ifdef FEATURE_MAP_THUNKS_FROM_IMAGE +extern "C" void FixupPrecodeCodeTemplate(); +#else +#define FixupPrecodeCodeTemplate NULL +#endif + void FixupPrecode::StaticInitialize() { #if defined(TARGET_ARM64) && defined(TARGET_UNIX) @@ -645,6 +670,12 @@ void FixupPrecode::StaticInitialize() 
EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Unsupported OS page size")); } #undef ENUM_PAGE_SIZE + + if (FixupPrecodeCodeTemplate != NULL && pageSize != 0x4000) + { + // This should fail if the template is used on a platform which doesn't support the supported page size for templates + ThrowHR(COR_E_EXECUTIONENGINE); + } #else _ASSERTE((SIZE_T)((BYTE*)FixupPrecodeCode_End - (BYTE*)FixupPrecodeCode) <= FixupPrecode::CodeSize); #endif @@ -655,9 +686,11 @@ void FixupPrecode::StaticInitialize() #else _ASSERTE(*((BYTE*)PCODEToPINSTR((PCODE)FixupPrecodeCode) + OFFSETOF_PRECODE_TYPE) == FixupPrecode::Type); #endif + + InitializeLoaderHeapConfig(&s_fixupStubPrecodeHeapConfig, FixupPrecode::CodeSize, (void*)FixupPrecodeCodeTemplate, FixupPrecode::GenerateCodePage); } -void FixupPrecode::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T pageSize) +void FixupPrecode::GenerateCodePage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t pageSize) { #ifdef TARGET_X86 int totalCodeSize = (pageSize / FixupPrecode::CodeSize) * FixupPrecode::CodeSize; @@ -665,14 +698,14 @@ void FixupPrecode::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T pag for (int i = 0; i < totalCodeSize; i += FixupPrecode::CodeSize) { memcpy(pageBase + i, (const void*)FixupPrecodeCode, FixupPrecode::CodeSize); - BYTE* pTargetSlot = pageBaseRX + i + pageSize + offsetof(FixupPrecodeData, Target); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_Target_Offset)) = pTargetSlot; + uint8_t* pTargetSlot = pageBaseRX + i + pageSize + offsetof(FixupPrecodeData, Target); + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_Target_Offset)) = pTargetSlot; BYTE* pMethodDescSlot = pageBaseRX + i + pageSize + offsetof(FixupPrecodeData, MethodDesc); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_MethodDesc_Offset)) = pMethodDescSlot; + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_MethodDesc_Offset)) = pMethodDescSlot; BYTE* pPrecodeFixupThunkSlot = pageBaseRX + i + pageSize + offsetof(FixupPrecodeData, PrecodeFixupThunk); - *(BYTE**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_PrecodeFixupThunk_Offset)) = pPrecodeFixupThunkSlot; + *(uint8_t**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_PrecodeFixupThunk_Offset)) = pPrecodeFixupThunkSlot; } #else // TARGET_X86 FillStubCodePage(pageBase, (const void*)PCODEToPINSTR((PCODE)FixupPrecodeCode), FixupPrecode::CodeSize, pageSize); diff --git a/src/coreclr/vm/precode.h b/src/coreclr/vm/precode.h index 87570f217292a0..64394d259e91a4 100644 --- a/src/coreclr/vm/precode.h +++ b/src/coreclr/vm/precode.h @@ -225,7 +225,7 @@ struct StubPrecode pData->Target = (PCODE)target; } - static void GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size); + static void GenerateCodePage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size); #endif // !DACCESS_COMPILE }; @@ -428,7 +428,7 @@ struct FixupPrecode static void StaticInitialize(); - static void GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX, SIZE_T size); + static void GenerateCodePage(uint8_t* pageBase, uint8_t* pageBaseRX, size_t size); PTR_FixupPrecodeData GetData() const { @@ -861,4 +861,9 @@ struct PrecodeMachineDescriptor }; #endif //DACCESS_COMPILE +extern InterleavedLoaderHeapConfig s_stubPrecodeHeapConfig; +#ifdef HAS_FIXUP_PRECODE +extern InterleavedLoaderHeapConfig s_fixupStubPrecodeHeapConfig; +#endif + #endif // __PRECODE_H__ From 5a256fbd94d322cb251202175c4de361d51bbd54 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Mon, 19 May 2025 16:22:04 -0700 Subject: [PATCH 
2/3] Disable feature on Apple platforms for now --- src/coreclr/clrdefinitions.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index cb3b645ff0e58d..2ce02d51585b09 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -219,7 +219,8 @@ if (FEATURE_STUBPRECODE_DYNAMIC_HELPERS) endif() if (CLR_CMAKE_TARGET_APPLE) - add_definitions(-DFEATURE_MAP_THUNKS_FROM_IMAGE) +# Re-enable when the dbgshim is fixed and generally available +# add_definitions(-DFEATURE_MAP_THUNKS_FROM_IMAGE) endif() # Use this function to enable building with a specific target OS and architecture set of defines From b4fc7c0f152fa53ad11150ae3152bc2123b06433 Mon Sep 17 00:00:00 2001 From: David Wrighton Date: Tue, 20 May 2025 09:43:23 -0700 Subject: [PATCH 3/3] Update src/coreclr/clrdefinitions.cmake Co-authored-by: Tom McDonald --- src/coreclr/clrdefinitions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index 2ce02d51585b09..c7f6ae41d4d8aa 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -219,7 +219,7 @@ if (FEATURE_STUBPRECODE_DYNAMIC_HELPERS) endif() if (CLR_CMAKE_TARGET_APPLE) -# Re-enable when the dbgshim is fixed and generally available +# Re-enable when dbgshim containing https://github.com/dotnet/diagnostics/pull/5487 is generally available # add_definitions(-DFEATURE_MAP_THUNKS_FROM_IMAGE) endif()
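
For reference, the intended wiring of the pieces added in patch 1/3, condensed from the precode.cpp and
loaderallocator.cpp hunks above. This is an illustrative sketch only: MyStub, MyStubCodeTemplate, myRangeList
and s_myStubHeapConfig are stand-in names, not identifiers from the change.

    // Built once at startup. The template may come back NULL (e.g. on Windows, or when
    // FEATURE_MAP_THUNKS_FROM_IMAGE is not defined); the heap then falls back to generating code pages.
    static InterleavedLoaderHeapConfig s_myStubHeapConfig;
    InitializeLoaderHeapConfig(&s_myStubHeapConfig,
                               MyStub::CodeSize,            // per-stub granularity
                               (void*)MyStubCodeTemplate,   // template symbol, or NULL
                               MyStub::GenerateCodePage);   // fallback code page generator

    // Each loader allocator then constructs its interleaved heap against the shared config.
    InterleavedLoaderHeap* pHeap = new InterleavedLoaderHeap(&myRangeList,
                                                             false /* fUnlocked */,
                                                             &s_myStubHeapConfig);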