Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
1fb49a3
Implement localloc in the interpreter
kotlarmilos Apr 15, 2025
e5fb653
Add comment
kotlarmilos Apr 15, 2025
6b81d9b
Implement FrameDataAllocator for dynamic stack allocations
kotlarmilos Apr 21, 2025
43f4481
Merge branch 'main' of github.com:kotlarmilos/runtime into feature/co…
kotlarmilos Apr 21, 2025
f660365
Fix merge conflicts
kotlarmilos Apr 21, 2025
e59cace
Initialize local variables to zero if the flag is set
kotlarmilos Apr 22, 2025
7c649a8
Initialize local variables based on the CORINFO_OPT_INIT_LOCALS. Move…
kotlarmilos Apr 22, 2025
1d1e226
Implement FrameDataFragment destructor
kotlarmilos Apr 22, 2025
cb0b212
Handle memory allocation failure in frame data allocation
kotlarmilos Apr 22, 2025
581e856
Fix typo
kotlarmilos Apr 22, 2025
e72e0e3
Add preprocessor directive FEATURE_INTERPRETER
kotlarmilos Apr 22, 2025
50e94f6
Fix windows build
kotlarmilos Apr 22, 2025
dfb170b
If size is 0 allocates a zero-length item and returns a valid pointer…
kotlarmilos Apr 22, 2025
c3bc007
Update frame allocator to use InterpMethodContextFrame
kotlarmilos Apr 23, 2025
c91b99d
Throw OutOfMemory exception if alloc fails
kotlarmilos Apr 23, 2025
2bfe64f
Revert assert changes
kotlarmilos Apr 23, 2025
c08ab74
Update FrameDataAllocator to return nullptr if alloc fails
kotlarmilos Apr 24, 2025
7f42176
Check if infosLen is >0
kotlarmilos Apr 24, 2025
df20036
Use consistent naming for frame pointers
kotlarmilos Apr 24, 2025
8dc3223
Test __SIZEOF_POINTER__ for pointer size checks
kotlarmilos Apr 24, 2025
cd3d5b7
Replace __SIZEOF_POINTER__ with TARGET_64BIT
kotlarmilos Apr 24, 2025
bbb4345
Encapsulate FrameDataAllocator structs into class and make them private
kotlarmilos Apr 24, 2025
4b8c03c
Implement destructor for InterpThreadContext to free FrameDataAllocator
kotlarmilos Apr 25, 2025
db6355f
Move InterpThreadContext instance to the CoreCLR Thread and implement…
kotlarmilos Apr 25, 2025
0e97a9c
Move InterpThreadContext destructor call to OnThreadTerminate
kotlarmilos Apr 26, 2025
980eb78
Refactor Alloc to accept size by value instead of pointer
kotlarmilos Apr 26, 2025
d203e02
Merge branch 'main' into feature/coreclr-interp-opcode-localloc
kotlarmilos Apr 27, 2025
902e4d0
Fix typo
kotlarmilos Apr 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion src/coreclr/interpreter/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,9 @@ InterpMethod* InterpCompiler::CreateInterpMethod()
for (int i = 0; i < numDataItems; i++)
pDataItems[i] = m_dataItems.Get(i);

InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize, pDataItems);
bool initLocals = (m_methodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0;

InterpMethod *pMethod = new InterpMethod(m_methodHnd, m_totalVarsStackSize, pDataItems, initLocals);

return pMethod;
}
Expand Down Expand Up @@ -2596,6 +2598,14 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
EmitBinaryArithmeticOp(INTOP_MUL_I4);
m_ip++;
break;
case CEE_MUL_OVF:
EmitBinaryArithmeticOp(INTOP_MUL_OVF_I4);
m_ip++;
break;
case CEE_MUL_OVF_UN:
EmitBinaryArithmeticOp(INTOP_MUL_OVF_UN_I4);
m_ip++;
break;
case CEE_DIV:
EmitBinaryArithmeticOp(INTOP_DIV_I4);
m_ip++;
Expand Down Expand Up @@ -3134,6 +3144,25 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
m_ip += 5;
break;
}
case CEE_LOCALLOC:
CHECK_STACK(1);
#if SIZEOF_VOID_P == 8
if (m_pStackPointer[-1].type == StackTypeI8)
EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8);
#endif
AddIns(INTOP_LOCALLOC);
m_pStackPointer--;
if (m_pStackPointer != m_pStackBase)
{
m_hasInvalidCode = true;
goto exit_bad_code;
}

m_pLastNewIns->SetSVar(m_pStackPointer[0].var);
PushStackType(StackTypeByRef, NULL);
m_pLastNewIns->SetDVar(m_pStackPointer[-1].var);
m_ip++;
break;
default:
assert(0);
break;
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/interpreter/interpretershared.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ struct InterpMethod
CORINFO_METHOD_HANDLE methodHnd;
int32_t allocaSize;
void** pDataItems;
uint32_t initLocals : 1;

InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems)
InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems, bool initLocals)
{
this->methodHnd = methodHnd;
this->allocaSize = allocaSize;
this->pDataItems = pDataItems;
this->initLocals = initLocals;
}
};

Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/interpreter/intops.def
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ OPDEF(INTOP_MUL_I8, "mul.i8", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_R4, "mul.r4", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_R8, "mul.r8", 4, 1, 2, InterpOpNoArgs)

OPDEF(INTOP_MUL_OVF_I4, "mul.ovf.i8", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_OVF_I8, "mul.ovf.i8", 4, 1, 2, InterpOpNoArgs)

OPDEF(INTOP_MUL_OVF_UN_I4, "mul.ovf.un.i8", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_OVF_UN_I8, "mul.ovf.un.i8", 4, 1, 2, InterpOpNoArgs)

OPDEF(INTOP_DIV_I4, "div.i4", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_DIV_I8, "div.i8", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_DIV_R4, "div.r4", 4, 1, 2, InterpOpNoArgs)
Expand Down Expand Up @@ -253,6 +259,7 @@ OPDEF(INTOP_NEWOBJ_VT, "newobj.vt", 5, 1, 1, InterpOpMethodToken)
OPDEF(INTOP_CALL_HELPER_PP, "call.helper.pp", 5, 1, 0, InterpOpThreeInts)

OPDEF(INTOP_ZEROBLK_IMM, "zeroblk.imm", 3, 0, 1, InterpOpInt)
OPDEF(INTOP_LOCALLOC, "localloc", 3, 1, 1, InterpOpNoArgs)
OPDEF(INTOP_BREAKPOINT, "breakpoint", 1, 0, 0, InterpOpNoArgs)
OPDEF(INTOP_FAILFAST, "failfast", 1, 0, 0, InterpOpNoArgs)

Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/vm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ set(VM_SOURCES_WKS
interopconverter.cpp
interoputil.cpp
interpexec.cpp
interpframeallocator.cpp
invokeutil.cpp
jithelpers.cpp
managedmdimport.cpp
Expand Down Expand Up @@ -444,6 +445,7 @@ set(VM_HEADERS_WKS
interoputil.h
interoputil.inl
interpexec.h
interpframeallocator.h
invokeutil.h
managedmdimport.hpp
marshalnative.h
Expand Down
79 changes: 79 additions & 0 deletions src/coreclr/vm/interpexec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ InterpThreadContext* InterpGetThreadContext()
// FIXME VirtualAlloc/mmap with INTERP_STACK_ALIGNMENT alignment
threadContext->pStackStart = threadContext->pStackPointer = (int8_t*)malloc(INTERP_STACK_SIZE);
threadContext->pStackEnd = threadContext->pStackStart + INTERP_STACK_SIZE;
threadContext->pFrameDataAllocator = new FrameDataAllocator(INTERP_STACK_FRAGMENT_SIZE);

t_pThreadContext = threadContext;
return threadContext;
Expand Down Expand Up @@ -584,7 +585,53 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
LOCAL_VAR(ip[1], double) = LOCAL_VAR(ip[2], double) * LOCAL_VAR(ip[3], double);
ip += 4;
break;
case INTOP_MUL_OVF_I4:
{
int32_t i1 = LOCAL_VAR(ip[2], int32_t);
int32_t i2 = LOCAL_VAR(ip[3], int32_t);
int32_t i3;
if (!ClrSafeInt<int32_t>::multiply(i1, i2, i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], int32_t) = i3;
ip += 4;
break;
}

case INTOP_MUL_OVF_I8:
{
int64_t i1 = LOCAL_VAR(ip[2], int64_t);
int64_t i2 = LOCAL_VAR(ip[3], int64_t);
int64_t i3;
if (!ClrSafeInt<int64_t>::multiply(i1, i2, i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], int64_t) = i3;
ip += 4;
break;
}

case INTOP_MUL_OVF_UN_I4:
{
uint32_t i1 = LOCAL_VAR(ip[2], uint32_t);
uint32_t i2 = LOCAL_VAR(ip[3], uint32_t);
uint32_t i3;
if (!ClrSafeInt<uint32_t>::multiply(i1, i2, i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], uint32_t) = i3;
ip += 4;
break;
}

case INTOP_MUL_OVF_UN_I8:
{
uint64_t i1 = LOCAL_VAR(ip[2], uint64_t);
uint64_t i2 = LOCAL_VAR(ip[3], uint64_t);
uint64_t i3;
if (!ClrSafeInt<uint64_t>::multiply(i1, i2, i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], uint64_t) = i3;
ip += 4;
break;
}
case INTOP_DIV_I4:
{
int32_t i1 = LOCAL_VAR(ip[2], int32_t);
Expand Down Expand Up @@ -1105,6 +1152,33 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
memset(LOCAL_VAR(ip[1], void*), 0, ip[2]);
ip += 3;
break;
case INTOP_LOCALLOC:
{
int32_t len = LOCAL_VAR(ip[2], int32_t);
void* mem;

if (len > 0)
{
len = ALIGN_UP(len, INTERP_STACK_ALIGNMENT);
mem = pThreadContext->pFrameDataAllocator->Alloc((InterpreterFrame*)pFrame, len);
if (!mem)
{
// Interpreter-TODO: OutOfMemoryException
assert(0);
}

if (pMethod->initLocals)
{
memset(mem, 0, len);
}
} else
{
mem = NULL;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stackalloc is no expected to return null. I am not sure whether it explicitly in specified in any .NET spec, but it is specified in C++ docs. From https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/alloca?view=msvc-170#remarks: . If size is 0, _alloca allocates a zero-length item and returns a valid pointer to that item.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current behavior of localloc is to return null when the size is zero:

// Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
genConsumeRegAndCopy(size, targetReg);
endLabel = genCreateTempLabel();
GetEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
inst_JMP(EJ_je, endLabel);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for pointing it out. Looks like there is a mismatch between alloca in C++ and the IL instruction.

I think it would be best to match what RyuJIT does and return zero as well like you have done originally.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, reverted.

}
LOCAL_VAR(ip[1], void*) = mem;
ip += 3;
break;
}
case INTOP_FAILFAST:
assert(0);
break;
Expand All @@ -1115,6 +1189,9 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
}

EXIT_FRAME:

// Interpreter-TODO: Don't run PopInfo on the main return path, Add RET_LOCALLOC instead
pThreadContext->pFrameDataAllocator->PopInfo((InterpreterFrame*)pFrame);
if (pFrame->pParent && pFrame->pParent->ip)
{
// Return to the main loop after a non-recursive interpreter call
Expand All @@ -1129,6 +1206,8 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
goto MAIN_LOOP;
}

// Interpreter-TODO: FrameDataAllocator is owned by the current thread, free it when the thread exits instead of here
delete pThreadContext->pFrameDataAllocator;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure how can we delete the allocator here. If I understand it correctly, we allocate it in the InterpGetThreadContext, but only if the t_pThreadContext was not set yet. And we delete it here when we exit from a block of interpreted frames that can happen many times. E.g. with the EH, a catch funclet exits here and then we resume in the parent function after the catch. That means that after resuming from catch, the pFrameDataAllocator would be invalid.

Copy link
Member Author

@kotlarmilos kotlarmilos Apr 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That workaround was only temporary, but you are right. I’ve now implemented a destructor for InterpThreadContext. Please let me know if there is a deterministic place to destroy the thread context.

If this approach is reasonable, I would move the initialization into the constructor instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer moving the InterpThreadContext instance to the coreclr Thread class instead and performing the cleanup in the Thread::~Thread or Thread::OnThreadTerminate. Having a thread local object with a destructor is a recipe for possible problems due to the fact that the order of their destruction is arbitrary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented the InterpThreadContext constructor and destructor, and moved its instance into the CoreCLR Thread class. It is now created lazily on first pThread->GetInterpThreadContext() in prestub.cpp. This ensures that all allocated memory is properly released when the Thread destructor runs.

pThreadContext->pStackPointer = pFrame->pStack;
}

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/vm/interpexec.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
#define _INTERPEXEC_H_

#include "../interpreter/interpretershared.h"
#include "interpframeallocator.h"

#define INTERP_STACK_SIZE 1024*1024
#define INTERP_STACK_FRAGMENT_SIZE 4096

struct StackVal
{
Expand Down Expand Up @@ -52,6 +54,9 @@ struct InterpThreadContext
// stack pointer. It is needed when re-entering interp, to know from which address we can start using
// stack, and also needed for the GC to be able to scan the stack.
int8_t *pStackPointer;

// This is an allocator for the dynamic stack memory
FrameDataAllocator *pFrameDataAllocator;
};

InterpThreadContext* InterpGetThreadContext();
Expand Down
111 changes: 111 additions & 0 deletions src/coreclr/vm/interpframeallocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#include "interpexec.h"
#include "interpframeallocator.h"

FrameDataFragment::FrameDataFragment(size_t size)
{
if (size < INTERP_STACK_FRAGMENT_SIZE)
{
size = INTERP_STACK_FRAGMENT_SIZE;
}

start = (uint8_t*)malloc(size);
assert(start && "Failed to allocate FrameDataFragment");
end = start + size;
pos = start;
pNext = nullptr;
}

FrameDataFragment::~FrameDataFragment()
{
free(start);
}

FrameDataAllocator::FrameDataAllocator(size_t size)
{
pFirst = new FrameDataFragment(size);
assert(pFirst && "Failed to allocate initial fragment");
pCurrent = pFirst;
pInfos = nullptr;
infosLen = 0;
infosCapacity = 0;
}

FrameDataAllocator::~FrameDataAllocator()
{
assert(pCurrent == pFirst && pCurrent->pos == pCurrent->start);
FreeFragments(pFirst);
free(pInfos);
}

void FrameDataAllocator::FreeFragments(FrameDataFragment *pFrag)
{
while (pFrag)
{
FrameDataFragment *pNext = pFrag->pNext;
delete pFrag;
pFrag = pNext;
}
}

void FrameDataAllocator::PushInfo(InterpreterFrame *pFrame)
{
if (infosLen == infosCapacity)
{
size_t newCapacity = infosCapacity == 0 ? 8 : infosCapacity * 2;
pInfos = (FrameDataInfo*)realloc(pInfos, newCapacity * sizeof(FrameDataInfo));
assert(pInfos && "Failed to reallocate frame info");
infosCapacity = newCapacity;
}

FrameDataInfo *pInfo = &pInfos[infosLen++];
pInfo->pFrame = pFrame;
pInfo->pFrag = pCurrent;
pInfo->pos = pCurrent->pos;
}

void *FrameDataAllocator::Alloc(InterpreterFrame *pFrame, size_t size)
{
if (!infosLen || (infosLen > 0 && pInfos[infosLen - 1].pFrame != pFrame))
{
PushInfo(pFrame);
}

uint8_t *pos = pCurrent->pos;

if (pos + size > pCurrent->end)
{
if (pCurrent->pNext && ((pCurrent->pNext->start + size) <= pCurrent->pNext->end))
{
pCurrent = pCurrent->pNext;
pos = pCurrent->pos = pCurrent->start;
}
else
{
FreeFragments(pCurrent->pNext);
FrameDataFragment *pNewFrag = new FrameDataFragment(size);
assert(pNewFrag && "Failed to allocate new fragment");
pCurrent->pNext = pNewFrag;
pCurrent = pNewFrag;

pos = pNewFrag->pos;
}
}

void *result = (void*)pos;
pCurrent->pos = (uint8_t*)(pos + size);
return result;
}

void FrameDataAllocator::PopInfo(InterpreterFrame *pFrame)
{
int top = infosLen - 1;
if (top >= 0 && pInfos[top].pFrame == pFrame)
{
FrameDataInfo *pInfo = &pInfos[--infosLen];
pCurrent = pInfo->pFrag;
pCurrent->pos = pInfo->pos;
}
}
Loading
Loading