Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
1fb49a3
Implement localloc in the interpreter
kotlarmilos Apr 15, 2025
e5fb653
Add comment
kotlarmilos Apr 15, 2025
6b81d9b
Implement FrameDataAllocator for dynamic stack allocations
kotlarmilos Apr 21, 2025
43f4481
Merge branch 'main' of github.com:kotlarmilos/runtime into feature/co…
kotlarmilos Apr 21, 2025
f660365
Fix merge conflicts
kotlarmilos Apr 21, 2025
e59cace
Initialize local variables to zero if the flag is set
kotlarmilos Apr 22, 2025
7c649a8
Initialize local variables based on the CORINFO_OPT_INIT_LOCALS. Move…
kotlarmilos Apr 22, 2025
1d1e226
Implement FrameDataFragment destructor
kotlarmilos Apr 22, 2025
cb0b212
Handle memory allocation failure in frame data allocation
kotlarmilos Apr 22, 2025
581e856
Fix typo
kotlarmilos Apr 22, 2025
e72e0e3
Add preprocessor directive FEATURE_INTERPRETER
kotlarmilos Apr 22, 2025
50e94f6
Fix windows build
kotlarmilos Apr 22, 2025
dfb170b
If size is 0 allocates a zero-length item and returns a valid pointer…
kotlarmilos Apr 22, 2025
c3bc007
Update frame allocator to use InterpMethodContextFrame
kotlarmilos Apr 23, 2025
c91b99d
Throw OutOfMemory exception if alloc fails
kotlarmilos Apr 23, 2025
2bfe64f
Revert assert changes
kotlarmilos Apr 23, 2025
c08ab74
Update FrameDataAllocator to return nullptr if alloc fails
kotlarmilos Apr 24, 2025
7f42176
Check if infosLen is >0
kotlarmilos Apr 24, 2025
df20036
Use consistent naming for frame pointers
kotlarmilos Apr 24, 2025
8dc3223
Test __SIZEOF_POINTER__ for pointer size checks
kotlarmilos Apr 24, 2025
cd3d5b7
Replace __SIZEOF_POINTER__ with TARGET_64BIT
kotlarmilos Apr 24, 2025
bbb4345
Encapsulate FrameDataAllocator structs into class and make them private
kotlarmilos Apr 24, 2025
4b8c03c
Implement destructor for InterpThreadContext to free FrameDataAllocator
kotlarmilos Apr 25, 2025
db6355f
Move InterpThreadContext instance to the CoreCLR Thread and implement…
kotlarmilos Apr 25, 2025
0e97a9c
Move InterpThreadContext destructor call to OnThreadTerminate
kotlarmilos Apr 26, 2025
980eb78
Refactor Alloc to accept size by value instead of pointer
kotlarmilos Apr 26, 2025
d203e02
Merge branch 'main' into feature/coreclr-interp-opcode-localloc
kotlarmilos Apr 27, 2025
902e4d0
Fix typo
kotlarmilos Apr 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/coreclr/interpreter/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2596,6 +2596,14 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
EmitBinaryArithmeticOp(INTOP_MUL_I4);
m_ip++;
break;
case CEE_MUL_OVF:
EmitBinaryArithmeticOp(INTOP_MUL_OVF_I4);
m_ip++;
break;
case CEE_MUL_OVF_UN:
EmitBinaryArithmeticOp(INTOP_MUL_OVF_UN_I4);
m_ip++;
break;
case CEE_DIV:
EmitBinaryArithmeticOp(INTOP_DIV_I4);
m_ip++;
Expand Down Expand Up @@ -3134,6 +3142,25 @@ int InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo)
m_ip += 5;
break;
}
case CEE_LOCALLOC:
CHECK_STACK(1);
#if SIZEOF_VOID_P == 8
if (m_pStackPointer[-1].type == StackTypeI8)
EmitConv(m_pStackPointer - 1, NULL, StackTypeI4, INTOP_MOV_8);
#endif
AddIns(INTOP_LOCALLOC);
m_pStackPointer--;
if (m_pStackPointer != m_pStackBase)
{
m_hasInvalidCode = true;
goto exit_bad_code;
}

m_pLastNewIns->SetSVar(m_pStackPointer[0].var);
PushStackType(StackTypeByRef, NULL);
m_pLastNewIns->SetDVar(m_pStackPointer[-1].var);
m_ip++;
break;
default:
assert(0);
break;
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/interpreter/intops.def
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ OPDEF(INTOP_MUL_I8, "mul.i8", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_R4, "mul.r4", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_R8, "mul.r8", 4, 1, 2, InterpOpNoArgs)

// Overflow-checked multiplication, signed variants first and unsigned variants second.
// The name string of each opcode carries the same operand width as its identifier.
OPDEF(INTOP_MUL_OVF_I4, "mul.ovf.i4", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_OVF_I8, "mul.ovf.i8", 4, 1, 2, InterpOpNoArgs)

OPDEF(INTOP_MUL_OVF_UN_I4, "mul.ovf.un.i4", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_MUL_OVF_UN_I8, "mul.ovf.un.i8", 4, 1, 2, InterpOpNoArgs)

OPDEF(INTOP_DIV_I4, "div.i4", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_DIV_I8, "div.i8", 4, 1, 2, InterpOpNoArgs)
OPDEF(INTOP_DIV_R4, "div.r4", 4, 1, 2, InterpOpNoArgs)
Expand Down Expand Up @@ -253,6 +259,7 @@ OPDEF(INTOP_NEWOBJ_VT, "newobj.vt", 5, 1, 1, InterpOpMethodToken)
OPDEF(INTOP_CALL_HELPER_PP, "call.helper.pp", 5, 1, 0, InterpOpThreeInts)

OPDEF(INTOP_ZEROBLK_IMM, "zeroblk.imm", 3, 0, 1, InterpOpInt)
OPDEF(INTOP_LOCALLOC, "localloc", 3, 1, 1, InterpOpNoArgs)
OPDEF(INTOP_BREAKPOINT, "breakpoint", 1, 0, 0, InterpOpNoArgs)
OPDEF(INTOP_FAILFAST, "failfast", 1, 0, 0, InterpOpNoArgs)

Expand Down
170 changes: 170 additions & 0 deletions src/coreclr/vm/interpexec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,110 @@ typedef void* (*HELPER_FTN_PP)(void*);

thread_local InterpThreadContext *t_pThreadContext = NULL;

// Allocates a fragment whose data area holds at least `size` bytes; requests smaller than
// INTERP_STACK_FRAGMENT_SIZE are rounded up to that minimum. The header and the data area
// share a single malloc block, with the data area starting right after the header.
//
// Returns the new fragment (pos == start, next == NULL), or NULL when the block cannot be
// allocated or when header + size does not fit in size_t.
FrameDataFragment *frame_data_new_fragment(size_t size)
{
    if (size < INTERP_STACK_FRAGMENT_SIZE)
        size = INTERP_STACK_FRAGMENT_SIZE;

    // Reject sizes for which sizeof(header) + size would wrap around: malloc would then hand
    // back a block smaller than requested while frag->end pointed far beyond it.
    if (size > (size_t)-1 - sizeof(FrameDataFragment))
        return NULL;

    FrameDataFragment *frag = (FrameDataFragment*)malloc(sizeof(FrameDataFragment) + size);
    if (frag == NULL)
        return NULL;

    frag->start = (uint8_t*)(frag + 1);
    frag->end = frag->start + size;
    frag->pos = frag->start;
    frag->next = NULL;
    return frag;
}

// Puts `allocator` into its starting state: an empty frame-record array and a single
// fragment, obtained from frame_data_new_fragment(size), that is both the first and the
// current fragment. Failure to obtain that fragment is only caught by the assert.
void frame_data_allocator_init(FrameDataAllocator *allocator, size_t size)
{
    // No frame records yet; the array is created lazily when the first record is pushed.
    allocator->infos_capacity = 0;
    allocator->infos_len = 0;
    allocator->infos = NULL;

    // The fragment chain starts with exactly one fragment.
    allocator->first = frame_data_new_fragment(size);
    assert(allocator->first && "Failed to allocate initial fragment");
    allocator->current = allocator->first;
}

// Releases `frag` and every fragment reachable from it through `next`.
// Passing NULL is allowed and does nothing.
void frame_data_fragment_free(FrameDataFragment *frag)
{
    for (FrameDataFragment *following; frag != NULL; frag = following)
    {
        // Read the link before the node that holds it is released.
        following = frag->next;
        free(frag);
    }
}

// Releases every fragment and the frame-record array owned by `allocator`, then resets all
// of its fields to NULL / 0.
//
// All frames are expected to have rolled back their allocations beforehand, i.e. the current
// fragment is the first one and it is empty; this is checked by the assert.
//
// Calling this on an allocator that holds no fragments (for example one that was already
// destroyed) is safe and leaves it in the same reset state.
void frame_data_allocator_destroy(FrameDataAllocator *allocator)
{
    // Only inspect the current fragment when one exists: after a previous destroy both
    // `first` and `current` are NULL, and dereferencing `current` here would crash.
    assert(allocator->first == NULL ||
           (allocator->current == allocator->first && allocator->current->pos == allocator->current->start));

    frame_data_fragment_free(allocator->first);
    free(allocator->infos);

    allocator->first = allocator->current = NULL;
    allocator->infos = NULL;
    allocator->infos_len = allocator->infos_capacity = 0;
}

// Appends a rollback record for `frame`, capturing the allocator's current fragment and that
// fragment's allocation position. The record array grows geometrically (8 entries at first,
// then doubling) when it is full.
//
// If the array cannot be grown, the assert fires in builds with asserts enabled; otherwise
// the existing array is left untouched and no record is pushed.
void frame_data_push_info(FrameDataAllocator *allocator, InterpreterFrame *frame)
{
    if (allocator->infos_len == allocator->infos_capacity)
    {
        int new_capacity = allocator->infos_capacity == 0 ? 8 : allocator->infos_capacity * 2;

        // Keep the old pointer until realloc is known to have succeeded: on failure realloc
        // returns NULL and the original array is still valid and still owned by the allocator.
        FrameDataInfo *new_infos = (FrameDataInfo*)realloc(allocator->infos, new_capacity * sizeof(FrameDataInfo));
        assert(new_infos && "Failed to reallocate frame info");
        if (new_infos == NULL)
            return;

        allocator->infos = new_infos;
        allocator->infos_capacity = new_capacity;
    }

    FrameDataInfo *info = &allocator->infos[allocator->infos_len++];
    info->frame = frame;
    info->frag = allocator->current;
    info->pos = allocator->current->pos;
}

// Allocates `size` bytes of frame-local memory on behalf of `frame`.
//
// The first allocation made by a frame (i.e. when the newest rollback record belongs to a
// different frame, or there is none) pushes a rollback record so that frame_data_pop_info can
// later release everything the frame allocated.
//
// Memory is carved from the current fragment. When it does not fit, the next fragment in the
// chain is reused if its whole data area is large enough; otherwise the rest of the chain is
// freed and replaced by a freshly allocated fragment.
//
// Returns a pointer to the allocated bytes, or NULL if a new fragment was needed and could
// not be allocated (the assert also fires in builds with asserts enabled).
void *frame_data_alloc(FrameDataAllocator *allocator, InterpreterFrame *frame, size_t size)
{
    if (allocator->infos_len == 0 || allocator->infos[allocator->infos_len - 1].frame != frame)
    {
        frame_data_push_info(allocator, frame);
    }

    FrameDataFragment *current = allocator->current;
    uint8_t *pos = current->pos;

    // Compare against the remaining byte count instead of forming `pos + size`, which can
    // overflow the pointer for very large requests.
    if (size > (size_t)(current->end - pos))
    {
        FrameDataFragment *next = current->next;
        if (next != NULL && size <= (size_t)(next->end - next->start))
        {
            // Reuse the already allocated follow-up fragment from its beginning.
            next->pos = next->start;
            allocator->current = next;
        }
        else
        {
            // The rest of the chain is too small (or absent): drop it and clear the link
            // first, so the chain never points at freed memory if the allocation below fails.
            frame_data_fragment_free(next);
            current->next = NULL;

            FrameDataFragment *new_frag = frame_data_new_fragment(size);
            assert(new_frag && "Failed to allocate new fragment");
            if (new_frag == NULL)
                return NULL;

            current->next = new_frag;
            allocator->current = new_frag;
        }

        current = allocator->current;
        pos = current->pos;
    }

    current->pos = pos + size;
    return pos;
}

// Rolls back the frame-local allocations of `pFrame`. If the newest rollback record belongs
// to `pFrame`, the record is removed and the allocator's current fragment and that fragment's
// position are restored from it. In every other case the allocator is left unchanged.
void frame_data_pop_info(FrameDataAllocator *allocator, InterpreterFrame *pFrame)
{
    // No records at all: nothing to roll back.
    if (allocator->infos_len <= 0)
        return;

    FrameDataInfo *newest = &allocator->infos[allocator->infos_len - 1];

    // The newest record belongs to some other frame.
    if (newest->frame != pFrame)
        return;

    allocator->infos_len--;
    allocator->current = newest->frag;
    allocator->current->pos = newest->pos;
}

InterpThreadContext* InterpGetThreadContext()
{
InterpThreadContext *threadContext = t_pThreadContext;
Expand All @@ -21,6 +125,7 @@ InterpThreadContext* InterpGetThreadContext()
// FIXME VirtualAlloc/mmap with INTERP_STACK_ALIGNMENT alignment
threadContext->pStackStart = threadContext->pStackPointer = (int8_t*)malloc(INTERP_STACK_SIZE);
threadContext->pStackEnd = threadContext->pStackStart + INTERP_STACK_SIZE;
frame_data_allocator_init(&threadContext->frameDataAllocator, INTERP_STACK_FRAGMENT_SIZE);

t_pThreadContext = threadContext;
return threadContext;
Expand Down Expand Up @@ -584,7 +689,53 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
LOCAL_VAR(ip[1], double) = LOCAL_VAR(ip[2], double) * LOCAL_VAR(ip[3], double);
ip += 4;
break;
case INTOP_MUL_OVF_I4:
{
int32_t i1 = LOCAL_VAR(ip[2], int32_t);
int32_t i2 = LOCAL_VAR(ip[3], int32_t);
int32_t i3;
if (__builtin_mul_overflow(i1, i2, &i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], int32_t) = i3;
ip += 4;
break;
}

case INTOP_MUL_OVF_I8:
{
int64_t i1 = LOCAL_VAR(ip[2], int64_t);
int64_t i2 = LOCAL_VAR(ip[3], int64_t);
int64_t i3;
if (__builtin_mul_overflow(i1, i2, &i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], int64_t) = i3;
ip += 4;
break;
}

case INTOP_MUL_OVF_UN_I4:
{
uint32_t i1 = LOCAL_VAR(ip[2], uint32_t);
uint32_t i2 = LOCAL_VAR(ip[3], uint32_t);
uint32_t i3;
if (__builtin_mul_overflow(i1, i2, &i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], uint32_t) = i3;
ip += 4;
break;
}

case INTOP_MUL_OVF_UN_I8:
{
uint64_t i1 = LOCAL_VAR(ip[2], uint64_t);
uint64_t i2 = LOCAL_VAR(ip[3], uint64_t);
uint64_t i3;
if (__builtin_mul_overflow(i1, i2, &i3))
assert(0); // Interpreter-TODO: OverflowException
LOCAL_VAR(ip[1], uint64_t) = i3;
ip += 4;
break;
}
case INTOP_DIV_I4:
{
int32_t i1 = LOCAL_VAR(ip[2], int32_t);
Expand Down Expand Up @@ -1105,6 +1256,22 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
memset(LOCAL_VAR(ip[1], void*), 0, ip[2]);
ip += 3;
break;
case INTOP_LOCALLOC:
{
int32_t len = LOCAL_VAR(ip[2], int32_t);
void* mem;

if (len > 0)
{
mem = frame_data_alloc(&pThreadContext->frameDataAllocator, (InterpreterFrame*)pFrame, ALIGN_UP(len, INTERP_STACK_ALIGNMENT));
} else
{
mem = NULL;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stackalloc is not expected to return null. I am not sure whether it is explicitly specified in any .NET spec, but it is specified in the C++ docs. From https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/alloca?view=msvc-170#remarks: "If size is 0, _alloca allocates a zero-length item and returns a valid pointer to that item."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current behavior of localloc is to return null when the size is zero:

// Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
genConsumeRegAndCopy(size, targetReg);
endLabel = genCreateTempLabel();
GetEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
inst_JMP(EJ_je, endLabel);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for pointing it out. Looks like there is a mismatch between alloca in C++ and the IL instruction.

I think it would be best to match what RyuJIT does and return zero as well like you have done originally.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, reverted.

}
LOCAL_VAR(ip[1], void*) = mem;
ip += 3;
break;
}
case INTOP_FAILFAST:
assert(0);
break;
Expand All @@ -1115,6 +1282,8 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
}

EXIT_FRAME:

frame_data_pop_info(&pThreadContext->frameDataAllocator, (InterpreterFrame*)pFrame);
if (pFrame->pParent && pFrame->pParent->ip)
{
// Return to the main loop after a non-recursive interpreter call
Expand All @@ -1129,6 +1298,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
goto MAIN_LOOP;
}

frame_data_allocator_destroy(&pThreadContext->frameDataAllocator);
pThreadContext->pStackPointer = pFrame->pStack;
}

Expand Down
31 changes: 31 additions & 0 deletions src/coreclr/vm/interpexec.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "../interpreter/interpretershared.h"

// Size in bytes of the interpreter stack. The replacement list is parenthesized so the macro
// expands as a single value inside larger expressions (e.g. `x / INTERP_STACK_SIZE`).
#define INTERP_STACK_SIZE (1024*1024)
// Size in bytes of a frame data fragment's data area
#define INTERP_STACK_FRAGMENT_SIZE 4096

struct StackVal
{
Expand Down Expand Up @@ -42,6 +43,33 @@ struct InterpMethodContextFrame
#endif // DACCESS_COMPILE
};

// One link in a singly linked chain of memory regions that allocations are carved from.
struct FrameDataFragment
{
    // First byte of this fragment's memory region
    uint8_t *start;
    // End of this fragment's memory region
    uint8_t *end;
    // Current allocation pointer within this fragment
    uint8_t *pos;
    // Next fragment in the chain
    FrameDataFragment *next;
};

// Rollback record for one frame: remembers where the allocator stood when the record was
// created, so that the frame's local allocations can be released when the frame returns.
struct FrameDataInfo {
// Frame that this record is associated with
InterpreterFrame *frame;
// State needed to restore the localloc memory:
// frag - the allocator's current fragment when the record was created
// pos - that fragment's allocation pointer when the record was created
// When the frame returns, these are used to roll back any local allocations
FrameDataFragment *frag;
uint8_t *pos;
};

// Allocator for dynamically sized, frame-local memory: a chain of fragments plus an array of
// per-frame rollback records.
struct FrameDataAllocator
{
    // Head of the fragment chain
    FrameDataFragment *first;
    // Fragment that allocations are currently carved from
    FrameDataFragment *current;
    // Array of rollback records; the newest record is the last used entry
    FrameDataInfo *infos;
    // Number of records in use
    int infos_len;
    // Number of records the array has room for
    int infos_capacity;
};

struct InterpThreadContext
{
int8_t *pStackStart;
Expand All @@ -52,6 +80,9 @@ struct InterpThreadContext
// stack pointer. It is needed when re-entering interp, to know from which address we can start using
// stack, and also needed for the GC to be able to scan the stack.
int8_t *pStackPointer;

// This is an allocator for the dynamic stack memory
FrameDataAllocator frameDataAllocator;
};

InterpThreadContext* InterpGetThreadContext();
Expand Down
Loading
Loading