Skip to content

Commit df8d547

Browse files
committed
JIT/AOT to interpreter calls support
Until now, JIT/AOT-compiled code could only call interpreted methods that take no arguments and return void. This change adds support for passing all kinds of arguments and returning all types. It reuses the `CallStubGenerator` that was implemented for interpreter-to-native calls and extends it to support the opposite direction, which is mostly trivial. In addition, assembler routines for storing argument values to the interpreter stack were added. The `CallStubGenerator` generates a list of routines that copy the arguments from CPU registers and the stack to the interpreter stack. The last routine in the list calls `ExecuteInterpretedMethod` and then places the result into the appropriate registers. For functions that return their result via a return buffer, the buffer is passed to `ExecuteInterpretedMethod` so that the IR opcode that returns a value type stores it directly into the return buffer. The ARM64 version for Apple OSes is the most optimized one, as it is the primary target where performance matters the most. It avoids saving the argument registers to the stack on the fast path, i.e. when the call stub already exists.
1 parent 211e8db commit df8d547

17 files changed

+3561
-349
lines changed

src/coreclr/interpreter/interpretershared.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#define INTERP_METHOD_HANDLE_TAG 4 // Tag of a MethodDesc in the interp method dataItems
2121
#define INTERP_INDIRECT_HELPER_TAG 1 // When a helper ftn's address is indirect we tag it with this tag bit
2222

23+
struct CallStubHeader;
24+
2325
struct InterpMethod
2426
{
2527
#if DEBUG
@@ -28,6 +30,7 @@ struct InterpMethod
2830
CORINFO_METHOD_HANDLE methodHnd;
2931
int32_t allocaSize;
3032
void** pDataItems;
33+
CallStubHeader *pCallStub;
3134
bool initLocals;
3235

3336
InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems, bool initLocals)
@@ -39,6 +42,7 @@ struct InterpMethod
3942
this->allocaSize = allocaSize;
4043
this->pDataItems = pDataItems;
4144
this->initLocals = initLocals;
45+
pCallStub = NULL;
4246
}
4347

4448
bool CheckIntegrity()

src/coreclr/pal/inc/unixasmmacrosamd64.inc

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@ C_FUNC(\Name\()_End):
222222

223223
.endm
224224

225+
// Releases six 8-byte stack slots by advancing rsp past them, without loading
// anything back into registers. EPILOG_WITH_TRANSITION_BLOCK_RETURN uses it
// in place of POP_ARGUMENT_REGISTERS.
// NOTE(review): this is a raw stack adjustment with no unwind annotation, while
// neighbouring code releases stack through free_stack - confirm whether a CFA
// adjustment is needed here for correct unwinding inside the epilog.
.macro SKIP_ARGUMENT_REGISTERS
226+
227+
add rsp, 6 * 8
228+
229+
.endm
230+
225231
.macro SAVE_FLOAT_ARGUMENT_REGISTERS ofs
226232

227233
save_xmm128_postrsp xmm0, \ofs
@@ -344,7 +350,7 @@ C_FUNC(\Name\()_End):
344350
.macro EPILOG_WITH_TRANSITION_BLOCK_RETURN
345351

346352
free_stack __PWTB_StackAlloc
347-
POP_ARGUMENT_REGISTERS
353+
SKIP_ARGUMENT_REGISTERS
348354
POP_CALLEE_SAVED_REGISTERS
349355
ret
350356

@@ -405,3 +411,25 @@ C_FUNC(\Name\()_End):
405411
free_stack 56
406412
POP_CALLEE_SAVED_REGISTERS
407413
.endm
414+
415+
// Emits the TLS access call sequence for the thread-local variable Var.
// The body is written in AT&T syntax; Intel syntax (noprefix) is re-enabled
// before the macro ends.
// rdi is overwritten with the argument of the call. The result is expected in
// rax (INLINE_GETTHREAD dereferences rax right after this macro).
// NOTE(review): which other registers the called routine may change is not
// visible here - confirm before keeping live values across this macro.
.macro INLINE_GET_TLS_VAR Var
416+
.att_syntax
417+
#if defined(__APPLE__)
418+
// Load the TLV entry for Var into rdi and call through the pointer stored
// at its start.
movq _\Var@TLVP(%rip), %rdi
419+
callq *(%rdi)
420+
#else
421+
// TLSGD access through __tls_get_addr. The extra prefix bytes only pad the
// sequence so that the linker has room to rewrite it; do not remove or
// reorder them.
.byte 0x66 // data16 prefix - padding to have space for linker relaxations
422+
leaq \Var@TLSGD(%rip), %rdi
423+
.byte 0x66 //
424+
.byte 0x66 //
425+
.byte 0x48 // rex.W prefix, also for padding
426+
callq __tls_get_addr@PLT
427+
#endif
428+
.intel_syntax noprefix
429+
.endm
430+
431+
// Loads the m_pThread field of the thread-local t_CurrentThreadInfo into rax.
// Register effects are those of INLINE_GET_TLS_VAR (rdi is overwritten there).
.macro INLINE_GETTHREAD
432+
// Inlined version of call C_FUNC(RhpGetThread)
433+
// After this macro rax holds the address of t_CurrentThreadInfo.
INLINE_GET_TLS_VAR t_CurrentThreadInfo
434+
// rax = t_CurrentThreadInfo.m_pThread
mov rax, [rax + OFFSETOF__ThreadLocalInfo__m_pThread]
435+
.endm

src/coreclr/pal/inc/unixasmmacrosarm64.inc

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ C_FUNC(\Name\()_End):
184184
// ArgumentRegisters::x2
185185
// ArgumentRegisters::x1
186186
// ArgumentRegisters::x0
187+
// ArgumentRegisters::x8
187188
// FloatRegisters::q7
188189
// FloatRegisters::q6
189190
// FloatRegisters::q5
@@ -192,7 +193,7 @@ C_FUNC(\Name\()_End):
192193
// FloatRegisters::q2
193194
// FloatRegisters::q1
194195
// FloatRegisters::q0
195-
.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1
196+
.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1, SaveGPArgs = 1
196197

197198
__PWTB_FloatArgumentRegisters = \extraLocals
198199
__PWTB_SaveFPArgs = \SaveFPArgs
@@ -222,8 +223,10 @@ C_FUNC(\Name\()_End):
222223
// Allocate space for the rest of the frame
223224
PROLOG_STACK_ALLOC __PWTB_StackAlloc
224225

225-
// Spill argument registers.
226-
SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters
226+
.if (\SaveGPArgs == 1)
227+
// Spill argument registers.
228+
SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters
229+
.endif
227230

228231
.if (__PWTB_SaveFPArgs == 1)
229232
SAVE_FLOAT_ARGUMENT_REGISTERS sp, \extraLocals
@@ -301,7 +304,6 @@ C_FUNC(\Name\()_End):
301304

302305
.endm
303306

304-
305307
//-----------------------------------------------------------------------------
306308
// Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK and ends by preparing for tail-calling.
307309
// Since this is a tail call argument registers are restored.
@@ -470,3 +472,47 @@ $__RedirectionStubEndFuncName
470472
0:
471473
#endif
472474
.endm
475+
476+
#define xip0 x16
477+
#define xip1 x17
478+
#define xpr x18
479+
480+
// Loads the address of the thread-local variable var into the target register,
481+
// which cannot be x0 (x0 is the scratch register of the sequence; enforced below).
482+
// Preserves registers except for xip0 and xip1 on Apple. In addition, on every
// path x0 is overwritten and lr (x30) is overwritten by the blr.
483+
.macro INLINE_GET_TLS_VAR target, var
484+
// Reject target == x0 at assembly time.
.ifc \target, x0
485+
.error "target cannot be x0"
486+
.endif
487+
488+
// This sequence of instructions is recognized and potentially patched
489+
// by the linker (GD->IE/LE relaxation).
// Keep the instructions of the sequence exactly as they are and in this order.
490+
#if defined(__APPLE__)
491+
492+
adrp x0, \var@TLVPPAGE
493+
ldr x0, [x0, \var@TLVPPAGEOFF]
494+
ldr \target, [x0]
495+
496+
blr \target
497+
// End of the sequence
498+
499+
// The call leaves its result in x0; hand it over in the requested register.
mov \target, x0
500+
#else
501+
adrp x0, :tlsdesc:\var
502+
ldr \target, [x0, #:tlsdesc_lo12:\var]
503+
add x0, x0, :tlsdesc_lo12:\var
504+
.tlsdesccall \var
505+
blr \target
506+
// End of the sequence
507+
508+
// target = tpidr_el0 + x0
// (presumably x0 is the offset of var from the thread pointer, as returned
// by the descriptor call - TODO confirm against the TLS descriptor ABI)
mrs \target, tpidr_el0
509+
add \target, \target, x0
510+
#endif
511+
512+
.endm
513+
514+
// Inlined version of RhpGetThread. Target cannot be x0.
// Leaves the m_pThread field of the thread-local t_CurrentThreadInfo in target.
// Register effects are those of INLINE_GET_TLS_VAR.
515+
.macro INLINE_GETTHREAD target
516+
// target = address of t_CurrentThreadInfo
INLINE_GET_TLS_VAR \target, C_FUNC(t_CurrentThreadInfo)
517+
// target = t_CurrentThreadInfo.m_pThread
ldr \target, [\target, #OFFSETOF__ThreadLocalInfo__m_pThread]
518+
.endm

0 commit comments

Comments
 (0)