diff --git a/src/coreclr/interpreter/interpretershared.h b/src/coreclr/interpreter/interpretershared.h index 9d55ae47761d16..851690b33c818f 100644 --- a/src/coreclr/interpreter/interpretershared.h +++ b/src/coreclr/interpreter/interpretershared.h @@ -19,6 +19,8 @@ #define INTERP_INDIRECT_HELPER_TAG 1 // When a helper ftn's address is indirect we tag it with this tag bit +struct CallStubHeader; + struct InterpMethod { #if DEBUG @@ -27,6 +29,8 @@ struct InterpMethod CORINFO_METHOD_HANDLE methodHnd; int32_t allocaSize; void** pDataItems; + // This stub is used for calling the interpreted method from JITted/AOTed code + CallStubHeader *pCallStub; bool initLocals; InterpMethod(CORINFO_METHOD_HANDLE methodHnd, int32_t allocaSize, void** pDataItems, bool initLocals) @@ -38,6 +42,7 @@ struct InterpMethod this->allocaSize = allocaSize; this->pDataItems = pDataItems; this->initLocals = initLocals; + pCallStub = NULL; } bool CheckIntegrity() diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index d84f70e8ad57db..880a282813f738 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -222,6 +222,12 @@ C_FUNC(\Name\()_End): .endm +.macro SKIP_ARGUMENT_REGISTERS + + add rsp, 6 * 8 + +.endm + .macro SAVE_FLOAT_ARGUMENT_REGISTERS ofs save_xmm128_postrsp xmm0, \ofs @@ -344,7 +350,7 @@ C_FUNC(\Name\()_End): .macro EPILOG_WITH_TRANSITION_BLOCK_RETURN free_stack __PWTB_StackAlloc - POP_ARGUMENT_REGISTERS + SKIP_ARGUMENT_REGISTERS POP_CALLEE_SAVED_REGISTERS ret @@ -368,19 +374,29 @@ C_FUNC(\Name\()_End): .endm -// Inlined version of GetThreadEEAllocContext. Trashes volatile registers. -.macro INLINE_GET_ALLOC_CONTEXT_BASE -#if defined(FEATURE_EMULATED_TLS) || defined(__APPLE__) - call C_FUNC(GetThreadEEAllocContext) +.macro INLINE_GET_TLS_VAR Var + .att_syntax +#if defined(__APPLE__) + movq _\Var@TLVP(%rip), %rdi + callq *(%rdi) #else - .att_syntax .byte 0x66 // data16 prefix - padding to have space for linker relaxations - leaq t_runtime_thread_locals@TLSGD(%rip), %rdi + leaq \Var@TLSGD(%rip), %rdi .byte 0x66 // .byte 0x66 // .byte 0x48 // rex.W prefix, also for padding callq __tls_get_addr@PLT - .intel_syntax noprefix +#endif + .intel_syntax noprefix +.endm + + +// Inlined version of GetThreadEEAllocContext. Trashes volatile registers. 
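Editor's note: the INLINE_GET_TLS_VAR macro above hand-codes the platform TLS access sequences (the Apple TLVP call, and the linker-relaxable TLSGD/TLSDESC form elsewhere) so they can be used from assembly helpers such as the INLINE_GET_ALLOC_CONTEXT_BASE macro that follows. As a rough orientation only — the names below are placeholders, not the runtime's declarations — the sequence computes nothing more exotic than the address a compiler would produce for a thread-local variable:

    // Hedged sketch; RuntimeThreadLocalsSketch stands in for the VM's real type.
    struct RuntimeThreadLocalsSketch { char ee_alloc_context[1]; };
    static thread_local RuntimeThreadLocalsSketch t_runtime_thread_locals_sketch;

    static void* GetTlsVarBaseSketch()
    {
        // A compiler emits the same (linker-relaxable) TLSGD/TLSDESC or TLVP access
        // that the macro writes by hand; the result is this thread's copy of the variable.
        return &t_runtime_thread_locals_sketch;
    }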
+.macro INLINE_GET_ALLOC_CONTEXT_BASE +#ifdef FEATURE_EMULATED_TLS + call C_FUNC(GetThreadEEAllocContext) +#else + INLINE_GET_TLS_VAR t_runtime_thread_locals .ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0 lea rax, [rax + OFFSETOF__RuntimeThreadLocals__ee_alloc_context] @@ -405,3 +421,9 @@ C_FUNC(\Name\()_End): free_stack 56 POP_CALLEE_SAVED_REGISTERS .endm + +.macro INLINE_GETTHREAD + // Inlined version of call C_FUNC(RhpGetThread) + INLINE_GET_TLS_VAR t_CurrentThreadInfo + mov rax, [rax + OFFSETOF__ThreadLocalInfo__m_pThread] +.endm diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 4e8b9e7c257101..6dd2b20d58acb6 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -184,6 +184,7 @@ C_FUNC(\Name\()_End): // ArgumentRegisters::x2 // ArgumentRegisters::x1 // ArgumentRegisters::x0 +// ArgumentRegisters::x8 // FloatRegisters::q7 // FloatRegisters::q6 // FloatRegisters::q5 @@ -192,7 +193,7 @@ C_FUNC(\Name\()_End): // FloatRegisters::q2 // FloatRegisters::q1 // FloatRegisters::q0 -.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1 +.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1, SaveGPArgs = 1 __PWTB_FloatArgumentRegisters = \extraLocals __PWTB_SaveFPArgs = \SaveFPArgs @@ -222,8 +223,10 @@ C_FUNC(\Name\()_End): // Allocate space for the rest of the frame PROLOG_STACK_ALLOC __PWTB_StackAlloc - // Spill argument registers. - SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + .if (\SaveGPArgs == 1) + // Spill argument registers. + SAVE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + .endif .if (__PWTB_SaveFPArgs == 1) SAVE_FLOAT_ARGUMENT_REGISTERS sp, \extraLocals @@ -301,7 +304,6 @@ C_FUNC(\Name\()_End): .endm - //----------------------------------------------------------------------------- // Provides a matching epilog to PROLOG_WITH_TRANSITION_BLOCK and ends by preparing for tail-calling. // Since this is a tail call argument registers are restored. @@ -325,6 +327,41 @@ C_FUNC(\Name\()_End): .endm +// Loads the address of a thread-local variable into the target register, +// which cannot be x0. +// Preserves registers except for xip0 and xip1 on Apple +.macro INLINE_GET_TLS_VAR target, var + .ifc \target, x0 + .error "target cannot be x0" + .endif + + // This sequence of instructions is recognized and potentially patched + // by the linker (GD->IE/LE relaxation). +#if defined(__APPLE__) + + adrp x0, \var@TLVPPAGE + ldr x0, [x0, \var@TLVPPAGEOFF] + ldr \target, [x0] + + blr \target + // End of the sequence + + mov \target, x0 +#else + adrp x0, :tlsdesc:\var + ldr \target, [x0, #:tlsdesc_lo12:\var] + add x0, x0, :tlsdesc_lo12:\var + .tlsdesccall \var + blr \target + // End of the sequence + + mrs \target, tpidr_el0 + add \target, \target, x0 +#endif + +.endm + + // Inlined version of GetThreadEEAllocContext. Target cannot be x0 or x1. .macro INLINE_GET_ALLOC_CONTEXT_BASE target .ifc \target, x0 @@ -345,17 +382,7 @@ C_FUNC(\Name\()_End): EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x20 #else PROLOG_SAVE_REG_PAIR_INDEXED x0, lr, -0x10 - - // This sequence of instructions is recognized and potentially patched - // by the linker (GD->IE/LE relaxation). 
- adrp x0, :tlsdesc:t_runtime_thread_locals - ldr \target, [x0, :tlsdesc_lo12:t_runtime_thread_locals] - add x0, x0, :tlsdesc_lo12:t_runtime_thread_locals - blr \target - // End of the sequence - - mrs \target, TPIDR_EL0 - add \target, \target, x0 + INLINE_GET_TLS_VAR \target, t_runtime_thread_locals .ifnc OFFSETOF__RuntimeThreadLocals__ee_alloc_context, 0 add \target, x0, OFFSETOF__RuntimeThreadLocals__ee_alloc_context @@ -470,3 +497,13 @@ $__RedirectionStubEndFuncName 0: #endif .endm + +#define xip0 x16 +#define xip1 x17 +#define xpr x18 + +// Inlined version of RhpGetThread. Target cannot be x0. +.macro INLINE_GETTHREAD target + INLINE_GET_TLS_VAR \target, C_FUNC(t_CurrentThreadInfo) + ldr \target, [\target, #OFFSETOF__ThreadLocalInfo__m_pThread] +.endm diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index f06d64ec1ef30f..7d2f40deaf2440 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -558,17 +558,289 @@ NESTED_ENTRY InterpreterStub, _TEXT PROLOG_WITH_TRANSITION_BLOCK - ; - ; call ExecuteInterpretedMethod - ; - lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock* - mov rdx, METHODDESC_REGISTER - call ExecuteInterpretedMethod + __InterpreterStubArgumentRegistersOffset = __PWTB_ArgumentRegisters + ; IR bytecode address + mov rbx, METHODDESC_REGISTER + + INLINE_GETTHREAD r10; thrashes rax and r11 + + ; Load the InterpMethod pointer from the IR bytecode + mov rax, qword ptr [rbx] + mov rax, qword ptr [rax + OFFSETOF__InterpMethod__pCallStub] + lea r11, qword ptr [rax + OFFSETOF__CallStubHeader__Routines] + mov r10, qword ptr [r10 + OFFSETOF__Thread__m_pInterpThreadContext] + mov r10, qword ptr [r10 + OFFSETOF__InterpThreadContext__pStackPointer] + lea rax, [rsp + __PWTB_TransitionBlock] + ; Copy the arguments to the interpreter stack, invoke the InterpExecMethod and load the return value + call qword ptr [r11] EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END InterpreterStub, _TEXT +NESTED_ENTRY InterpreterStubRetVoid, _TEXT + alloc_stack 028h +END_PROLOGUE + mov rcx, rax ; pTransitionBlock* + mov rdx, rbx ; the IR bytecode pointer + xor r8, r8 + call ExecuteInterpretedMethod + add rsp, 028h + ret +NESTED_END InterpreterStubRetVoid, _TEXT + +NESTED_ENTRY InterpreterStubRetI8, _TEXT + alloc_stack 028h +END_PROLOGUE + mov rcx, rax ; pTransitionBlock* + mov rdx, rbx ; the IR bytecode pointer + xor r8, r8 + call ExecuteInterpretedMethod + mov rax, qword ptr [rax] + add rsp, 028h + ret +NESTED_END InterpreterStubRetI8, _TEXT + +NESTED_ENTRY InterpreterStubRetDouble, _TEXT + alloc_stack 028h +END_PROLOGUE + mov rcx, rax ; pTransitionBlock* + mov rdx, rbx ; the IR bytecode pointer + xor r8, r8 + call ExecuteInterpretedMethod + movsd xmm0, real8 ptr [rax] + add rsp, 028h + ret +NESTED_END InterpreterStubRetDouble, _TEXT + +NESTED_ENTRY InterpreterStubRetBuffRCX, _TEXT + alloc_stack 028h +END_PROLOGUE + mov rcx, rax ; pTransitionBlock* + mov rdx, rbx ; the IR bytecode pointer + ; Load the return buffer address from the original rcx argument register + mov r8, qword ptr [rsp + 028h + 8 + __InterpreterStubArgumentRegistersOffset] + call ExecuteInterpretedMethod + add rsp, 028h + ret +NESTED_END InterpreterStubRetBuffRCX, _TEXT + +NESTED_ENTRY InterpreterStubRetBuffRDX, _TEXT + alloc_stack 028h +END_PROLOGUE + mov rcx, rax ; pTransitionBlock* + mov rdx, rbx ; the IR bytecode pointer + ; Load the return buffer address from the original rxx argument register + mov r8, qword ptr [rsp + 028h + 8 + 
__InterpreterStubArgumentRegistersOffset + 8]; + call ExecuteInterpretedMethod + add rsp, 028h + ret +NESTED_END InterpreterStubRetBuffRDX, _TEXT + +; Copy arguments from the the processor stack to the interpreter stack. +; The CPU stack slots are aligned to pointer size. +LEAF_ENTRY Store_Stack, _TEXT + mov esi, dword ptr [r11 + 8] ; SP offset + mov ecx, dword ptr [r11 + 12] ; number of stack slots + ; load the caller Rsp as a based for the stack arguments + ; The 8 represent the return address slot + lea rsi, [rsp + rsi + 8 + __InterpreterStubArgumentRegistersOffset] + mov rdi, r10 + shr rcx, 3 + rep movsq + mov r10, rdi + add r11, 16 + jmp qword ptr [r11] +LEAF_END Store_Stack, _TEXT + +; Routines for passing value type arguments by reference in general purpose registers RCX, RDX, R8, R9 +; from native code to the interpreter + +Store_Ref macro argReg + +LEAF_ENTRY Store_Ref_&argReg, _TEXT + mov rsi, argReg + mov rcx, [r11 + 8] ; size of the value type + mov rdi, r10 + rep movsb + ; align rdi up to the stack slot size + lea rdi, [rdi + 7] + and rdi, 0fffffffffffffff8h + mov r10, rdi + add r11, 16 + jmp qword ptr [r11] +LEAF_END Store_Ref_&argReg, _TEXT + + endm + +Store_Ref RCX +Store_Ref RDX +Store_Ref R8 +Store_Ref R9 + +; Routines for passing arguments by value in general purpose registers RCX, RDX, R8, R9 +; from native code to the interpreter + +LEAF_ENTRY Store_RCX, _TEXT + mov [r10], rcx + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RCX, _TEXT + +LEAF_ENTRY Store_RCX_RDX, _TEXT + mov [r10], rcx + mov [r10 + 8], rdx + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RCX_RDX, _TEXT + +LEAF_ENTRY Store_RCX_RDX_R8, _TEXT + mov [r10], rcx + mov [r10 + 8], rdx + mov [r10 + 16], r8 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RCX_RDX_R8, _TEXT + +LEAF_ENTRY Store_RCX_RDX_R8_R9, _TEXT + mov [r10], rcx + mov [r10 + 8], rdx + mov [r10 + 16], r8 + mov [r10 + 24], r9 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RCX_RDX_R8_R9, _TEXT + +LEAF_ENTRY Store_RDX, _TEXT + mov [r10], rdx + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX, _TEXT + +LEAF_ENTRY Store_RDX_R8, _TEXT + mov [r10], rdx + mov [r10 + 8], r8 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX_R8, _TEXT + +LEAF_ENTRY Store_RDX_R8_R9, _TEXT + mov [r10], rdx + mov [r10 + 8], r8 + mov [r10 + 16], r9 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX_R8_R9, _TEXT + +LEAF_ENTRY Store_R8, _TEXT + mov [r10], r8 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_R8, _TEXT + +LEAF_ENTRY Store_R8_R9, _TEXT + mov [r10], r8 + mov [r10 + 8], r9 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_R8_R9, _TEXT + +LEAF_ENTRY Store_R9, _TEXT + mov [r10], r9 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_R9, _TEXT + +LEAF_ENTRY Store_XMM0, _TEXT + movsd real8 ptr [r10], xmm0 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr [r10 + 16], xmm2 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2_XMM3, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr 
[r10 + 16], xmm2 + movsd real8 ptr [r10 + 24], xmm3 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Store_XMM1, _TEXT + movsd real8 ptr [r10], xmm1 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2_XMM3, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + movsd real8 ptr [r10 + 16], xmm3 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Store_XMM2, _TEXT + movsd real8 ptr [r10], xmm2 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2, _TEXT + +LEAF_ENTRY Store_XMM2_XMM3, _TEXT + movsd real8 ptr [r10], xmm2 + movsd real8 ptr [r10 + 8], xmm3 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2_XMM3, _TEXT + +LEAF_ENTRY Store_XMM3, _TEXT + movsd real8 ptr [r10], xmm3 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM3, _TEXT + ; Copy arguments from the interpreter stack to the processor stack. ; The CPU stack slots are aligned to pointer size. LEAF_ENTRY Load_Stack, _TEXT @@ -591,6 +863,7 @@ LEAF_ENTRY Load_Stack, _TEXT LEAF_END Load_Stack, _TEXT ; Routines for passing value type arguments by reference in general purpose registers RCX, RDX, R8, R9 +; from the interpreter to native code LEAF_ENTRY Load_Ref_RCX, _TEXT mov rcx, r10 @@ -621,6 +894,7 @@ LEAF_ENTRY Load_Ref_R9, _TEXT LEAF_END Load_Ref_R9, _TEXT ; Routines for passing arguments by value in general purpose registers RCX, RDX, R8, R9 +; from the interpreter to native code LEAF_ENTRY Load_RCX, _TEXT mov rcx, [r10] @@ -785,8 +1059,8 @@ LEAF_ENTRY Load_XMM3, _TEXT LEAF_END Load_XMM3, _TEXT NESTED_ENTRY CallJittedMethodRetVoid, _TEXT - push_vol_reg rbp - mov rbp, rsp + push_nonvol_reg rbp + set_frame rbp, 0 END_PROLOGUE add r9, 20h ; argument save area + alignment sub rsp, r9 ; total stack space @@ -798,9 +1072,9 @@ END_PROLOGUE ret NESTED_END CallJittedMethodRetVoid, _TEXT -NESTED_ENTRY CallJittedMethodRetBuff, _TEXT - push_vol_reg rbp - mov rbp, rsp +NESTED_ENTRY CallJittedMethodRetBuffRCX, _TEXT + push_nonvol_reg rbp + set_frame rbp, 0 END_PROLOGUE add r9, 20h ; argument save area + alignment sub rsp, r9 ; total stack space @@ -811,11 +1085,27 @@ END_PROLOGUE mov rsp, rbp pop rbp ret -NESTED_END CallJittedMethodRetBuff, _TEXT +NESTED_END CallJittedMethodRetBuffRCX, _TEXT + +NESTED_ENTRY CallJittedMethodRetBuffRDX, _TEXT + push_nonvol_reg rbp + set_frame rbp, 0 +END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + mov rdx, r8 ; return buffer + call qword ptr [r11] + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetBuffRDX, _TEXT + NESTED_ENTRY CallJittedMethodRetDouble, _TEXT push_nonvol_reg rbp - mov rbp, rsp + set_frame rbp, 0 push_vol_reg r8 push_vol_reg rax ; align END_PROLOGUE @@ -824,7 +1114,6 @@ END_PROLOGUE mov r11, rcx ; The routines list mov r10, rdx ; interpreter stack args call qword ptr [r11] - add rsp, 20h mov r8, [rbp - 8] movsd real8 ptr [r8], xmm0 mov rsp, rbp @@ -834,7 +1123,7 @@ NESTED_END CallJittedMethodRetDouble, _TEXT NESTED_ENTRY CallJittedMethodRetI8, _TEXT push_nonvol_reg rbp - mov rbp, rsp + set_frame rbp, 0 push_vol_reg r8 push_vol_reg rax ; align END_PROLOGUE @@ -843,7 
+1132,6 @@ END_PROLOGUE mov r11, rcx ; The routines list mov r10, rdx ; interpreter stack args call qword ptr [r11] - add rsp, 20h mov r8, [rbp - 8] mov qword ptr [r8], rax mov rsp, rbp diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 9c935d0fa74e0f..96bb2f26d96239 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -555,6 +555,39 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof( ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, m_pCache)) #endif // FEATURE_CACHED_INTERFACE_DISPATCH +#define OFFSETOF__ThreadLocalInfo__m_pThread 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadLocalInfo__m_pThread == offsetof(ThreadLocalInfo, m_pThread)) + +#ifdef FEATURE_INTERPRETER +#ifdef _DEBUG +#define OFFSETOF__InterpMethod__pCallStub 0x20 +#else +#define OFFSETOF__InterpMethod__pCallStub 0x18 +#endif +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod, pCallStub)) + +#ifdef TARGET_UNIX +#define OFFSETOF__Thread__m_pInterpThreadContext 0xb50 +#else // TARGET_UNIX +#define OFFSETOF__Thread__m_pInterpThreadContext 0xba8 +#endif // TARGET_UNIX +ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pInterpThreadContext == offsetof(Thread, m_pInterpThreadContext)) + +#define OFFSETOF__InterpThreadContext__pStackPointer 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpThreadContext__pStackPointer == offsetof(InterpThreadContext, pStackPointer)) + +#define OFFSETOF__CallStubHeader__Routines 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CallStubHeader__Routines == offsetof(CallStubHeader, Routines)) + +#ifdef TARGET_UNIX +#define SIZEOF__TransitionBlock 0x68 +#else // TARGET_UNIX +#define SIZEOF__TransitionBlock 0x48 +#endif // TARGET_UNIX +ASMCONSTANTS_C_ASSERT(SIZEOF__TransitionBlock == sizeof(TransitionBlock)) + +#endif // FEATURE_INTERPRETER + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 4bd36922a520de..4eae153c4a0f0e 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -2,8 +2,8 @@ # The .NET Foundation licenses this file to you under the MIT license. 
.intel_syntax noprefix -#include "unixasmmacros.inc" #include "asmconstants.h" +#include "unixasmmacros.inc" #define real4 dword #define real8 qword @@ -432,22 +432,711 @@ NESTED_END CallEHFilterFunclet, _TEXT NESTED_ENTRY InterpreterStub, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK 8, 0, 0, 0, 0 - mov [rsp], rax // Return buffer in Swift calling convention + PROLOG_WITH_TRANSITION_BLOCK + + __InterpreterStubStackArgumentsOffset = __PWTB_TransitionBlock + SIZEOF__TransitionBlock + __InterpreterStubArgumentRegistersOffset = __PWTB_TransitionBlock + + // IR bytecode address + mov rbx, METHODDESC_REGISTER + + INLINE_GETTHREAD // result in rax, it can thrash all argument registers as it can call a helper + mov r10, rax + + // Load the InterpMethod pointer from the IR bytecode + mov rax, qword ptr [rbx] + mov rax, qword ptr [rax + OFFSETOF__InterpMethod__pCallStub] + // Reload the argument registers, the macro to get the thread have likely overwritten them + mov rdi, qword ptr [rsp + __InterpreterStubArgumentRegistersOffset] + mov rsi, qword ptr [rsp + __InterpreterStubArgumentRegistersOffset + 8] + mov rdx, qword ptr [rsp + __InterpreterStubArgumentRegistersOffset + 0x10] + mov rcx, qword ptr [rsp + __InterpreterStubArgumentRegistersOffset + 0x18] + mov r8, qword ptr [rsp + __InterpreterStubArgumentRegistersOffset + 0x20] + mov r9, qword ptr [rsp + __InterpreterStubArgumentRegistersOffset + 0x28] + movsd xmm0, real8 ptr [rsp + __PWTB_FloatArgumentRegisters] + movsd xmm1, real8 ptr [rsp + __PWTB_FloatArgumentRegisters + 0x10] + movsd xmm2, real8 ptr [rsp + __PWTB_FloatArgumentRegisters + 0x20] + movsd xmm3, real8 ptr [rsp + __PWTB_FloatArgumentRegisters + 0x30] + movsd xmm4, real8 ptr [rsp + __PWTB_FloatArgumentRegisters + 0x40] + movsd xmm5, real8 ptr [rsp + __PWTB_FloatArgumentRegisters + 0x50] + movsd xmm6, real8 ptr [rsp + __PWTB_FloatArgumentRegisters + 0x60] + movsd xmm7, real8 ptr [rsp + __PWTB_FloatArgumentRegisters + 0x70] + lea r11, qword ptr [rax + OFFSETOF__CallStubHeader__Routines] + mov r10, qword ptr [r10 + OFFSETOF__Thread__m_pInterpThreadContext] + mov r10, qword ptr [r10 + OFFSETOF__InterpThreadContext__pStackPointer] + lea rax, [rsp + __PWTB_TransitionBlock] + // rbx contains IR bytecode address + // Copy the arguments to the interpreter stack, invoke the InterpExecMethod and load the return value + call qword ptr [r11] - # - # call ExecuteInterpretedMethod - # - lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock* - mov rsi, METHODDESC_REGISTER - call C_FUNC(ExecuteInterpretedMethod) - - mov rax, [rsp] EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END InterpreterStub, _TEXT -// Copy arguments from the interpreter stack to the processor stack. 
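Editor's note: the rewritten InterpreterStub above no longer calls ExecuteInterpretedMethod directly. It loads InterpMethod::pCallStub, points r11 at CallStubHeader::Routines, and calls the first routine in that list; each routine copies one batch of argument registers or stack slots to the interpreter stack (r10) and jumps to the next entry, and the last entry appears to be one of the InterpreterStubRet* helpers that follow, which call ExecuteInterpretedMethod and move the result into the native return registers. A minimal sketch of the layout these stubs assume — only offsetof(CallStubHeader, Routines) == 0x10 is actually asserted in asmconstants.h; the field names here are hypothetical:

    #include <cstdint>

    struct CallStubHeader
    {
        uint8_t Reserved[0x10]; // assumption: bookkeeping before the routine list
        void*   Routines[1];    // variable-length chain; Store_Stack / Store_Ref_* entries
                                // are followed by inline operands (offset/size) in the list
    };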
+NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + xor rdx, rdx + call C_FUNC(ExecuteInterpretedMethod) + pop_register rax + ret +NESTED_END InterpreterStubRetVoid, _TEXT + +NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + xor rdx, rdx + call C_FUNC(ExecuteInterpretedMethod) + mov rax, qword ptr [rax] + pop_register rax + ret +NESTED_END InterpreterStubRetI8, _TEXT + +NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + xor rdx, rdx + call C_FUNC(ExecuteInterpretedMethod) + movsd xmm0, real8 ptr [rax] + pop_register rax + ret +NESTED_END InterpreterStubRetDouble, _TEXT + +NESTED_ENTRY InterpreterStubRetI8I8, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + xor rdx, rdx + call C_FUNC(ExecuteInterpretedMethod) + mov rdx, qword ptr [rax + 8] + mov rax, qword ptr [rax] + pop_register rax + ret +NESTED_END InterpreterStubRetI8I8, _TEXT + +NESTED_ENTRY InterpreterStubRetDoubleDouble, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + xor rdx, rdx + call C_FUNC(ExecuteInterpretedMethod) + movsd xmm0, real8 ptr [rax] + movsd xmm1, real8 ptr [rax + 8] + pop_register rax + ret +NESTED_END InterpreterStubRetDoubleDouble, _TEXT + +NESTED_ENTRY InterpreterStubRetI8Double, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + xor rdx, rdx + call C_FUNC(ExecuteInterpretedMethod) + movsd xmm0, real8 ptr [rax + 8] + mov rax, qword ptr [rax] + pop_register rax + ret +NESTED_END InterpreterStubRetI8Double, _TEXT + +NESTED_ENTRY InterpreterStubRetDoubleI8, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + xor rdx, rdx + call C_FUNC(ExecuteInterpretedMethod) + movsd xmm0, real8 ptr [rax] + mov rax, qword ptr [rax + 8] + pop_register rax + ret +NESTED_END InterpreterStubRetDoubleI8, _TEXT + +NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler + push_register rax // stack alignment +END_PROLOGUE + mov rdi, rax // pTransitionBlock* + mov rsi, rbx // the IR bytecode pointer + // Load the return buffer address + // The 8 + 8 is for the push and the return address slot + mov rdx, qword ptr [rsp + 8 + 8 + __InterpreterStubArgumentRegistersOffset] + call C_FUNC(ExecuteInterpretedMethod) + pop_register rax + ret +NESTED_END InterpreterStubRetBuff, _TEXT + +// Routines for passing value type arguments by reference in general purpose registers RDI, RSI, RDX, RCX, R8, R9 +// from native code to the interpreter + +// Copy arguments from the the processor stack to the interpreter stack. +// The CPU stack slots are aligned to pointer size. 
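Editor's note: before the routine itself, a hedged C++ model of what the Store_Stack helper below does on the System V side. The two dwords following its entry in the Routines list give the offset of the caller's outgoing stack arguments and, judging by the shr rcx, 3 / rep movsq pair, a pointer-size-aligned byte count; the bytes are copied onto the interpreter stack and r10 advances past them.

    #include <cstdint>
    #include <cstring>

    // Sketch only; parameter names are illustrative, not the stub's real interface.
    static void StoreStackSketch(uint8_t** interpSP, const uint8_t* callerStackArgs,
                                 int32_t spOffset, int32_t sizeInBytes)
    {
        std::memcpy(*interpSP, callerStackArgs + spOffset, sizeInBytes);
        *interpSP += sizeInBytes; // mirrors r10 moving forward via rdi / rep movsq
    }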
+LEAF_ENTRY Store_Stack, _TEXT + push_nonvol_reg rdi + push_nonvol_reg rsi + push_register rcx + mov esi, dword ptr [r11 + 8] // SP offset + mov ecx, dword ptr [r11 + 12] // number of stack slots + // load the caller Rsp as a based for the stack arguments + // The 4 * 8 represent the three pushes above and the return address slot + lea rsi, [rsp + rsi + 4 * 8 + __InterpreterStubStackArgumentsOffset] + mov rdi, r10 + shr rcx, 3 + rep movsq + mov r10, rdi + pop_register rcx + pop_nonvol_reg rsi + pop_nonvol_reg rdi + add r11, 16 + jmp qword ptr [r11] +LEAF_END Store_Stack, _TEXT + +LEAF_ENTRY Store_RDI, _TEXT + mov [r10], rdi + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDI, _TEXT + +LEAF_ENTRY Store_RDI_RSI, _TEXT + mov [r10], rdi + mov [r10 + 8], rsi + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDI_RSI, _TEXT + +LEAF_ENTRY Store_RDI_RSI_RDX, _TEXT + mov [r10], rdi + mov [r10 + 8], rsi + mov [r10 + 16], rdx + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDI_RSI_RDX, _TEXT + +LEAF_ENTRY Store_RDI_RSI_RDX_RCX, _TEXT + mov [r10], rdi + mov [r10 + 8], rsi + mov [r10 + 16], rdx + mov [r10 + 24], rcx + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDI_RSI_RDX_RCX, _TEXT + +LEAF_ENTRY Store_RDI_RSI_RDX_RCX_R8, _TEXT + mov [r10], rdi + mov [r10 + 8], rsi + mov [r10 + 16], rdx + mov [r10 + 24], rcx + mov [r10 + 32], r8 + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDI_RSI_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Store_RDI_RSI_RDX_RCX_R8_R9, _TEXT + mov [r10], rdi + mov [r10 + 8], rsi + mov [r10 + 16], rdx + mov [r10 + 24], rcx + mov [r10 + 32], r8 + mov [r10 + 40], r9 + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDI_RSI_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Store_RSI, _TEXT + mov [r10], rsi + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RSI, _TEXT + +LEAF_ENTRY Store_RSI_RDX, _TEXT + mov [r10], rsi + mov [r10 + 8], rdx + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RSI_RDX, _TEXT + +LEAF_ENTRY Store_RSI_RDX_RCX, _TEXT + mov [r10], rsi + mov [r10 + 8], rdx + mov [r10 + 16], rcx + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RSI_RDX_RCX, _TEXT + +LEAF_ENTRY Store_RSI_RDX_RCX_R8, _TEXT + mov [r10], rsi + mov [r10 + 8], rdx + mov [r10 + 16], rcx + mov [r10 + 24], r8 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RSI_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Store_RSI_RDX_RCX_R8_R9, _TEXT + mov [r10], rsi + mov [r10 + 8], rdx + mov [r10 + 16], rcx + mov [r10 + 24], r8 + mov [r10 + 32], r9 + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RSI_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Store_RDX, _TEXT + mov [r10], rdx + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX, _TEXT + +LEAF_ENTRY Store_RDX_RCX, _TEXT + mov [r10], rdx + mov [r10 + 8], rcx + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX_RCX, _TEXT + +LEAF_ENTRY Store_RDX_RCX_R8, _TEXT + mov [r10], rdx + mov [r10 + 8], rcx + mov [r10 + 16], r8 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Store_RDX_RCX_R8_R9, _TEXT + mov [r10], rdx + mov [r10 + 8], rcx + mov [r10 + 16], r8 + mov [r10 + 24], r9 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Store_RCX, _TEXT + mov [r10], rcx + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RCX, _TEXT + +LEAF_ENTRY Store_RCX_R8, _TEXT + mov [r10], rcx + mov [r10 + 8], r8 + add r10, 16 + add r11, 8 + 
jmp qword ptr [r11] +LEAF_END Store_RCX_R8, _TEXT + +LEAF_ENTRY Store_RCX_R8_R9, _TEXT + mov [r10], rcx + mov [r10 + 8], r8 + mov [r10 + 16], r9 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RCX_R8_R9, _TEXT + +LEAF_ENTRY Store_RDX_R8, _TEXT + mov [r10], rdx + mov [r10 + 8], r8 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX_R8, _TEXT + +LEAF_ENTRY Store_RDX_R8_R9, _TEXT + mov [r10], rdx + mov [r10 + 8], r8 + mov [r10 + 16], r9 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_RDX_R8_R9, _TEXT + +LEAF_ENTRY Store_R8, _TEXT + mov [r10], r8 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_R8, _TEXT + +LEAF_ENTRY Store_R8_R9, _TEXT + mov [r10], r8 + mov [r10 + 8], r9 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_R8_R9, _TEXT + +LEAF_ENTRY Store_R9, _TEXT + mov [r10], r9 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_R9, _TEXT + +LEAF_ENTRY Store_XMM0, _TEXT + movsd real8 ptr [r10], xmm0 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr [r10 + 16], xmm2 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2_XMM3, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr [r10 + 16], xmm2 + movsd real8 ptr [r10 + 24], xmm3 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2_XMM3_XMM4, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr [r10 + 16], xmm2 + movsd real8 ptr [r10 + 24], xmm3 + movsd real8 ptr [r10 + 32], xmm4 + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr [r10 + 16], xmm2 + movsd real8 ptr [r10 + 24], xmm3 + movsd real8 ptr [r10 + 32], xmm4 + movsd real8 ptr [r10 + 40], xmm5 + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr [r10 + 16], xmm2 + movsd real8 ptr [r10 + 24], xmm3 + movsd real8 ptr [r10 + 32], xmm4 + movsd real8 ptr [r10 + 40], xmm5 + movsd real8 ptr [r10 + 48], xmm6 + add r10, 56 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd real8 ptr [r10], xmm0 + movsd real8 ptr [r10 + 8], xmm1 + movsd real8 ptr [r10 + 16], xmm2 + movsd real8 ptr [r10 + 24], xmm3 + movsd real8 ptr [r10 + 32], xmm4 + movsd real8 ptr [r10 + 40], xmm5 + movsd real8 ptr [r10 + 48], xmm6 + movsd real8 ptr [r10 + 56], xmm7 + add r10, 64 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Store_XMM1, _TEXT + movsd real8 ptr [r10], xmm1 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + add r10, 
16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2_XMM3, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + movsd real8 ptr [r10 + 16], xmm3 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2_XMM3_XMM4, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + movsd real8 ptr [r10 + 16], xmm3 + movsd real8 ptr [r10 + 24], xmm4 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + movsd real8 ptr [r10 + 16], xmm3 + movsd real8 ptr [r10 + 24], xmm4 + movsd real8 ptr [r10 + 32], xmm5 + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + movsd real8 ptr [r10 + 16], xmm3 + movsd real8 ptr [r10 + 24], xmm4 + movsd real8 ptr [r10 + 32], xmm5 + movsd real8 ptr [r10 + 40], xmm6 + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd real8 ptr [r10], xmm1 + movsd real8 ptr [r10 + 8], xmm2 + movsd real8 ptr [r10 + 16], xmm3 + movsd real8 ptr [r10 + 24], xmm4 + movsd real8 ptr [r10 + 32], xmm5 + movsd real8 ptr [r10 + 40], xmm6 + movsd real8 ptr [r10 + 48], xmm7 + add r10, 56 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Store_XMM2, _TEXT + movsd real8 ptr [r10], xmm2 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2, _TEXT + +LEAF_ENTRY Store_XMM2_XMM3, _TEXT + movsd real8 ptr [r10], xmm2 + movsd real8 ptr [r10 + 8], xmm3 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2_XMM3, _TEXT + +LEAF_ENTRY Store_XMM2_XMM3_XMM4, _TEXT + movsd real8 ptr [r10], xmm2 + movsd real8 ptr [r10 + 8], xmm3 + movsd real8 ptr [r10 + 16], xmm4 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Store_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd real8 ptr [r10], xmm2 + movsd real8 ptr [r10 + 8], xmm3 + movsd real8 ptr [r10 + 16], xmm4 + movsd real8 ptr [r10 + 24], xmm5 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Store_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd real8 ptr [r10], xmm2 + movsd real8 ptr [r10 + 8], xmm3 + movsd real8 ptr [r10 + 16], xmm4 + movsd real8 ptr [r10 + 24], xmm5 + movsd real8 ptr [r10 + 32], xmm6 + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Store_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd real8 ptr [r10], xmm2 + movsd real8 ptr [r10 + 8], xmm3 + movsd real8 ptr [r10 + 16], xmm4 + movsd real8 ptr [r10 + 24], xmm5 + movsd real8 ptr [r10 + 32], xmm6 + movsd real8 ptr [r10 + 40], xmm7 + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Store_XMM3, _TEXT + movsd real8 ptr [r10], xmm3 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM3, _TEXT + +LEAF_ENTRY Store_XMM3_XMM4, _TEXT + movsd real8 ptr [r10], xmm3 + movsd real8 ptr [r10 + 8], xmm4 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM3_XMM4, _TEXT + +LEAF_ENTRY Store_XMM3_XMM4_XMM5, _TEXT + movsd real8 ptr [r10], 
xmm3 + movsd real8 ptr [r10 + 8], xmm4 + movsd real8 ptr [r10 + 16], xmm5 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Store_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd real8 ptr [r10], xmm3 + movsd real8 ptr [r10 + 8], xmm4 + movsd real8 ptr [r10 + 16], xmm5 + movsd real8 ptr [r10 + 24], xmm6 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Store_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd real8 ptr [r10], xmm3 + movsd real8 ptr [r10 + 8], xmm4 + movsd real8 ptr [r10 + 16], xmm5 + movsd real8 ptr [r10 + 24], xmm6 + movsd real8 ptr [r10 + 32], xmm7 + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Store_XMM4, _TEXT + movsd real8 ptr [r10], xmm4 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM4, _TEXT + +LEAF_ENTRY Store_XMM4_XMM5, _TEXT + movsd real8 ptr [r10], xmm4 + movsd real8 ptr [r10 + 8], xmm5 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM4_XMM5, _TEXT + +LEAF_ENTRY Store_XMM4_XMM5_XMM6, _TEXT + movsd real8 ptr [r10], xmm4 + movsd real8 ptr [r10 + 8], xmm5 + movsd real8 ptr [r10 + 16], xmm6 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Store_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd real8 ptr [r10], xmm4 + movsd real8 ptr [r10 + 8], xmm5 + movsd real8 ptr [r10 + 16], xmm6 + movsd real8 ptr [r10 + 24], xmm7 + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Store_XMM5, _TEXT + movsd real8 ptr [r10], xmm5 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM5, _TEXT + +LEAF_ENTRY Store_XMM5_XMM6, _TEXT + movsd real8 ptr [r10], xmm5 + movsd real8 ptr [r10 + 8], xmm6 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM5_XMM6, _TEXT + +LEAF_ENTRY Store_XMM5_XMM6_XMM7, _TEXT + movsd real8 ptr [r10], xmm5 + movsd real8 ptr [r10 + 8], xmm6 + movsd real8 ptr [r10 + 16], xmm7 + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Store_XMM6, _TEXT + movsd real8 ptr [r10], xmm6 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM6, _TEXT + +LEAF_ENTRY Store_XMM6_XMM7, _TEXT + movsd real8 ptr [r10], xmm6 + movsd real8 ptr [r10 + 8], xmm7 + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM6_XMM7, _TEXT + +LEAF_ENTRY Store_XMM7, _TEXT + movsd real8 ptr [r10], xmm7 + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Store_XMM7, _TEXT + +// Routines for passing value type arguments by reference in general purpose registers RDI, RSI, RDX, RCX, R8, R9 +// from the interpreter to native code + +// Copy arguments from the the interpreter stack to the CPU stack. // The CPU stack slots are aligned to pointer size. 
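Editor's note: the Load_* routines below run in the opposite direction, when the interpreter makes an outgoing call to native or JITted code. A hedged C++ model of the by-reference case handled by the Load_Ref_<reg> helpers (the arm64 versions later in this change show the same pattern explicitly): the argument register simply receives a pointer into the interpreter stack, and the interpreter stack pointer steps over the value.

    #include <cstddef>
    #include <cstdint>

    // Sketch only: the size comes from the entry following the routine pointer
    // in the Routines list.
    static void LoadRefSketch(void** argReg, uint8_t** interpSP, size_t valueTypeSize)
    {
        *argReg = *interpSP;
        *interpSP += valueTypeSize;
    }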
LEAF_ENTRY Load_Stack, _TEXT push rdi @@ -468,8 +1157,6 @@ LEAF_ENTRY Load_Stack, _TEXT jmp qword ptr [r11] LEAF_END Load_Stack, _TEXT -// Routines for passing arguments by value in general purpose registers RDI, RSI, RDX, RCX, R8, R9 - LEAF_ENTRY Load_RDI, _TEXT mov rdi, [r10] add r10, 8 diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 3178ad1e297b1b..cb772c16bd1a95 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -285,6 +285,35 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCache__m_rgEntries == offsetof( ASMCONSTANTS_C_ASSERT(OFFSETOF__InterfaceDispatchCell__m_pCache == offsetof(InterfaceDispatchCell, m_pCache)) #endif // FEATURE_CACHED_INTERFACE_DISPATCH +#define OFFSETOF__ThreadLocalInfo__m_pThread 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadLocalInfo__m_pThread == offsetof(ThreadLocalInfo, m_pThread)) + +#ifdef FEATURE_INTERPRETER +#ifdef _DEBUG +#define OFFSETOF__InterpMethod__pCallStub 0x20 +#else +#define OFFSETOF__InterpMethod__pCallStub 0x18 +#endif +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod, pCallStub)) + +#ifdef TARGET_UNIX +#define OFFSETOF__Thread__m_pInterpThreadContext 0xb78 +#else // TARGET_UNIX +#define OFFSETOF__Thread__m_pInterpThreadContext 0xba0 +#endif // TARGET_UNIX +ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pInterpThreadContext == offsetof(Thread, m_pInterpThreadContext)) + +#define OFFSETOF__InterpThreadContext__pStackPointer 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpThreadContext__pStackPointer == offsetof(InterpThreadContext, pStackPointer)) + +#define OFFSETOF__CallStubHeader__Routines 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CallStubHeader__Routines == offsetof(CallStubHeader, Routines)) + +#define SIZEOF__TransitionBlock 0xb0 +ASMCONSTANTS_C_ASSERT(SIZEOF__TransitionBlock == sizeof(TransitionBlock)) + +#endif // FEATURE_INTERPRETER + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 394f75b80f1eca..90bb8c053c835f 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -681,17 +681,566 @@ LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT #ifdef FEATURE_INTERPRETER NESTED_ENTRY InterpreterStub, _TEXT, NoHandler +#ifdef TARGET_APPLE + PROLOG_WITH_TRANSITION_BLOCK extraLocals=8*16, SaveFPArgs=0,SaveGPArgs=0 +#else PROLOG_WITH_TRANSITION_BLOCK +#endif - add x0, sp, #__PWTB_TransitionBlock // pTransitionBlock - mov x1, METHODDESC_REGISTER // pMethodDesc + // IR bytecode address + mov x19, METHODDESC_REGISTER - bl C_FUNC(ExecuteInterpretedMethod) +#ifdef TARGET_APPLE + mov x21, x0 +#endif + INLINE_GETTHREAD x20 // thrashes x0 on Apple OSes (and possibly other arg registers on other Unixes) + + // On Apple, the INLINE_GETTHREAD is guaranteed to not to thrash argument registers other than x0 + // On other Unixes, there is no such guarantee, so we need to always restore the argument registers +#ifndef TARGET_APPLE + ldp x0, x1, [sp, #__PWTB_ArgumentRegisters + 8] + ldp x2, x3, [sp, #__PWTB_ArgumentRegisters + 0x18] + ldp x4, x5, [sp, #__PWTB_ArgumentRegisters + 0x28] + ldp x6, x7, [sp, #__PWTB_ArgumentRegisters + 0x38] + ldp q0, q1, [sp, #__PWTB_FloatArgumentRegisters] + ldp q2, q3, [sp, #__PWTB_FloatArgumentRegisters + 0x20] + ldp q4, q5, [sp, #__PWTB_FloatArgumentRegisters + 0x40] + ldp q6, q7, [sp, #__PWTB_FloatArgumentRegisters + 0x60] +#else // !TARGET_APPLE + // Restore the thrashed x0 + mov 
x0, x21 +#endif // !TARGET_APPLE + + ldr x9, [x19] // InterpMethod* + ldr x9, [x9, #OFFSETOF__InterpMethod__pCallStub] + add x10, x9, #OFFSETOF__CallStubHeader__Routines + ldr x9, [x20, #OFFSETOF__Thread__m_pInterpThreadContext] + ldr x9, [x9, #OFFSETOF__InterpThreadContext__pStackPointer] + // x19 contains IR bytecode address + // Copy the arguments to the interpreter stack, invoke the InterpExecMethod and load the return value + ldr x11, [x10], #8 + blr x11 EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END InterpreterStub, _TEXT +NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRetVoid, _TEXT + +NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldr x0, [x0] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRetI8, _TEXT + +NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldr d0, [x0] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRetDouble, _TEXT + +NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + // Load the return buffer address + // 16 is the size of the pushed registers above + ldr x2, [sp, #__PWTB_ArgumentRegisters + 16] + bl C_FUNC(ExecuteInterpretedMethod) + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRetBuff, _TEXT + +NESTED_ENTRY InterpreterStubRet2I8, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldr x1, [x0, #8] + ldr x0, [x0] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRet2I8, _TEXT + +NESTED_ENTRY InterpreterStubRet2Double, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldp d0, d1, [x0] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRet2Double, _TEXT + +NESTED_ENTRY InterpreterStubRet3Double, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldp d0, d1, [x0] + ldr d2, [x0, #16] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRet3Double, _TEXT + +NESTED_ENTRY InterpreterStubRet4Double, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 
fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldp d0, d1, [x0] + ldp d2, d3, [x0, #16] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRet4Double, _TEXT + +NESTED_ENTRY InterpreterStubRetFloat, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldr s0, [x0] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRetFloat, _TEXT + +NESTED_ENTRY InterpreterStubRet2Float, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldp s0, s1, [x0] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRet2Float, _TEXT + +NESTED_ENTRY InterpreterStubRet3Float, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldp s0, s1, [x0] + ldr s2, [x0, #8] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRet3Float, _TEXT + +NESTED_ENTRY InterpreterStubRet4Float, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 + // The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 // the IR bytecode pointer + mov x2, xzr + bl C_FUNC(ExecuteInterpretedMethod) + ldp s0, s1, [x0] + ldp s2, s3, [x0, #8] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRet4Float, _TEXT + +// Copy arguments from the processor stack to the interpreter stack +// The CPU stack slots are aligned to pointer size. 
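Editor's note: the arm64 flavour of the incoming-argument copy follows. For the by-reference case, the Store_Ref_<reg> macro below copies the value type's bytes from the pointer in the argument register onto the interpreter stack in 16-, 8- and then 1-byte steps, and finally rounds x9 up to the next 8-byte stack slot. In rough C++ terms:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Sketch only, mirroring Store_Ref_<reg>: copy the struct, then realign the
    // interpreter stack pointer to the pointer-size slot boundary.
    static void StoreRefSketch(uint8_t** interpSP, const void* value, size_t size)
    {
        std::memcpy(*interpSP, value, size);
        *interpSP += (size + 7) & ~static_cast<size_t>(7);
    }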
+ +LEAF_ENTRY Store_Stack + ldr w11, [x10], #4 // SP offset + ldr w12, [x10], #4 // number of stack slots + add x11, sp, x11 + add x11, x11, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock +LOCAL_LABEL(StoreCopyLoop): + ldr x13, [x11], #8 + str x13, [x9], #8 + subs x12, x12, #8 + bne LOCAL_LABEL(StoreCopyLoop) + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_Stack + +#ifdef TARGET_APPLE + +// Copy single byte argument from the interpreter stack to the processor stack +LEAF_ENTRY Store_Stack_1B + ldr x11, [x10], #8 // SP offset + add x11, sp, x11 + ldrb w13, [x11, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock] + strb w13, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_Stack_1B + +// Copy two byte argument from the interpreter stack to the processor stack +LEAF_ENTRY Store_Stack_2B + ldr x11, [x10], #8 // SP offset + add x11, sp, x11 + ldrh w13, [x11, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock] + strh w13, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_Stack_2B + +// Copy four byte argument from the interpreter stack to the processor stack +LEAF_ENTRY Store_Stack_4B + ldr x11, [x10], #8 // SP offset + add x11, sp, x11 + ldr w13, [x11, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock] + str w13, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_Stack_4B + +#endif // TARGET_APPLE + +// Routines for passing value type arguments by reference in general purpose registers X0..X7 +// from native code to the interpreter + +.macro Store_Ref argReg + +LEAF_ENTRY Store_Ref_\argReg + ldr x11, [x10], #8 // size of the value type + cmp x11, #16 + blt LOCAL_LABEL(CopyBy8\argReg) +LOCAL_LABEL(RefCopyLoop16\argReg): + ldp x13, x14, [\argReg], #16 + stp x13, x14, [x9], #16 + subs x11, x11, #16 + bgt LOCAL_LABEL(RefCopyLoop16\argReg) + beq LOCAL_LABEL(RefCopyDone\argReg) + add x11, x11, #16 +LOCAL_LABEL(CopyBy8\argReg): + cmp x11, #8 + blt LOCAL_LABEL(RefCopyLoop1\argReg) +LOCAL_LABEL(RefCopyLoop8\argReg): + ldr x13, [\argReg], #8 + str x13, [x9], #8 + subs x11, x11, #8 + bgt LOCAL_LABEL(RefCopyLoop8\argReg) + beq LOCAL_LABEL(RefCopyDone\argReg) + add x11, x11, #8 +LOCAL_LABEL(RefCopyLoop1\argReg): + ldrb w13, [\argReg], #1 + strb w13, [x9], #1 + subs x11, x11, #1 + bne LOCAL_LABEL(RefCopyLoop1\argReg) +LOCAL_LABEL(RefCopyDone\argReg): + // Align x9 to the stack slot size + add x9, x9, 7 + and x9, x9, 0xfffffffffffffff8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_Ref_\argReg + +.endm + +Store_Ref X0 +Store_Ref X1 +Store_Ref X2 +Store_Ref X3 +Store_Ref X4 +Store_Ref X5 +Store_Ref X6 +Store_Ref X7 + +LEAF_ENTRY Store_X0 + str x0, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0 + +LEAF_ENTRY Store_X0_X1 + stp x0, x1, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0_X1 + +LEAF_ENTRY Store_X0_X1_X2 + stp x0, x1, [x9], #16 +ALTERNATE_ENTRY Store_X2 + str x2, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0_X1_X2 + +LEAF_ENTRY Store_X0_X1_X2_X3 + stp x0, x1, [x9], #16 +ALTERNATE_ENTRY Store_X2_X3 + stp x2, x3, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0_X1_X2_X3 + +LEAF_ENTRY Store_X0_X1_X2_X3_X4 + stp x0, x1, [x9], #16 +ALTERNATE_ENTRY Store_X2_X3_X4 + stp x2, x3, [x9], #16 +ALTERNATE_ENTRY Store_X4 + str x4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0_X1_X2_X3_X4 + +LEAF_ENTRY Store_X0_X1_X2_X3_X4_X5 + stp x0, x1, [x9], #16 +ALTERNATE_ENTRY Store_X2_X3_X4_X5 + stp x2, x3, [x9], 
#16 +ALTERNATE_ENTRY Store_X4_X5 + stp x4, x5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0_X1_X2_X3_X4_X5 + +LEAF_ENTRY Store_X0_X1_X2_X3_X4_X5_X6 + stp x0, x1, [x9], #16 +ALTERNATE_ENTRY Store_X2_X3_X4_X5_X6 + stp x2, x3, [x9], #16 +ALTERNATE_ENTRY Store_X4_X5_X6 + stp x4, x5, [x9], #16 +ALTERNATE_ENTRY Store_X6 + str x6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0_X1_X2_X3_X4_X5_X6 + +LEAF_ENTRY Store_X0_X1_X2_X3_X4_X5_X6_X7 + stp x0, x1, [x9], #16 +ALTERNATE_ENTRY Store_X2_X3_X4_X5_X6_X7 + stp x2, x3, [x9], #16 +ALTERNATE_ENTRY Store_X4_X5_X6_X7 + stp x4, x5, [x9], #16 +ALTERNATE_ENTRY Store_X6_X7 + stp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X0_X1_X2_X3_X4_X5_X6_X7 + +LEAF_ENTRY Store_X1 + str x1, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X1 + +LEAF_ENTRY Store_X1_X2 + stp x1, x2, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X1_X2 + +LEAF_ENTRY Store_X1_X2_X3 + stp x1, x2, [x9], #16 +ALTERNATE_ENTRY Store_X3 + str x3, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X1_X2_X3 + +LEAF_ENTRY Store_X1_X2_X3_X4 + stp x1, x2, [x9], #16 +ALTERNATE_ENTRY Store_X3_X4 + stp x3, x4, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X1_X2_X3_X4 + +LEAF_ENTRY Store_X1_X2_X3_X4_X5 + stp x1, x2, [x9], #16 +ALTERNATE_ENTRY Store_X3_X4_X5 + stp x3, x4, [x9], #16 +ALTERNATE_ENTRY Store_X5 + str x5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X1_X2_X3_X4_X5 + +LEAF_ENTRY Store_X1_X2_X3_X4_X5_X6 + stp x1, x2, [x9], #16 +ALTERNATE_ENTRY Store_X3_X4_X5_X6 + stp x3, x4, [x9], #16 +ALTERNATE_ENTRY Store_X5_X6 + stp x5, x6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X1_X2_X3_X4_X5_X6 + +LEAF_ENTRY Store_X1_X2_X3_X4_X5_X6_X7 + stp x1, x2, [x9], #16 +ALTERNATE_ENTRY Store_X3_X4_X5_X6_X7 + stp x3, x4, [x9], #16 +ALTERNATE_ENTRY Store_X5_X6_X7 + stp x5, x6, [x9], #16 +ALTERNATE_ENTRY Store_X7 + str x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_X1_X2_X3_X4_X5_X6_X7 + +// Floating point stores using stp wherever possible + +LEAF_ENTRY Store_D0 + str d0, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0 + +LEAF_ENTRY Store_D1 + str d1, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D1 + +LEAF_ENTRY Store_D0_D1 + stp d0, d1, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0_D1 + +LEAF_ENTRY Store_D0_D1_D2 + stp d0, d1, [x9], #16 +ALTERNATE_ENTRY Store_D2 + str d2, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0_D1_D2 + +LEAF_ENTRY Store_D0_D1_D2_D3 + stp d0, d1, [x9], #16 +ALTERNATE_ENTRY Store_D2_D3 + stp d2, d3, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0_D1_D2_D3 + +LEAF_ENTRY Store_D0_D1_D2_D3_D4 + stp d0, d1, [x9], #16 +ALTERNATE_ENTRY Store_D2_D3_D4 + stp d2, d3, [x9], #16 +ALTERNATE_ENTRY Store_D4 + str d4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0_D1_D2_D3_D4 + +LEAF_ENTRY Store_D0_D1_D2_D3_D4_D5 + stp d0, d1, [x9], #16 +ALTERNATE_ENTRY Store_D2_D3_D4_D5 + stp d2, d3, [x9], #16 +ALTERNATE_ENTRY Store_D4_D5 + stp d4, d5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0_D1_D2_D3_D4_D5 + +LEAF_ENTRY Store_D0_D1_D2_D3_D4_D5_D6 + stp d0, d1, [x9], #16 +ALTERNATE_ENTRY Store_D2_D3_D4_D5_D6 + stp d2, d3, [x9], #16 +ALTERNATE_ENTRY Store_D4_D5_D6 + stp d4, d5, [x9], #16 
+ALTERNATE_ENTRY Store_D6 + str d6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0_D1_D2_D3_D4_D5_D6 + +LEAF_ENTRY Store_D0_D1_D2_D3_D4_D5_D6_D7 + stp d0, d1, [x9], #16 +ALTERNATE_ENTRY Store_D2_D3_D4_D5_D6_D7 + stp d2, d3, [x9], #16 +ALTERNATE_ENTRY Store_D4_D5_D6_D7 + stp d4, d5, [x9], #16 +ALTERNATE_ENTRY Store_D6_D7 + stp d6, d7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D0_D1_D2_D3_D4_D5_D6_D7 + +LEAF_ENTRY Store_D1_D2 + stp d1, d2, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D1_D2 + +LEAF_ENTRY Store_D1_D2_D3 + stp d1, d2, [x9], #16 +ALTERNATE_ENTRY Store_D3 + str d3, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D1_D2_D3 + +LEAF_ENTRY Store_D1_D2_D3_D4 + stp d1, d2, [x9], #16 +ALTERNATE_ENTRY Store_D3_D4 + stp d3, d4, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D1_D2_D3_D4 + +LEAF_ENTRY Store_D1_D2_D3_D4_D5 + stp d1, d2, [x9], #16 +ALTERNATE_ENTRY Store_D3_D4_D5 + stp d3, d4, [x9], #16 +ALTERNATE_ENTRY Store_D5 + str d5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D1_D2_D3_D4_D5 + +LEAF_ENTRY Store_D1_D2_D3_D4_D5_D6 + stp d1, d2, [x9], #16 +ALTERNATE_ENTRY Store_D3_D4_D5_D6 + stp d3, d4, [x9], #16 +ALTERNATE_ENTRY Store_D5_D6 + stp d5, d6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D1_D2_D3_D4_D5_D6 + +LEAF_ENTRY Store_D1_D2_D3_D4_D5_D6_D7 + stp d1, d2, [x9], #16 +ALTERNATE_ENTRY Store_D3_D4_D5_D6_D7 + stp d3, d4, [x9], #16 +ALTERNATE_ENTRY Store_D5_D6_D7 + stp d5, d6, [x9], #16 +ALTERNATE_ENTRY Store_D7 + str d7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_D1_D2_D3_D4_D5_D6_D7 + // Copy arguments from the interpreter stack to the processor stack // The CPU stack slots are aligned to pointer size. 
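Editor's note: for reference alongside the new Store_* helpers, the pre-existing Load_Stack path named in the comment above moves data the other way, from the interpreter stack into the outgoing CPU stack slots of a native or JITted callee. Roughly, as a sketch with illustrative parameter names:

    #include <cstdint>
    #include <cstring>

    static void LoadStackSketch(uint8_t* cpuStackArgs, int32_t spOffset,
                                const uint8_t** interpSP, int32_t sizeInBytes)
    {
        std::memcpy(cpuStackArgs + spOffset, *interpSP, sizeInBytes);
        *interpSP += sizeInBytes; // interpreter stack pointer advances past the copied args
    }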
LEAF_ENTRY Load_Stack @@ -742,70 +1291,28 @@ LEAF_END Load_Stack_4B #endif // TARGET_APPLE // Routines for passing value type arguments by reference in general purpose registers X0..X7 +// from the interpreter to native code -LEAF_ENTRY Load_Ref_X0 - mov x0, x9 - ldr x12, [x10], #8 - add x9, x9, x12 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X0 - -LEAF_ENTRY Load_Ref_X1 - mov x1, x9 - ldr x12, [x10], #8 - add x9, x9, x12 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X1 - -LEAF_ENTRY Load_Ref_X2 - mov x2, x9 - ldr x12, [x10], #8 - add x9, x9, x12 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_Ref_X2 +.macro Load_Ref argReg -LEAF_ENTRY Load_Ref_X3 - mov x3, x9 +LEAF_ENTRY Load_Ref_\argReg + mov \argReg, x9 ldr x12, [x10], #8 add x9, x9, x12 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 -LEAF_END Load_Ref_X3 +LEAF_END Load_Ref_\argReg -LEAF_ENTRY Load_Ref_X4 - mov x4, x9 - ldr x12, [x10], #8 - add x9, x9, x12 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_Ref_X4 - -LEAF_ENTRY Load_Ref_X5 - mov x5, x9 - ldr x12, [x10], #8 - add x9, x9, x12 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_Ref_X5 - -LEAF_ENTRY Load_Ref_X6 - mov x6, x9 - ldr x12, [x10], #8 - add x9, x9, x12 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_Ref_X6 +.endm -LEAF_ENTRY Load_Ref_X7 - mov x7, x9 - ldr x12, [x10], #8 - add x9, x9, x12 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_Ref_X7 +Load_Ref X0 +Load_Ref X1 +Load_Ref X2 +Load_Ref X3 +Load_Ref X4 +Load_Ref X5 +Load_Ref X6 +Load_Ref X7 // Routines for passing arguments by value in general purpose registers X0..X7 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 815e2c8face8e0..8b75420ce49547 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1062,95 +1062,536 @@ JIT_PollGCRarePath PROLOG_WITH_TRANSITION_BLOCK - add x0, sp, #__PWTB_TransitionBlock ; pTransitionBlock - mov x1, METHODDESC_REGISTER ; pMethodDesc - - bl ExecuteInterpretedMethod + INLINE_GETTHREAD x20, x19 + + ; IR bytecode address + mov x19, METHODDESC_REGISTER + ldr x9, [METHODDESC_REGISTER] + ldr x9, [x9, #OFFSETOF__InterpMethod__pCallStub] + add x10, x9, #OFFSETOF__CallStubHeader__Routines + ldr x9, [x20, #OFFSETOF__Thread__m_pInterpThreadContext] + ldr x9, [x9, #OFFSETOF__InterpThreadContext__pStackPointer] + ; x19 contains IR bytecode address + ; Copy the arguments to the interpreter stack, invoke the InterpExecMethod and load the return value + ldr x11, [x10], #8 + blr x11 EPILOG_WITH_TRANSITION_BLOCK_RETURN - NESTED_END + NESTED_END InterpreterStub - ; Copy arguments from the interpreter stack to the processor stack + NESTED_ENTRY InterpreterStubRetVoid + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRetVoid + + NESTED_ENTRY InterpreterStubRetI8 + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldr x0, [x0] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRetI8 + + NESTED_ENTRY InterpreterStubRetDouble + PROLOG_SAVE_REG_PAIR fp, lr, #-16! 
+ ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldr d0, [x0] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRetDouble + + NESTED_ENTRY InterpreterStubRetBuff + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + ; Load the return buffer address + ; 16 is the size of the pushed registers above + ldr x2, [sp, #__PWTB_ArgumentRegisters + 16] + bl ExecuteInterpretedMethod + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRetBuff + + NESTED_ENTRY InterpreterStubRet2I8 + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldr x1, [x0, #8] + ldr x0, [x0] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRet2I8 + + NESTED_ENTRY InterpreterStubRet2Double + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldp d0, d1, [x0] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRet2Double + + NESTED_ENTRY InterpreterStubRet3Double + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldp d0, d1, [x0] + ldr d2, [x0, #16] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRet3Double + + NESTED_ENTRY InterpreterStubRet4Double + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldp d0, d1, [x0] + ldp d2, d3, [x0, #16] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRet4Double + + NESTED_ENTRY InterpreterStubRetFloat + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldr s0, [x0] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRetFloat + + NESTED_ENTRY InterpreterStubRet2Float + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldp s0, s1, [x0] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRet2Float + + NESTED_ENTRY InterpreterStubRet3Float + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldp s0, s1, [x0] + ldr s2, [x0, #8] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RETURN + NESTED_END InterpreterStubRet3Float + + NESTED_ENTRY InterpreterStubRet4Float + PROLOG_SAVE_REG_PAIR fp, lr, #-16! + ; The +16 is for the fp, lr above + add x0, sp, #__PWTB_TransitionBlock + 16 + mov x1, x19 ; the IR bytecode pointer + bl ExecuteInterpretedMethod + ldp s0, s1, [x0] + ldp s2, s3, [x0, #8] + EPILOG_RESTORE_REG_PAIR fp, lr, #16! 
+ EPILOG_RETURN + NESTED_END InterpreterStubRet4Float + + ; Routines for passing value type arguments by reference in general purpose registers X0..X7 + ; from native code to the interpreter + + ; Copy arguments from the processor stack to the interpreter stack ; The CPU stack slots are aligned to pointer size. - LEAF_ENTRY Load_Stack - ldr w14, [x10], #4 ; SP offset + + LEAF_ENTRY Store_Stack + ldr w11, [x10], #4 ; SP offset ldr w12, [x10], #4 ; number of stack slots - add x14, sp, x14 -CopyLoop - ldr x13, [x9], #8 - str x13, [x14], #8 + add x11, sp, x11 + add x11, x11, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock +StoreCopyLoop + ldr x13, [x11], #8 + str x13, [x9], #8 subs x12, x12, #8 - bne CopyLoop + bne StoreCopyLoop ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Load_Stack + + MACRO + Store_Ref $argReg + + LEAF_ENTRY Store_Ref_$argReg + ldr x11, [x10], #8 ; size of the value type + cmp x11, #16 + blt CopyBy8$argReg +RefCopyLoop16$argReg + ldp x13, x14, [$argReg], #16 + stp x13, x14, [x9], #16 + subs x11, x11, #16 + bgt RefCopyLoop16$argReg + beq RefCopyDone$argReg + add x11, x11, #16 +CopyBy8$argReg + cmp x11, #8 + blt RefCopyLoop1$argReg +RefCopyLoop8$argReg + ldr x13, [$argReg], #8 + str x13, [x9], #8 + subs x11, x11, #8 + bgt RefCopyLoop8$argReg + beq RefCopyDone$argReg + add x11, x11, #8 +RefCopyLoop1$argReg + ldrb w13, [$argReg], #1 + strb w13, [x9], #1 + subs x11, x11, #1 + bne RefCopyLoop1$argReg +RefCopyDone$argReg + ; Align x9 to the stack slot size + add x9, x9, 7 + and x9, x9, 0xfffffffffffffff8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_Ref_$argReg - ; Routines for passing value type arguments by reference in general purpose registers X0..X7 + MEND - LEAF_ENTRY Load_Ref_X0 - mov x0, x9 - ldr x12, [x10], #8 - add x9, x9, x12 + Store_Ref X0 + Store_Ref X1 + Store_Ref X2 + Store_Ref X3 + Store_Ref X4 + Store_Ref X5 + Store_Ref X6 + Store_Ref X7 + + LEAF_ENTRY Store_X0 + str x0, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_X0 + LEAF_END Store_X0 - LEAF_ENTRY Load_Ref_X1 - mov x1, x9 - ldr x12, [x10], #8 - add x9, x9, x12 + LEAF_ENTRY Store_X0_X1 + stp x0, x1, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_X1 + LEAF_END Store_X0_X1 - LEAF_ENTRY Load_Ref_X2 - mov x2, x9 - ldr x12, [x10], #8 - add x9, x9, x12 + LEAF_ENTRY Store_X0_X1_X2 + stp x0, x1, [x9], #16 + ALTERNATE_ENTRY Store_X2 + str x2, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_Ref_X2 + LEAF_END Store_X0_X1_X2 - LEAF_ENTRY Load_Ref_X3 - mov x3, x9 - ldr x12, [x10], #8 - add x9, x9, x12 + LEAF_ENTRY Store_X0_X1_X2_X3 + stp x0, x1, [x9], #16 + ALTERNATE_ENTRY Store_X2_X3 + stp x2, x3, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_Ref_X3 + LEAF_END Store_X0_X1_X2_X3 + + LEAF_ENTRY Store_X0_X1_X2_X3_X4 + stp x0, x1, [x9], #16 + ALTERNATE_ENTRY Store_X2_X3_X4 + stp x2, x3, [x9], #16 + ALTERNATE_ENTRY Store_X4 + str x4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_X0_X1_X2_X3_X4 + + LEAF_ENTRY Store_X0_X1_X2_X3_X4_X5 + stp x0, x1, [x9], #16 + ALTERNATE_ENTRY Store_X2_X3_X4_X5 + stp x2, x3, [x9], #16 + ALTERNATE_ENTRY Store_X4_X5 + stp x4, x5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_X0_X1_X2_X3_X4_X5 + + LEAF_ENTRY Store_X0_X1_X2_X3_X4_X5_X6 + stp x0, x1, [x9], #16 + ALTERNATE_ENTRY Store_X2_X3_X4_X5_X6 + stp x2, x3, [x9], #16 + ALTERNATE_ENTRY Store_X4_X5_X6 + stp x4, x5, [x9], #16 + ALTERNATE_ENTRY Store_X6 + str x6, [x9], #8 + ldr x11, [x10], #8 + 
EPILOG_BRANCH_REG x11 + LEAF_END Store_X0_X1_X2_X3_X4_X5_X6 + + LEAF_ENTRY Store_X0_X1_X2_X3_X4_X5_X6_X7 + stp x0, x1, [x9], #16 + ALTERNATE_ENTRY Store_X2_X3_X4_X5_X6_X7 + stp x2, x3, [x9], #16 + ALTERNATE_ENTRY Store_X4_X5_X6_X7 + stp x4, x5, [x9], #16 + ALTERNATE_ENTRY Store_X6_X7 + stp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_X0_X1_X2_X3_X4_X5_X6_X7 - LEAF_ENTRY Load_Ref_X4 - mov x4, x9 - ldr x12, [x10], #8 - add x9, x9, x12 + LEAF_ENTRY Store_X1 + str x1, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_Ref_X4 + LEAF_END Store_X1 - LEAF_ENTRY Load_Ref_X5 - mov x5, x9 - ldr x12, [x10], #8 - add x9, x9, x12 + LEAF_ENTRY Store_X1_X2 + stp x1, x2, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_Ref_X5 + LEAF_END Store_X1_X2 - LEAF_ENTRY Load_Ref_X6 - mov x6, x9 - ldr x12, [x10], #8 - add x9, x9, x12 + LEAF_ENTRY Store_X1_X2_X3 + stp x1, x2, [x9], #16 + ALTERNATE_ENTRY Store_X3 + str x3, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_Ref_X6 + LEAF_END Store_X1_X2_X3 - LEAF_ENTRY Load_Ref_X7 - mov x7, x9 + LEAF_ENTRY Store_X1_X2_X3_X4 + stp x1, x2, [x9], #16 + ALTERNATE_ENTRY Store_X3_X4 + stp x3, x4, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_X1_X2_X3_X4 + + LEAF_ENTRY Store_X1_X2_X3_X4_X5 + stp x1, x2, [x9], #16 + ALTERNATE_ENTRY Store_X3_X4_X5 + stp x3, x4, [x9], #16 + ALTERNATE_ENTRY Store_X5 + str x5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_X1_X2_X3_X4_X5 + + LEAF_ENTRY Store_X1_X2_X3_X4_X5_X6 + stp x1, x2, [x9], #16 + ALTERNATE_ENTRY Store_X3_X4_X5_X6 + stp x3, x4, [x9], #16 + ALTERNATE_ENTRY Store_X5_X6 + stp x5, x6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_X1_X2_X3_X4_X5_X6 + + LEAF_ENTRY Store_X1_X2_X3_X4_X5_X6_X7 + stp x1, x2, [x9], #16 + ALTERNATE_ENTRY Store_X3_X4_X5_X6_X7 + stp x3, x4, [x9], #16 + ALTERNATE_ENTRY Store_X5_X6_X7 + stp x5, x6, [x9], #16 + ALTERNATE_ENTRY Store_X7 + str x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_X1_X2_X3_X4_X5_X6_X7 + + ; Floating point stores using stp wherever possible + + LEAF_ENTRY Store_D0 + str d0, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0 + + LEAF_ENTRY Store_D1 + str d1, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D1 + + LEAF_ENTRY Store_D0_D1 + stp d0, d1, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0_D1 + + LEAF_ENTRY Store_D0_D1_D2 + stp d0, d1, [x9], #16 + ALTERNATE_ENTRY Store_D2 + str d2, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0_D1_D2 + + LEAF_ENTRY Store_D0_D1_D2_D3 + stp d0, d1, [x9], #16 + ALTERNATE_ENTRY Store_D2_D3 + stp d2, d3, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0_D1_D2_D3 + + LEAF_ENTRY Store_D0_D1_D2_D3_D4 + stp d0, d1, [x9], #16 + ALTERNATE_ENTRY Store_D2_D3_D4 + stp d2, d3, [x9], #16 + ALTERNATE_ENTRY Store_D4 + str d4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0_D1_D2_D3_D4 + + LEAF_ENTRY Store_D0_D1_D2_D3_D4_D5 + stp d0, d1, [x9], #16 + ALTERNATE_ENTRY Store_D2_D3_D4_D5 + stp d2, d3, [x9], #16 + ALTERNATE_ENTRY Store_D4_D5 + stp d4, d5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0_D1_D2_D3_D4_D5 + + LEAF_ENTRY Store_D0_D1_D2_D3_D4_D5_D6 + stp d0, d1, [x9], #16 + ALTERNATE_ENTRY Store_D2_D3_D4_D5_D6 + stp d2, d3, [x9], #16 + ALTERNATE_ENTRY Store_D4_D5_D6 + stp d4, d5, [x9], #16 + 
ALTERNATE_ENTRY Store_D6 + str d6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0_D1_D2_D3_D4_D5_D6 + + LEAF_ENTRY Store_D0_D1_D2_D3_D4_D5_D6_D7 + stp d0, d1, [x9], #16 + ALTERNATE_ENTRY Store_D2_D3_D4_D5_D6_D7 + stp d2, d3, [x9], #16 + ALTERNATE_ENTRY Store_D4_D5_D6_D7 + stp d4, d5, [x9], #16 + ALTERNATE_ENTRY Store_D6_D7 + stp d6, d7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D0_D1_D2_D3_D4_D5_D6_D7 + + LEAF_ENTRY Store_D1_D2 + stp d1, d2, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D1_D2 + + LEAF_ENTRY Store_D1_D2_D3 + stp d1, d2, [x9], #16 + ALTERNATE_ENTRY Store_D3 + str d3, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D1_D2_D3 + + LEAF_ENTRY Store_D1_D2_D3_D4 + stp d1, d2, [x9], #16 + ALTERNATE_ENTRY Store_D3_D4 + stp d3, d4, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D1_D2_D3_D4 + + LEAF_ENTRY Store_D1_D2_D3_D4_D5 + stp d1, d2, [x9], #16 + ALTERNATE_ENTRY Store_D3_D4_D5 + stp d3, d4, [x9], #16 + ALTERNATE_ENTRY Store_D5 + str d5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D1_D2_D3_D4_D5 + + LEAF_ENTRY Store_D1_D2_D3_D4_D5_D6 + stp d1, d2, [x9], #16 + ALTERNATE_ENTRY Store_D3_D4_D5_D6 + stp d3, d4, [x9], #16 + ALTERNATE_ENTRY Store_D5_D6 + stp d5, d6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D1_D2_D3_D4_D5_D6 + + LEAF_ENTRY Store_D1_D2_D3_D4_D5_D6_D7 + stp d1, d2, [x9], #16 + ALTERNATE_ENTRY Store_D3_D4_D5_D6_D7 + stp d3, d4, [x9], #16 + ALTERNATE_ENTRY Store_D5_D6_D7 + stp d5, d6, [x9], #16 + ALTERNATE_ENTRY Store_D7 + str d7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_D1_D2_D3_D4_D5_D6_D7 + + ; Routines for passing value type arguments by reference in general purpose registers X0..X7 + ; from the interpreter to native code + ; Copy arguments from the interpreter stack to the processor stack + ; The CPU stack slots are aligned to pointer size. 
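+ ; As in the Store_* helpers above, x9 is the interpreter stack pointer, x10 points
+ ; to the next entry of the routine list built by CallStubGenerator, and x11 is used
+ ; to chain to the following routine.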
+ LEAF_ENTRY Load_Stack + ldr w14, [x10], #4 ; SP offset + ldr w12, [x10], #4 ; number of stack slots + add x14, sp, x14 +CopyLoop + ldr x13, [x9], #8 + str x13, [x14], #8 + subs x12, x12, #8 + bne CopyLoop + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_Stack + + ; Routines for passing value type arguments by reference in general purpose registers X0..X7 + + MACRO + Load_Ref $argReg + + LEAF_ENTRY Load_Ref_$argReg + mov $argReg, x9 ldr x12, [x10], #8 add x9, x9, x12 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_Ref_X7 + LEAF_END Load_Ref_$argReg + + MEND + + Load_Ref X0 + Load_Ref X1 + Load_Ref X2 + Load_Ref X3 + Load_Ref X4 + Load_Ref X5 + Load_Ref X6 + Load_Ref X7 ; Routines for passing arguments by value in general purpose registers X0..X7 diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index bdb402b8b99fe0..a8558b9f58a091 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -7,11 +7,16 @@ #include "ecall.h" extern "C" void Load_Stack(); +extern "C" void Store_Stack(); #if defined(TARGET_APPLE) && defined(TARGET_ARM64) extern "C" void Load_Stack_1B(); extern "C" void Load_Stack_2B(); extern "C" void Load_Stack_4B(); + +extern "C" void Store_Stack_1B(); +extern "C" void Store_Stack_2B(); +extern "C" void Store_Stack_4B(); #endif // TARGET_APPLE && TARGET_ARM64 #ifdef TARGET_AMD64 @@ -42,6 +47,31 @@ extern "C" void Load_Ref_RDX(); extern "C" void Load_Ref_R8(); extern "C" void Load_Ref_R9(); +extern "C" void Store_RCX(); +extern "C" void Store_RCX_RDX(); +extern "C" void Store_RCX_RDX_R8(); +extern "C" void Store_RCX_RDX_R8_R9(); +extern "C" void Store_RDX(); +extern "C" void Store_RDX_R8(); +extern "C" void Store_RDX_R8_R9(); +extern "C" void Store_R8(); +extern "C" void Store_R8_R9(); +extern "C" void Store_R9(); +extern "C" void Store_XMM0(); +extern "C" void Store_XMM0_XMM1(); +extern "C" void Store_XMM0_XMM1_XMM2(); +extern "C" void Store_XMM0_XMM1_XMM2_XMM3(); +extern "C" void Store_XMM1(); +extern "C" void Store_XMM1_XMM2(); +extern "C" void Store_XMM1_XMM2_XMM3(); +extern "C" void Store_XMM2(); +extern "C" void Store_XMM2_XMM3(); +extern "C" void Store_XMM3(); +extern "C" void Store_Ref_RCX(); +extern "C" void Store_Ref_RDX(); +extern "C" void Store_Ref_R8(); +extern "C" void Store_Ref_R9(); + PCODE GPRegsRoutines[] = { (PCODE)Load_RCX, // 00 @@ -62,6 +92,26 @@ PCODE GPRegsRoutines[] = (PCODE)Load_R9 // 33 }; +PCODE GPRegsStoreRoutines[] = +{ + (PCODE)Store_RCX, // 00 + (PCODE)Store_RCX_RDX, // 01 + (PCODE)Store_RCX_RDX_R8, // 02 + (PCODE)Store_RCX_RDX_R8_R9, // 03 + (PCODE)0, // 10 + (PCODE)Store_RDX, // 11 + (PCODE)Store_RDX_R8, // 12 + (PCODE)Store_RDX_R8_R9, // 13 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_R8, // 22 + (PCODE)Store_R8_R9, // 23 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_R9 // 33 +}; + PCODE GPRegsRefRoutines[] = { (PCODE)Load_Ref_RCX, // 0 @@ -70,6 +120,14 @@ PCODE GPRegsRefRoutines[] = (PCODE)Load_Ref_R9, // 3 }; +PCODE GPRegsRefStoreRoutines[] = +{ + (PCODE)Store_Ref_RCX, // 0 + (PCODE)Store_Ref_RDX, // 1 + (PCODE)Store_Ref_R8, // 2 + (PCODE)Store_Ref_R9, // 3 +}; + PCODE FPRegsRoutines[] = { (PCODE)Load_XMM0, // 00 @@ -90,6 +148,26 @@ PCODE FPRegsRoutines[] = (PCODE)Load_XMM3 // 33 }; +PCODE FPRegsStoreRoutines[] = +{ + (PCODE)Store_XMM0, // 00 + (PCODE)Store_XMM0_XMM1, // 01 + (PCODE)Store_XMM0_XMM1_XMM2, // 02 + (PCODE)Store_XMM0_XMM1_XMM2_XMM3, // 03 + (PCODE)0, // 10 + (PCODE)Store_XMM1, // 11 + 
(PCODE)Store_XMM1_XMM2, // 12 + (PCODE)Store_XMM1_XMM2_XMM3, // 13 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_XMM2, // 22 + (PCODE)Store_XMM2_XMM3, // 23 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_XMM3 // 33 +}; + #else // TARGET_WINDOWS extern "C" void Load_RDI(); @@ -114,6 +192,28 @@ extern "C" void Load_R8(); extern "C" void Load_R8_R9(); extern "C" void Load_R9(); +extern "C" void Store_RDI(); +extern "C" void Store_RDI_RSI(); +extern "C" void Store_RDI_RSI_RDX(); +extern "C" void Store_RDI_RSI_RDX_RCX(); +extern "C" void Store_RDI_RSI_RDX_RCX_R8(); +extern "C" void Store_RDI_RSI_RDX_RCX_R8_R9(); +extern "C" void Store_RSI(); +extern "C" void Store_RSI_RDX(); +extern "C" void Store_RSI_RDX_RCX(); +extern "C" void Store_RSI_RDX_RCX_R8(); +extern "C" void Store_RSI_RDX_RCX_R8_R9(); +extern "C" void Store_RDX(); +extern "C" void Store_RDX_RCX(); +extern "C" void Store_RDX_RCX_R8(); +extern "C" void Store_RDX_RCX_R8_R9(); +extern "C" void Store_RCX(); +extern "C" void Store_RCX_R8(); +extern "C" void Store_RCX_R8_R9(); +extern "C" void Store_R8(); +extern "C" void Store_R8_R9(); +extern "C" void Store_R9(); + PCODE GPRegsRoutines[] = { (PCODE)Load_RDI, // 00 @@ -154,6 +254,46 @@ PCODE GPRegsRoutines[] = (PCODE)Load_R9 // 55 }; +PCODE GPRegsStoreRoutines[] = +{ + (PCODE)Store_RDI, // 00 + (PCODE)Store_RDI_RSI, // 01 + (PCODE)Store_RDI_RSI_RDX, // 02 + (PCODE)Store_RDI_RSI_RDX_RCX, // 03 + (PCODE)Store_RDI_RSI_RDX_RCX_R8, // 04 + (PCODE)Store_RDI_RSI_RDX_RCX_R8_R9, // 05 + (PCODE)0, // 10 + (PCODE)Store_RSI, // 11 + (PCODE)Store_RSI_RDX, // 12 + (PCODE)Store_RSI_RDX_RCX, // 13 + (PCODE)Store_RSI_RDX_RCX_R8, // 14 + (PCODE)Store_RSI_RDX_RCX_R8_R9, // 15 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_RDX, // 22 + (PCODE)Store_RDX_RCX, // 23 + (PCODE)Store_RDX_RCX_R8, // 24 + (PCODE)Store_RDX_RCX_R8_R9, // 25 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_RCX, // 33 + (PCODE)Store_RCX_R8, // 34 + (PCODE)Store_RCX_R8_R9, // 35 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Store_R8, // 44 + (PCODE)Store_R8_R9, // 45 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Store_R9 // 55 +}; + extern "C" void Load_XMM0(); extern "C" void Load_XMM0_XMM1(); extern "C" void Load_XMM0_XMM1_XMM2(); @@ -191,6 +331,43 @@ extern "C" void Load_XMM6(); extern "C" void Load_XMM6_XMM7(); extern "C" void Load_XMM7(); +extern "C" void Store_XMM0(); +extern "C" void Store_XMM0_XMM1(); +extern "C" void Store_XMM0_XMM1_XMM2(); +extern "C" void Store_XMM0_XMM1_XMM2_XMM3(); +extern "C" void Store_XMM0_XMM1_XMM2_XMM3_XMM4(); +extern "C" void Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5(); +extern "C" void Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Store_XMM1(); +extern "C" void Store_XMM1_XMM2(); +extern "C" void Store_XMM1_XMM2_XMM3(); +extern "C" void Store_XMM1_XMM2_XMM3_XMM4(); +extern "C" void Store_XMM1_XMM2_XMM3_XMM4_XMM5(); +extern "C" void Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Store_XMM2(); +extern "C" void Store_XMM2_XMM3(); +extern "C" void Store_XMM2_XMM3_XMM4(); +extern "C" void Store_XMM2_XMM3_XMM4_XMM5(); +extern "C" void Store_XMM2_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Store_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Store_XMM3(); +extern "C" void Store_XMM3_XMM4(); +extern "C" void Store_XMM3_XMM4_XMM5(); 
+extern "C" void Store_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Store_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Store_XMM4(); +extern "C" void Store_XMM4_XMM5(); +extern "C" void Store_XMM4_XMM5_XMM6(); +extern "C" void Store_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Store_XMM5(); +extern "C" void Store_XMM5_XMM6(); +extern "C" void Store_XMM5_XMM6_XMM7(); +extern "C" void Store_XMM6(); +extern "C" void Store_XMM6_XMM7(); +extern "C" void Store_XMM7(); + PCODE FPRegsRoutines[] = { (PCODE)Load_XMM0, // 00 @@ -259,6 +436,74 @@ PCODE FPRegsRoutines[] = (PCODE)Load_XMM7 // 77 }; +PCODE FPRegsStoreRoutines[] = +{ + (PCODE)Store_XMM0, // 00 + (PCODE)Store_XMM0_XMM1, // 01 + (PCODE)Store_XMM0_XMM1_XMM2, // 02 + (PCODE)Store_XMM0_XMM1_XMM2_XMM3, // 03 + (PCODE)Store_XMM0_XMM1_XMM2_XMM3_XMM4, // 04 + (PCODE)Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, // 05 + (PCODE)Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, // 06 + (PCODE)Store_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7,// 07 + (PCODE)0, // 10 + (PCODE)Store_XMM1, // 11 + (PCODE)Store_XMM1_XMM2, // 12 + (PCODE)Store_XMM1_XMM2_XMM3, // 13 + (PCODE)Store_XMM1_XMM2_XMM3_XMM4, // 14 + (PCODE)Store_XMM1_XMM2_XMM3_XMM4_XMM5, // 15 + (PCODE)Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, // 16 + (PCODE)Store_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_XMM2, // 22 + (PCODE)Store_XMM2_XMM3, // 23 + (PCODE)Store_XMM2_XMM3_XMM4, // 24 + (PCODE)Store_XMM2_XMM3_XMM4_XMM5, // 25 + (PCODE)Store_XMM2_XMM3_XMM4_XMM5_XMM6, // 26 + (PCODE)Store_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_XMM3, // 33 + (PCODE)Store_XMM3_XMM4, // 34 + (PCODE)Store_XMM3_XMM4_XMM5, // 35 + (PCODE)Store_XMM3_XMM4_XMM5_XMM6, // 36 + (PCODE)Store_XMM3_XMM4_XMM5_XMM6_XMM7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Store_XMM4, // 44 + (PCODE)Store_XMM4_XMM5, // 45 + (PCODE)Store_XMM4_XMM5_XMM6, // 46 + (PCODE)Store_XMM4_XMM5_XMM6_XMM7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Store_XMM5, // 55 + (PCODE)Store_XMM5_XMM6, // 56 + (PCODE)Store_XMM5_XMM6_XMM7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Store_XMM6, // 66 + (PCODE)Store_XMM6_XMM7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Store_XMM7 // 77 +}; + #endif // TARGET_WINDOWS #endif // TARGET_AMD64 @@ -302,6 +547,43 @@ extern "C" void Load_X6(); extern "C" void Load_X6_X7(); extern "C" void Load_X7(); +extern "C" void Store_X0(); +extern "C" void Store_X0_X1(); +extern "C" void Store_X0_X1_X2(); +extern "C" void Store_X0_X1_X2_X3(); +extern "C" void Store_X0_X1_X2_X3_X4(); +extern "C" void Store_X0_X1_X2_X3_X4_X5(); +extern "C" void Store_X0_X1_X2_X3_X4_X5_X6(); +extern "C" void Store_X0_X1_X2_X3_X4_X5_X6_X7(); +extern "C" void Store_X1(); +extern "C" void Store_X1_X2(); +extern "C" void Store_X1_X2_X3(); +extern "C" void Store_X1_X2_X3_X4(); +extern "C" void Store_X1_X2_X3_X4_X5(); +extern "C" void Store_X1_X2_X3_X4_X5_X6(); +extern "C" void Store_X1_X2_X3_X4_X5_X6_X7(); +extern "C" void Store_X2(); +extern "C" void Store_X2_X3(); +extern "C" void Store_X2_X3_X4(); +extern "C" void Store_X2_X3_X4_X5(); +extern "C" void Store_X2_X3_X4_X5_X6(); +extern "C" void Store_X2_X3_X4_X5_X6_X7(); +extern "C" void Store_X3(); +extern "C" void Store_X3_X4(); +extern "C" void 
Store_X3_X4_X5(); +extern "C" void Store_X3_X4_X5_X6(); +extern "C" void Store_X3_X4_X5_X6_X7(); +extern "C" void Store_X4(); +extern "C" void Store_X4_X5(); +extern "C" void Store_X4_X5_X6(); +extern "C" void Store_X4_X5_X6_X7(); +extern "C" void Store_X5(); +extern "C" void Store_X5_X6(); +extern "C" void Store_X5_X6_X7(); +extern "C" void Store_X6(); +extern "C" void Store_X6_X7(); +extern "C" void Store_X7(); + extern "C" void Load_Ref_X0(); extern "C" void Load_Ref_X1(); extern "C" void Load_Ref_X2(); @@ -311,6 +593,14 @@ extern "C" void Load_Ref_X5(); extern "C" void Load_Ref_X6(); extern "C" void Load_Ref_X7(); +extern "C" void Store_Ref_X0(); +extern "C" void Store_Ref_X1(); +extern "C" void Store_Ref_X2(); +extern "C" void Store_Ref_X3(); +extern "C" void Store_Ref_X4(); +extern "C" void Store_Ref_X5(); +extern "C" void Store_Ref_X6(); +extern "C" void Store_Ref_X7(); PCODE GPRegsRoutines[] = { @@ -341,7 +631,7 @@ PCODE GPRegsRoutines[] = (PCODE)0, // 30 (PCODE)0, // 31 (PCODE)0, // 32 - (PCODE)Load_X3, // 33 + (PCODE)Load_X3, // 33 (PCODE)Load_X3_X4, // 34 (PCODE)Load_X3_X4_X5, // 35 (PCODE)Load_X3_X4_X5_X6, // 36 @@ -380,6 +670,74 @@ PCODE GPRegsRoutines[] = (PCODE)Load_X7 // 77 }; +PCODE GPRegsStoreRoutines[] = +{ + (PCODE)Store_X0, // 00 + (PCODE)Store_X0_X1, // 01 + (PCODE)Store_X0_X1_X2, // 02 + (PCODE)Store_X0_X1_X2_X3, // 03 + (PCODE)Store_X0_X1_X2_X3_X4, // 04 + (PCODE)Store_X0_X1_X2_X3_X4_X5, // 05 + (PCODE)Store_X0_X1_X2_X3_X4_X5_X6, // 06 + (PCODE)Store_X0_X1_X2_X3_X4_X5_X6_X7, // 07 + (PCODE)0, // 10 + (PCODE)Store_X1, // 11 + (PCODE)Store_X1_X2, // 12 + (PCODE)Store_X1_X2_X3, // 13 + (PCODE)Store_X1_X2_X3_X4, // 14 + (PCODE)Store_X1_X2_X3_X4_X5, // 15 + (PCODE)Store_X1_X2_X3_X4_X5_X6, // 16 + (PCODE)Store_X1_X2_X3_X4_X5_X6_X7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_X2, // 22 + (PCODE)Store_X2_X3, // 23 + (PCODE)Store_X2_X3_X4, // 24 + (PCODE)Store_X2_X3_X4_X5, // 25 + (PCODE)Store_X2_X3_X4_X5_X6, // 26 + (PCODE)Store_X2_X3_X4_X5_X6_X7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_X3, // 33 + (PCODE)Store_X3_X4, // 34 + (PCODE)Store_X3_X4_X5, // 35 + (PCODE)Store_X3_X4_X5_X6, // 36 + (PCODE)Store_X3_X4_X5_X6_X7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Store_X4, // 44 + (PCODE)Store_X4_X5, // 45 + (PCODE)Store_X4_X5_X6, // 46 + (PCODE)Store_X4_X5_X6_X7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Store_X5, // 55 + (PCODE)Store_X5_X6, // 56 + (PCODE)Store_X5_X6_X7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Store_X6, // 66 + (PCODE)Store_X6_X7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Store_X7 // 77 +}; + PCODE GPRegsRefRoutines[] = { (PCODE)Load_Ref_X0, // 0 @@ -392,6 +750,18 @@ PCODE GPRegsRefRoutines[] = (PCODE)Load_Ref_X7 // 7 }; +PCODE GPRegsRefStoreRoutines[] = +{ + (PCODE)Store_Ref_X0, // 0 + (PCODE)Store_Ref_X1, // 1 + (PCODE)Store_Ref_X2, // 2 + (PCODE)Store_Ref_X3, // 3 + (PCODE)Store_Ref_X4, // 4 + (PCODE)Store_Ref_X5, // 5 + (PCODE)Store_Ref_X6, // 6 + (PCODE)Store_Ref_X7 // 7 +}; + extern "C" void Load_D0(); extern "C" void Load_D0_D1(); extern "C" void Load_D0_D1_D2(); @@ -429,6 +799,111 @@ extern "C" void Load_D6(); extern "C" void Load_D6_D7(); extern "C" void Load_D7(); +extern "C" void Store_D0(); +extern "C" void 
Store_D0_D1(); +extern "C" void Store_D0_D1_D2(); +extern "C" void Store_D0_D1_D2_D3(); +extern "C" void Store_D0_D1_D2_D3_D4(); +extern "C" void Store_D0_D1_D2_D3_D4_D5(); +extern "C" void Store_D0_D1_D2_D3_D4_D5_D6(); +extern "C" void Store_D0_D1_D2_D3_D4_D5_D6_D7(); +extern "C" void Store_D1(); +extern "C" void Store_D1_D2(); +extern "C" void Store_D1_D2_D3(); +extern "C" void Store_D1_D2_D3_D4(); +extern "C" void Store_D1_D2_D3_D4_D5(); +extern "C" void Store_D1_D2_D3_D4_D5_D6(); +extern "C" void Store_D1_D2_D3_D4_D5_D6_D7(); +extern "C" void Store_D2(); +extern "C" void Store_D2_D3(); +extern "C" void Store_D2_D3_D4(); +extern "C" void Store_D2_D3_D4_D5(); +extern "C" void Store_D2_D3_D4_D5_D6(); +extern "C" void Store_D2_D3_D4_D5_D6_D7(); +extern "C" void Store_D3(); +extern "C" void Store_D3_D4(); +extern "C" void Store_D3_D4_D5(); +extern "C" void Store_D3_D4_D5_D6(); +extern "C" void Store_D3_D4_D5_D6_D7(); +extern "C" void Store_D4(); +extern "C" void Store_D4_D5(); +extern "C" void Store_D4_D5_D6(); +extern "C" void Store_D4_D5_D6_D7(); +extern "C" void Store_D5(); +extern "C" void Store_D5_D6(); +extern "C" void Store_D5_D6_D7(); +extern "C" void Store_D6(); +extern "C" void Store_D6_D7(); +extern "C" void Store_D7(); + +PCODE FPRegsStoreRoutines[] = +{ + (PCODE)Store_D0, // 00 + (PCODE)Store_D0_D1, // 01 + (PCODE)Store_D0_D1_D2, // 02 + (PCODE)Store_D0_D1_D2_D3, // 03 + (PCODE)Store_D0_D1_D2_D3_D4, // 04 + (PCODE)Store_D0_D1_D2_D3_D4_D5, // 05 + (PCODE)Store_D0_D1_D2_D3_D4_D5_D6, // 06 + (PCODE)Store_D0_D1_D2_D3_D4_D5_D6_D7, // 07 + (PCODE)0, // 10 + (PCODE)Store_D1, // 11 + (PCODE)Store_D1_D2, // 12 + (PCODE)Store_D1_D2_D3, // 13 + (PCODE)Store_D1_D2_D3_D4, // 14 + (PCODE)Store_D1_D2_D3_D4_D5, // 15 + (PCODE)Store_D1_D2_D3_D4_D5_D6, // 16 + (PCODE)Store_D1_D2_D3_D4_D5_D6_D7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_D2, // 22 + (PCODE)Store_D2_D3, // 23 + (PCODE)Store_D2_D3_D4, // 24 + (PCODE)Store_D2_D3_D4_D5, // 25 + (PCODE)Store_D2_D3_D4_D5_D6, // 26 + (PCODE)Store_D2_D3_D4_D5_D6_D7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_D3, // 33 + (PCODE)Store_D3_D4, // 34 + (PCODE)Store_D3_D4_D5, // 35 + (PCODE)Store_D3_D4_D5_D6, // 36 + (PCODE)Store_D3_D4_D5_D6_D7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Store_D4, // 44 + (PCODE)Store_D4_D5, // 45 + (PCODE)Store_D4_D5_D6, // 46 + (PCODE)Store_D4_D5_D6_D7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Store_D5, // 55 + (PCODE)Store_D5_D6, // 56 + (PCODE)Store_D5_D6_D7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Store_D6, // 66 + (PCODE)Store_D6_D7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Store_D7 // 77 +}; + PCODE FPRegsRoutines[] = { (PCODE)Load_D0, // 00 @@ -496,37 +971,77 @@ PCODE FPRegsRoutines[] = (PCODE)0, // 76 (PCODE)Load_D7 // 77 }; + #endif // TARGET_ARM64 -PCODE GetGPRegRangeLoadRoutine(int r1, int r2) +PCODE CallStubGenerator::GetStackRoutine() +{ + return m_interpreterToNative ? (PCODE)Load_Stack : (PCODE)Store_Stack; +} + +#if defined(TARGET_APPLE) && defined(TARGET_ARM64) +PCODE CallStubGenerator::GetStackRoutine_1B() +{ + return m_interpreterToNative ? (PCODE)Load_Stack_1B : (PCODE)Store_Stack_1B; +} + +PCODE CallStubGenerator::GetStackRoutine_2B() +{ + return m_interpreterToNative ? 
(PCODE)Load_Stack_2B : (PCODE)Store_Stack_2B; +} + +PCODE CallStubGenerator::GetStackRoutine_4B() +{ + return m_interpreterToNative ? (PCODE)Load_Stack_4B : (PCODE)Store_Stack_4B; +} +#endif // TARGET_APPLE && TARGET_ARM64 + +PCODE CallStubGenerator::GetGPRegRangeRoutine(int r1, int r2) { int index = r1 * NUM_ARGUMENT_REGISTERS + r2; - return GPRegsRoutines[index]; + return m_interpreterToNative ? GPRegsRoutines[index] : GPRegsStoreRoutines[index]; } #ifndef UNIX_AMD64_ABI -PCODE GetGPRegRefLoadRoutine(int r) +PCODE CallStubGenerator::GetGPRegRefRoutine(int r) { - return GPRegsRefRoutines[r]; + return m_interpreterToNative ? GPRegsRefRoutines[r] : GPRegsRefStoreRoutines[r]; } + #endif // UNIX_AMD64_ABI -PCODE GetFPRegRangeLoadRoutine(int x1, int x2) +PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) { int index = x1 * NUM_FLOAT_ARGUMENT_REGISTERS + x2; - return FPRegsRoutines[index]; + return m_interpreterToNative ? FPRegsRoutines[index] : FPRegsStoreRoutines[index]; } extern "C" void CallJittedMethodRetVoid(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetDouble(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void InterpreterStubRetVoid(); +extern "C" void InterpreterStubRetDouble(); +extern "C" void InterpreterStubRetI8(); + +#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) +extern "C" void CallJittedMethodRetBuffRCX(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetBuffRDX(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void InterpreterStubRetBuffRCX(); +extern "C" void InterpreterStubRetBuffRDX(); +#else // TARGET_WINDOWS && TARGET_AMD64 extern "C" void CallJittedMethodRetBuff(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void InterpreterStubRetBuff(); +#endif // TARGET_WINDOWS && TARGET_AMD64 #ifdef UNIX_AMD64_ABI extern "C" void CallJittedMethodRetI8I8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetI8Double(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetDoubleI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetDoubleDouble(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void InterpreterStubRetI8I8(); +extern "C" void InterpreterStubRetI8Double(); +extern "C" void InterpreterStubRetDoubleI8(); +extern "C" void InterpreterStubRetDoubleDouble(); #endif #ifdef TARGET_ARM64 @@ -538,11 +1053,129 @@ extern "C" void CallJittedMethodRetFloat(PCODE *routines, int8_t*pArgs, int8_t*p extern "C" void CallJittedMethodRet2Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRet3Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRet4Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void InterpreterStubRet2I8(); +extern "C" void InterpreterStubRet2Double(); +extern "C" void InterpreterStubRet3Double(); +extern "C" void InterpreterStubRet4Double(); +extern "C" void InterpreterStubRetFloat(); +extern "C" void InterpreterStubRet2Float(); +extern "C" void InterpreterStubRet3Float(); +extern "C" void InterpreterStubRet4Float(); +#endif // TARGET_ARM64 + 
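+// Selects the assembly helper used in the interpreter-to-native direction: it calls the
+// JITted/AOTed code and stores its return value for the interpreter, based on ReturnType.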
+CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallStubGenerator::ReturnType returnType) +{ + STANDARD_VM_CONTRACT; + + switch (returnType) + { + case ReturnTypeVoid: + return CallJittedMethodRetVoid; + case ReturnTypeDouble: + return CallJittedMethodRetDouble; + case ReturnTypeI8: + return CallJittedMethodRetI8; +#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) + case ReturnTypeBuffArg1: + return CallJittedMethodRetBuffRCX; + case ReturnTypeBuffArg2: + return CallJittedMethodRetBuffRDX; +#else // TARGET_WINDOWS && TARGET_AMD64 + case ReturnTypeBuff: + return CallJittedMethodRetBuff; +#endif // TARGET_WINDOWS && TARGET_AMD64 +#ifdef UNIX_AMD64_ABI + case ReturnTypeI8I8: + return CallJittedMethodRetI8I8; + case ReturnTypeI8Double: + return CallJittedMethodRetI8Double; + case ReturnTypeDoubleI8: + return CallJittedMethodRetDoubleI8; + case ReturnTypeDoubleDouble: + return CallJittedMethodRetDoubleDouble; +#endif // UNIX_AMD64_ABI +#ifdef TARGET_ARM64 + case ReturnType2I8: + return CallJittedMethodRet2I8; + case ReturnType2Double: + return CallJittedMethodRet2Double; + case ReturnType3Double: + return CallJittedMethodRet3Double; + case ReturnType4Double: + return CallJittedMethodRet4Double; + case ReturnTypeFloat: + return CallJittedMethodRetFloat; + case ReturnType2Float: + return CallJittedMethodRet2Float; + case ReturnType3Float: + return CallJittedMethodRet3Float; + case ReturnType4Float: + return CallJittedMethodRet4Float; +#endif // TARGET_ARM64 + default: + _ASSERTE(!"Unexpected return type for interpreter stub"); + return NULL; // This should never happen, but just in case. + } +} + +PCODE CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::ReturnType returnType) +{ + STANDARD_VM_CONTRACT; + + switch (returnType) + { + case ReturnTypeVoid: + return (PCODE)InterpreterStubRetVoid; + case ReturnTypeDouble: + return (PCODE)InterpreterStubRetDouble; + case ReturnTypeI8: + return (PCODE)InterpreterStubRetI8; +#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) + case ReturnTypeBuffArg1: + return (PCODE)InterpreterStubRetBuffRCX; + case ReturnTypeBuffArg2: + return (PCODE)InterpreterStubRetBuffRDX; +#else // TARGET_WINDOWS && TARGET_AMD64 + case ReturnTypeBuff: + return (PCODE)InterpreterStubRetBuff; +#endif // TARGET_WINDOWS && TARGET_AMD64 +#ifdef UNIX_AMD64_ABI + case ReturnTypeI8I8: + return (PCODE)InterpreterStubRetI8I8; + case ReturnTypeI8Double: + return (PCODE)InterpreterStubRetI8Double; + case ReturnTypeDoubleI8: + return (PCODE)InterpreterStubRetDoubleI8; + case ReturnTypeDoubleDouble: + return (PCODE)InterpreterStubRetDoubleDouble; +#endif // UNIX_AMD64_ABI +#ifdef TARGET_ARM64 + case ReturnType2I8: + return (PCODE)InterpreterStubRet2I8; + case ReturnType2Double: + return (PCODE)InterpreterStubRet2Double; + case ReturnType3Double: + return (PCODE)InterpreterStubRet3Double; + case ReturnType4Double: + return (PCODE)InterpreterStubRet4Double; + case ReturnTypeFloat: + return (PCODE)InterpreterStubRetFloat; + case ReturnType2Float: + return (PCODE)InterpreterStubRet2Float; + case ReturnType3Float: + return (PCODE)InterpreterStubRet3Float; + case ReturnType4Float: + return (PCODE)InterpreterStubRet4Float; #endif // TARGET_ARM64 + default: + _ASSERTE(!"Unexpected return type for interpreter stub"); + return 0; // This should never happen, but just in case. + } +} // Generate the call stub for the given method. // The returned call stub header must be freed by the caller using FreeCallStub. 
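// When interpreterToNative is true the generated routines copy arguments from the
// interpreter stack into argument registers and the native stack (Load_* helpers);
// otherwise they copy the incoming register and stack arguments onto the interpreter
// stack (Store_* helpers) and the stub ends with the InterpreterStubRet* handler that
// matches the return type.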
-CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD, AllocMemTracker *pamTracker) +CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD, AllocMemTracker *pamTracker, bool interpreterToNative) { STANDARD_VM_CONTRACT; @@ -557,6 +1190,8 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD, AllocMemTra _ASSERTE(pMD != NULL); + m_interpreterToNative = interpreterToNative; + MetaSig sig(pMD); ArgIterator argIt(&sig); @@ -674,196 +1309,32 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD, AllocMemTra // Process such a range if any. if (m_r1 != NoRange) { - pRoutines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); + pRoutines[m_routineIndex++] = GetGPRegRangeRoutine(m_r1, m_r2); } else if (m_x1 != NoRange) { - pRoutines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); + pRoutines[m_routineIndex++] = GetFPRegRangeRoutine(m_x1, m_x2); } else if (m_s1 != NoRange) { m_totalStackSize += m_s2 - m_s1 + 1; - pRoutines[m_routineIndex++] = (PCODE)Load_Stack; + pRoutines[m_routineIndex++] = GetStackRoutine(); pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; } CallStubHeader::InvokeFunctionPtr pInvokeFunction = NULL; + ReturnType returnType = GetReturnType(&argIt); - if (argIt.HasRetBuffArg()) + if (m_interpreterToNative) { - pInvokeFunction = CallJittedMethodRetBuff; + pInvokeFunction = GetInvokeFunctionPtr(returnType); + m_routineIndex++; // Reserve one extra slot for the target method pointer } else { - TypeHandle thReturnValueType; - CorElementType thReturnType = sig.GetReturnTypeNormalized(&thReturnValueType); - - switch (thReturnType) - { - case ELEMENT_TYPE_BOOLEAN: - case ELEMENT_TYPE_CHAR: - case ELEMENT_TYPE_I1: - case ELEMENT_TYPE_U1: - case ELEMENT_TYPE_I2: - case ELEMENT_TYPE_U2: - case ELEMENT_TYPE_I4: - case ELEMENT_TYPE_U4: - case ELEMENT_TYPE_I8: - case ELEMENT_TYPE_U8: - case ELEMENT_TYPE_I: - case ELEMENT_TYPE_U: - case ELEMENT_TYPE_CLASS: - case ELEMENT_TYPE_OBJECT: - case ELEMENT_TYPE_STRING: - case ELEMENT_TYPE_PTR: - case ELEMENT_TYPE_BYREF: - case ELEMENT_TYPE_TYPEDBYREF: - case ELEMENT_TYPE_ARRAY: - case ELEMENT_TYPE_SZARRAY: - case ELEMENT_TYPE_FNPTR: - pInvokeFunction = CallJittedMethodRetI8; - break; - case ELEMENT_TYPE_R4: - case ELEMENT_TYPE_R8: - pInvokeFunction = CallJittedMethodRetDouble; - break; - case ELEMENT_TYPE_VOID: - pInvokeFunction = CallJittedMethodRetVoid; - break; - case ELEMENT_TYPE_VALUETYPE: -#ifdef TARGET_AMD64 -#ifdef TARGET_WINDOWS - if (thReturnValueType.AsMethodTable()->IsIntrinsicType()) - { - // E.g. 
Vector2 - pInvokeFunction = CallJittedMethodRetDouble; - } - else - { - // POD structs smaller than 64 bits are returned in rax - pInvokeFunction = CallJittedMethodRetI8; - } -#else // TARGET_WINDOWS - if (thReturnValueType.AsMethodTable()->IsRegPassedStruct()) - { - UINT fpReturnSize = argIt.GetFPReturnSize(); - if (fpReturnSize == 0) - { - pInvokeFunction = CallJittedMethodRetI8; - } - else if (fpReturnSize == 8) - { - pInvokeFunction = CallJittedMethodRetDouble; - } - else - { - _ASSERTE((fpReturnSize & 16) != 0); - // The fpReturnSize bits 0..1 have the following meaning: - // Bit 0 - the first 8 bytes of the struct is integer (0) or floating point (1) - // Bit 1 - the second 8 bytes of the struct is integer (0) or floating point (1) - switch (fpReturnSize & 0x3) - { - case 0: - pInvokeFunction = CallJittedMethodRetI8I8; - break; - case 1: - pInvokeFunction = CallJittedMethodRetDoubleI8; - break; - case 2: - pInvokeFunction = CallJittedMethodRetI8Double; - break; - case 3: - pInvokeFunction = CallJittedMethodRetDoubleDouble; - break; - } - } - } - else - { - _ASSERTE(!"All value types that are not returnable structs in registers should be returned using return buffer"); - } -#endif // TARGET_WINDOWS -#elif TARGET_ARM64 - // HFA, HVA, POD structs smaller than 128 bits - if (thReturnValueType.IsHFA()) - { - switch (thReturnValueType.GetHFAType()) - { - case CORINFO_HFA_ELEM_FLOAT: - switch (thReturnValueType.GetSize()) - { - case 4: - pInvokeFunction = CallJittedMethodRetFloat; - break; - case 8: - pInvokeFunction = CallJittedMethodRet2Float; - break; - case 12: - pInvokeFunction = CallJittedMethodRet3Float; - break; - case 16: - pInvokeFunction = CallJittedMethodRet4Float; - break; - default: - _ASSERTE(!"Should not get here"); - break; - } - break; - case CORINFO_HFA_ELEM_DOUBLE: - switch (thReturnValueType.GetSize()) - { - case 8: - pInvokeFunction = CallJittedMethodRetDouble; - break; - case 16: - pInvokeFunction = CallJittedMethodRet2Double; - break; - case 24: - pInvokeFunction = CallJittedMethodRet3Double; - break; - case 32: - pInvokeFunction = CallJittedMethodRet4Double; - break; - default: - _ASSERTE(!"Should not get here"); - break; - } - break; - default: - _ASSERTE(!"HFA types other than float and double are not supported yet"); - break; - } - } - else - { - switch (thReturnValueType.GetSize()) - { - case 1: - case 2: - case 4: - case 8: - pInvokeFunction = CallJittedMethodRetI8; - break; - case 16: - pInvokeFunction = CallJittedMethodRet2I8; - break; - default: - _ASSERTE(!"The return types that are not HFA should be <= 16 bytes in size"); - break; - } - } -#else - _ASSERTE(!"Struct returns by value are not supported yet"); -#endif - break; - default: - _ASSERTE(!"Unexpected return type"); - break; - } + pRoutines[m_routineIndex++] = GetInterpreterReturnTypeHandler(returnType); } - - m_routineIndex++; // Reserve one extra slot for the target method pointer - + LoaderAllocator *pLoaderAllocator = pMD->GetLoaderAllocator(); S_SIZE_T finalStubSize(sizeof(CallStubHeader) + m_routineIndex * sizeof(PCODE)); void *pHeaderStorage = pamTracker->Track(pLoaderAllocator->GetHighFrequencyHeap()->AllocMem(finalStubSize)); @@ -885,14 +1356,14 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD { // No GP register is used to pass the current argument, but we already have a range of GP registers, // store the routine for the range - pRoutines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); + pRoutines[m_routineIndex++] = GetGPRegRangeRoutine(m_r1, 
m_r2); m_r1 = NoRange; } else if (((argLocDesc.m_cFloatReg == 0)) && (m_x1 != NoRange)) { // No floating point register is used to pass the current argument, but we already have a range of FP registers, // store the routine for the range - pRoutines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); + pRoutines[m_routineIndex++] = GetFPRegRangeRoutine(m_x1, m_x2); m_x1 = NoRange; } else if ((argLocDesc.m_byteStackSize == 0) && (m_s1 != NoRange)) @@ -900,7 +1371,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD // No stack argument is used to pass the current argument, but we already have a range of stack arguments, // store the routine for the range m_totalStackSize += m_s2 - m_s1 + 1; - pRoutines[m_routineIndex++] = (PCODE)Load_Stack; + pRoutines[m_routineIndex++] = GetStackRoutine(); pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; m_s1 = NoRange; } @@ -922,7 +1393,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD else { // Discontinuous range - store a routine for the current and start a new one - pRoutines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); + pRoutines[m_routineIndex++] = GetGPRegRangeRoutine(m_r1, m_r2); m_r1 = argLocDesc.m_idxGenReg; m_r2 = m_r1 + argLocDesc.m_cGenReg - 1; } @@ -944,7 +1415,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD else { // Discontinuous range - store a routine for the current and start a new one - pRoutines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); + pRoutines[m_routineIndex++] = GetFPRegRangeRoutine(m_x1, m_x2); m_x1 = argLocDesc.m_idxFloatReg; m_x2 = m_x1 + argLocDesc.m_cFloatReg - 1; } @@ -970,7 +1441,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD { // Discontinuous range - store a routine for the current and start a new one m_totalStackSize += m_s2 - m_s1 + 1; - pRoutines[m_routineIndex++] = (PCODE)Load_Stack; + pRoutines[m_routineIndex++] = GetStackRoutine(); pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; m_s1 = argLocDesc.m_byteStackIndex; m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1; @@ -983,13 +1454,13 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD switch (argLocDesc.m_byteStackSize) { case 1: - pRoutines[m_routineIndex++] = (PCODE)Load_Stack_1B; + pRoutines[m_routineIndex++] = GetStackRoutine_1B(); break; case 2: - pRoutines[m_routineIndex++] = (PCODE)Load_Stack_2B; + pRoutines[m_routineIndex++] = GetStackRoutine_2B(); break; case 4: - pRoutines[m_routineIndex++] = (PCODE)Load_Stack_4B; + pRoutines[m_routineIndex++] = GetStackRoutine_4B(); break; default: _ASSERTE(!"Unexpected stack argument size"); @@ -1008,11 +1479,180 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD if (pArgIt != NULL && pArgIt->IsArgPassedByRef()) { _ASSERTE(argLocDesc.m_cGenReg == 1); - pRoutines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg); + pRoutines[m_routineIndex++] = GetGPRegRefRoutine(argLocDesc.m_idxGenReg); pRoutines[m_routineIndex++] = pArgIt->GetArgSize(); m_r1 = NoRange; } #endif // UNIX_AMD64_ABI } +CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIterator *pArgIt) +{ + if (pArgIt->HasRetBuffArg()) + { +#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) + if (pArgIt->HasThis()) + { + return ReturnTypeBuffArg2; + } + else + { + return ReturnTypeBuffArg1; + } +#else + return ReturnTypeBuff; +#endif + } + else + { + 
TypeHandle thReturnValueType; + CorElementType thReturnType = pArgIt->GetSig()->GetReturnTypeNormalized(&thReturnValueType); + + switch (thReturnType) + { + case ELEMENT_TYPE_BOOLEAN: + case ELEMENT_TYPE_CHAR: + case ELEMENT_TYPE_I1: + case ELEMENT_TYPE_U1: + case ELEMENT_TYPE_I2: + case ELEMENT_TYPE_U2: + case ELEMENT_TYPE_I4: + case ELEMENT_TYPE_U4: + case ELEMENT_TYPE_I8: + case ELEMENT_TYPE_U8: + case ELEMENT_TYPE_I: + case ELEMENT_TYPE_U: + case ELEMENT_TYPE_CLASS: + case ELEMENT_TYPE_OBJECT: + case ELEMENT_TYPE_STRING: + case ELEMENT_TYPE_PTR: + case ELEMENT_TYPE_BYREF: + case ELEMENT_TYPE_TYPEDBYREF: + case ELEMENT_TYPE_ARRAY: + case ELEMENT_TYPE_SZARRAY: + case ELEMENT_TYPE_FNPTR: + return ReturnTypeI8; + break; + case ELEMENT_TYPE_R4: + case ELEMENT_TYPE_R8: + return ReturnTypeDouble; + break; + case ELEMENT_TYPE_VOID: + return ReturnTypeVoid; + break; + case ELEMENT_TYPE_VALUETYPE: +#ifdef TARGET_AMD64 +#ifdef TARGET_WINDOWS + // POD structs smaller than 64 bits are returned in rax + return ReturnTypeI8; +#else // TARGET_WINDOWS + if (thReturnValueType.AsMethodTable()->IsRegPassedStruct()) + { + UINT fpReturnSize = pArgIt->GetFPReturnSize(); + if (fpReturnSize == 0) + { + return ReturnTypeI8; + } + else if (fpReturnSize == 8) + { + return ReturnTypeDouble; + } + else + { + _ASSERTE((fpReturnSize & 16) != 0); + // The fpReturnSize bits 0..1 have the following meaning: + // Bit 0 - the first 8 bytes of the struct is integer (0) or floating point (1) + // Bit 1 - the second 8 bytes of the struct is integer (0) or floating point (1) + switch (fpReturnSize & 0x3) + { + case 0: + return ReturnTypeI8I8; + case 1: + return ReturnTypeDoubleI8; + case 2: + return ReturnTypeI8Double; + case 3: + return ReturnTypeDoubleDouble; + } + } + } + else + { + _ASSERTE(!"All value types that are not returnable structs in registers should be returned using return buffer"); + } +#endif // TARGET_WINDOWS +#elif TARGET_ARM64 + // HFA, HVA, POD structs smaller than 128 bits + if (thReturnValueType.IsHFA()) + { + switch (thReturnValueType.GetHFAType()) + { + case CORINFO_HFA_ELEM_FLOAT: + switch (thReturnValueType.GetSize()) + { + case 4: + return ReturnTypeFloat; + case 8: + return ReturnType2Float; + case 12: + return ReturnType3Float; + case 16: + return ReturnType4Float; + default: + _ASSERTE(!"Should not get here"); + break; + } + break; + case CORINFO_HFA_ELEM_DOUBLE: + switch (thReturnValueType.GetSize()) + { + case 8: + return ReturnTypeDouble; + case 16: + return ReturnType2Double; + case 24: + return ReturnType3Double; + case 32: + return ReturnType4Double; + default: + _ASSERTE(!"Should not get here"); + break; + } + break; + default: + _ASSERTE(!"HFA types other than float and double are not supported yet"); + break; + } + } + else + { + switch (thReturnValueType.GetSize()) + { + case 1: + case 2: + case 4: + case 8: + return ReturnTypeI8; + break; + case 16: + return ReturnType2I8; + default: + _ASSERTE(!"The return types that are not HFA should be <= 16 bytes in size"); + break; + } + } +#else + _ASSERTE(!"Struct returns by value are not supported yet"); +#endif + break; + default: + _ASSERTE(!"Unexpected return type"); + break; + } + } + + // We should never reach this spot + return ReturnTypeVoid; +} + #endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index bce448b5e00ce1..09519287d0dcec 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -50,6 +50,35 @@ struct CallStubHeader // how to 
translate the arguments from the interpreter stack to the CPU registers and native stack. class CallStubGenerator { + enum ReturnType + { + ReturnTypeVoid, + ReturnTypeI8, + ReturnTypeDouble, +#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) + ReturnTypeBuffArg1, + ReturnTypeBuffArg2, +#else + ReturnTypeBuff, +#endif +#ifdef UNIX_AMD64_ABI + ReturnTypeI8I8, + ReturnTypeDoubleDouble, + ReturnTypeI8Double, + ReturnTypeDoubleI8, +#endif // UNIX_AMD64_ABI +#ifdef TARGET_ARM64 + ReturnType2I8, + ReturnType2Double, + ReturnType3Double, + ReturnType4Double, + ReturnTypeFloat, + ReturnType2Float, + ReturnType3Float, + ReturnType4Float +#endif // TARGET_ARM64 + }; + // When the m_r1, m_x1 or m_s1 are set to NoRange, it means that there is no active range of registers or stack arguments. static const int NoRange = -1; @@ -67,11 +96,28 @@ class CallStubGenerator // The total stack size used for the arguments. int m_totalStackSize; + bool m_interpreterToNative; + +#ifndef UNIX_AMD64_ABI + PCODE GetGPRegRefRoutine(int r); +#endif // !UNIX_AMD64_ABI + PCODE GetStackRoutine(); +#if defined(TARGET_APPLE) && defined(TARGET_ARM64) + PCODE GetStackRoutine_1B(); + PCODE GetStackRoutine_2B(); + PCODE GetStackRoutine_4B(); +#endif // TARGET_APPLE && TARGET_ARM64 + PCODE GetFPRegRangeRoutine(int x1, int x2); + PCODE GetGPRegRangeRoutine(int r1, int r2); + ReturnType GetReturnType(ArgIterator *pArgIt); + CallStubHeader::InvokeFunctionPtr GetInvokeFunctionPtr(ReturnType returnType); + PCODE GetInterpreterReturnTypeHandler(ReturnType returnType); + // Process the argument described by argLocDesc. This function is called for each argument in the method signature. void ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocDesc, PCODE *pRoutines); public: // Generate the call stub for the given method. - CallStubHeader *GenerateCallStub(MethodDesc *pMD, AllocMemTracker *pamTracker); + CallStubHeader *GenerateCallStub(MethodDesc *pMD, AllocMemTracker *pamTracker, bool interpreterToNative); }; #endif // CALLSTUBGENERATOR_H diff --git a/src/coreclr/vm/ceeload.cpp b/src/coreclr/vm/ceeload.cpp index 029da5ad02e72a..d85261fc781dba 100644 --- a/src/coreclr/vm/ceeload.cpp +++ b/src/coreclr/vm/ceeload.cpp @@ -66,6 +66,8 @@ #include "typekey.h" #include "peimagelayout.inl" +#include "interpexec.h" + #ifdef TARGET_64BIT #define COR_VTABLE_PTRSIZED COR_VTABLE_64BIT #define COR_VTABLE_NOT_PTRSIZED COR_VTABLE_32BIT diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 9814484e4942a4..18c4622942a240 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -36,7 +36,7 @@ void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet) GCX_PREEMP(); AllocMemTracker amTracker; - pHeader = callStubGenerator.GenerateCallStub(pMD, &amTracker); + pHeader = callStubGenerator.GenerateCallStub(pMD, &amTracker, true /* interpreterToNative */); if (pMD->SetCallStub(pHeader)) { @@ -55,6 +55,41 @@ void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet) pHeader->Invoke(pHeader->Routines, pArgs, pRet, pHeader->TotalStackSize); } +// Create call stub for calling interpreted methods from JITted/AOTed code. 
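+// The generated header is cached on the InterpMethod (pCallStub); if another thread
+// publishes its stub first, the locally generated copy is released by the AllocMemTracker.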
+CallStubHeader *CreateNativeToInterpreterCallStub(InterpMethod* pInterpMethod) +{ + CallStubGenerator callStubGenerator; + CallStubHeader *pHeader = VolatileLoadWithoutBarrier(&pInterpMethod->pCallStub); + GCX_PREEMP(); + + AllocMemTracker amTracker; + if (pHeader == NULL) + { + // Ensure that there is an interpreter thread context instance and thus an interpreter stack + // allocated for this thread. This allows us to not to have to check and allocate it from + // the interpreter stub right after this call. + GetThread()->GetInterpThreadContext(); + + GCX_PREEMP(); + + AllocMemTracker amTracker; + pHeader = callStubGenerator.GenerateCallStub((MethodDesc*)pInterpMethod->methodHnd, &amTracker, false /* interpreterToNative */); + + if (InterlockedCompareExchangeT(&pInterpMethod->pCallStub, pHeader, NULL) == NULL) + { + amTracker.SuppressRelease(); + } + else + { + // We have lost the race for generating the header, use the one that was generated by another thread + // and let the amTracker release the memory of the one we generated. + pHeader = VolatileLoadWithoutBarrier(&pInterpMethod->pCallStub); + } + } + + return pHeader; +} + typedef void* (*HELPER_FTN_PP)(void*); typedef void* (*HELPER_FTN_BOX_UNBOX)(MethodTable*, void*); typedef Object* (*HELPER_FTN_NEWARR)(CORINFO_CLASS_HANDLE, intptr_t); diff --git a/src/coreclr/vm/interpexec.h b/src/coreclr/vm/interpexec.h index 27d920bb97b666..698638f56537bd 100644 --- a/src/coreclr/vm/interpexec.h +++ b/src/coreclr/vm/interpexec.h @@ -24,6 +24,9 @@ struct StackVal } data; }; +typedef DPTR(struct InterpMethodContextFrame) PTR_InterpMethodContextFrame; +class InterpreterFrame; + struct InterpMethodContextFrame { PTR_InterpMethodContextFrame pParent; @@ -76,4 +79,6 @@ struct ExceptionClauseArgs void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFrame *pFrame, InterpThreadContext *pThreadContext, ExceptionClauseArgs *pExceptionClauseArgs = NULL); +CallStubHeader *CreateNativeToInterpreterCallStub(InterpMethod* pInterpMethod); + #endif diff --git a/src/coreclr/vm/interpframeallocator.h b/src/coreclr/vm/interpframeallocator.h index 3069dd6c36a5fa..b821a2aceef42a 100644 --- a/src/coreclr/vm/interpframeallocator.h +++ b/src/coreclr/vm/interpframeallocator.h @@ -4,6 +4,8 @@ #ifndef _INTERPFRAMEALLOCATOR_H_ #define _INTERPFRAMEALLOCATOR_H_ +struct InterpMethodContextFrame; + class FrameDataAllocator { private: diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp index fa1fb7a25b18f3..f99615cf4b0cd0 100644 --- a/src/coreclr/vm/prestub.cpp +++ b/src/coreclr/vm/prestub.cpp @@ -1991,7 +1991,7 @@ extern "C" PCODE STDCALL PreStubWorker(TransitionBlock* pTransitionBlock, Method } #ifdef FEATURE_INTERPRETER -extern "C" void STDCALL ExecuteInterpretedMethod(TransitionBlock* pTransitionBlock, TADDR byteCodeAddr) +extern "C" void* STDCALL ExecuteInterpretedMethod(TransitionBlock* pTransitionBlock, TADDR byteCodeAddr, void* retBuff) { // Argument registers are in the TransitionBlock // The stack arguments are right after the pTransitionBlock @@ -2017,15 +2017,15 @@ extern "C" void STDCALL ExecuteInterpretedMethod(TransitionBlock* pTransitionBlo } frames(pTransitionBlock); - StackVal retVal; - frames.interpMethodContextFrame.startIp = dac_cast(byteCodeAddr); frames.interpMethodContextFrame.pStack = sp; - frames.interpMethodContextFrame.pRetVal = (int8_t*)&retVal; + frames.interpMethodContextFrame.pRetVal = (retBuff != NULL) ? 
diff --git a/src/coreclr/vm/prestub.cpp b/src/coreclr/vm/prestub.cpp
index fa1fb7a25b18f3..f99615cf4b0cd0 100644
--- a/src/coreclr/vm/prestub.cpp
+++ b/src/coreclr/vm/prestub.cpp
@@ -1991,7 +1991,7 @@ extern "C" PCODE STDCALL PreStubWorker(TransitionBlock* pTransitionBlock, Method
 }

 #ifdef FEATURE_INTERPRETER
-extern "C" void STDCALL ExecuteInterpretedMethod(TransitionBlock* pTransitionBlock, TADDR byteCodeAddr)
+extern "C" void* STDCALL ExecuteInterpretedMethod(TransitionBlock* pTransitionBlock, TADDR byteCodeAddr, void* retBuff)
 {
     // Argument registers are in the TransitionBlock
     // The stack arguments are right after the pTransitionBlock
@@ -2017,15 +2017,15 @@ extern "C" void STDCALL ExecuteInterpretedMethod(TransitionBlock* pTransitionBlo
     } frames(pTransitionBlock);

-    StackVal retVal;
-
     frames.interpMethodContextFrame.startIp = dac_cast<PTR_InterpByteCodeStart>(byteCodeAddr);
     frames.interpMethodContextFrame.pStack = sp;
-    frames.interpMethodContextFrame.pRetVal = (int8_t*)&retVal;
+    frames.interpMethodContextFrame.pRetVal = (retBuff != NULL) ? (int8_t*)retBuff : sp;

     InterpExecMethod(&frames.interpreterFrame, &frames.interpMethodContextFrame, threadContext);

     frames.interpreterFrame.Pop();
+
+    return frames.interpMethodContextFrame.pRetVal;
 }
 #endif // FEATURE_INTERPRETER

@@ -2181,12 +2181,15 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT, CallerGCMode callerGCMo

         if (doBackpatch)
         {
-            RETURN DoBackpatch(pMT, pDispatchingMT, doFullBackpatch);
+            pCode = DoBackpatch(pMT, pDispatchingMT, doFullBackpatch);
+        }
+        else
+        {
+            _ASSERTE(!doFullBackpatch);
         }

         _ASSERTE(pCode != (PCODE)NULL);
-        _ASSERTE(!doFullBackpatch);
-        RETURN pCode;
+        goto Return;
     }
 #endif

@@ -2198,7 +2201,8 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT, CallerGCMode callerGCMo
         MarkMethodNotPitchingCandidate(this);
 #endif

-        RETURN DoBackpatch(pMT, pDispatchingMT, TRUE);
+        pCode = DoBackpatch(pMT, pDispatchingMT, TRUE);
+        goto Return;
     }

     /**************************   CODE CREATION  *************************/
@@ -2320,7 +2324,19 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT, CallerGCMode callerGCMo

     _ASSERTE(!IsPointingToPrestub());
     _ASSERTE(HasStableEntryPoint());
-    RETURN DoBackpatch(pMT, pDispatchingMT, FALSE);
+
+    pCode = DoBackpatch(pMT, pDispatchingMT, FALSE);
+
+Return:
+#ifdef FEATURE_INTERPRETER
+    InterpByteCodeStart *pInterpreterCode = GetInterpreterCode();
+    if (pInterpreterCode != NULL)
+    {
+        CreateNativeToInterpreterCallStub(pInterpreterCode->Method);
+    }
+#endif // FEATURE_INTERPRETER
+
+    RETURN pCode;
 }

 #endif // !DACCESS_COMPILE
diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp
index 1f5eaed0415897..1834db25e7b421 100644
--- a/src/coreclr/vm/threads.cpp
+++ b/src/coreclr/vm/threads.cpp
@@ -72,7 +72,7 @@ TailCallTls::TailCallTls()
 }

 #ifndef _MSC_VER
-thread_local RuntimeThreadLocals t_runtime_thread_locals;
+__thread RuntimeThreadLocals t_runtime_thread_locals;
 #endif

 Thread* STDCALL GetThreadHelper()
@@ -345,7 +345,7 @@ bool Thread::DetectHandleILStubsForDebugger()
 }

 #ifndef _MSC_VER
-thread_local ThreadLocalInfo t_CurrentThreadInfo;
+__thread ThreadLocalInfo t_CurrentThreadInfo;
 #endif // _MSC_VER

 #ifndef DACCESS_COMPILE
diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h
index 35dacabc6b94c7..fa3d3342e903e2 100644
--- a/src/coreclr/vm/threads.h
+++ b/src/coreclr/vm/threads.h
@@ -433,11 +433,11 @@ struct RuntimeThreadLocals

 #ifdef _MSC_VER
 // use selectany to avoid initialization de-optimization issues in the compiler
-__declspec(selectany)
+__declspec(selectany) thread_local
 #else
-extern
+extern __thread
 #endif
-thread_local RuntimeThreadLocals t_runtime_thread_locals;
+RuntimeThreadLocals t_runtime_thread_locals;

 typedef DPTR(struct RuntimeThreadLocals) PTR_RuntimeThreadLocals;
 typedef DPTR(struct gc_alloc_context) PTR_gc_alloc_context;
@@ -3964,10 +3964,8 @@ friend class DebuggerController;
 friend struct ::cdac_data<Thread>;

 #ifdef FEATURE_INTERPRETER
-private:
-    InterpThreadContext *m_pInterpThreadContext;
-
 public:
+    InterpThreadContext *m_pInterpThreadContext;
     InterpThreadContext* GetInterpThreadContext();
 #endif // FEATURE_INTERPRETER
 };
diff --git a/src/coreclr/vm/threads.inl b/src/coreclr/vm/threads.inl
index 83683f62bf3648..8b59caaa10437e 100644
--- a/src/coreclr/vm/threads.inl
+++ b/src/coreclr/vm/threads.inl
@@ -27,11 +27,11 @@ EXTERN_C UINT32 _tls_index;
 #endif

 #ifdef _MSC_VER
-__declspec(selectany)
+__declspec(selectany) thread_local
 #else
-EXTERN_C
+EXTERN_C __thread
 #endif
-thread_local ThreadLocalInfo t_CurrentThreadInfo;
+ThreadLocalInfo t_CurrentThreadInfo;

 inline Thread* GetThreadNULLOk()
 {
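One likely motivation for switching t_runtime_thread_locals and t_CurrentThreadInfo from thread_local to __thread in the threads.cpp/threads.h/threads.inl changes above: with GCC/Clang, a C++ thread_local that may need dynamic initialization is reached through a compiler-generated thread wrapper function, whereas __thread only permits constant initialization, so the variable is a plain TLS object whose address can be formed with an ordinary TLS access sequence. A small standalone illustration (not runtime code; Counters and CurrentAllocCounter are hypothetical names):

    // With GCC/Clang, __thread forbids dynamic initialization and non-trivial
    // destruction, so no per-variable thread wrapper or guard is emitted.
    struct Counters
    {
        int allocs;
        int frees;
    };

    __thread Counters t_counters;   // plain TLS slot; its address is a direct TLS access
    // A thread_local with a dynamic initializer would instead be accessed through a
    // compiler-generated thread wrapper so the initializer can run on first use.

    int* CurrentAllocCounter()
    {
        return &t_counters.allocs;
    }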
diff --git a/src/tests/JIT/interpreter/Interpreter.cs b/src/tests/JIT/interpreter/Interpreter.cs
index f1d7baf90a6693..d7aaaba2196a95 100644
--- a/src/tests/JIT/interpreter/Interpreter.cs
+++ b/src/tests/JIT/interpreter/Interpreter.cs
@@ -135,16 +135,80 @@ public struct TestStruct3d
 public class InterpreterTest
 {
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static void TestCallingConvention0(int a, float b, int c, double d, int e, double f)
     {
         Console.WriteLine("TestCallingConvention0: a = {0}, b = {1}, c = {2}, d = {3}, e = {4}, f = {5}", a, b, c, d, e, f);
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention0Rev(int a, float b, int c, double d, int e, double f)
+    {
+        Console.Write("TestCallingConvention0Rev: a = ");
+        Console.Write(a);
+        Console.Write(", b = ");
+        Console.Write(b);
+        Console.Write(", c = ");
+        Console.Write(c);
+        Console.Write(", d = ");
+        Console.Write(d);
+        Console.Write(", e = ");
+        Console.Write(e);
+        Console.Write(", f = ");
+        Console.Write(f);
+        Console.WriteLine();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention0JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestCallingConvention0Rev(1, 2.0f, 3, 4.0, 5, 6.0);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static void TestCallingConvention1(TestStruct s)
     {
         Console.WriteLine("TestCallingConvention1: a = {0}, b = {1}, c = {2}, d = {3}, e = {4}, f = {5}", s.a, s.b, s.c, s.d, s.e, s.f);
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention1Rev(TestStruct s)
+    {
+        Console.Write("TestCallingConvention1Rev: a = ");
+        Console.Write(s.a);
+        Console.Write(", b = ");
+        Console.Write(s.b);
+        Console.Write(", c = ");
+        Console.Write(s.c);
+        Console.Write(", d = ");
+        Console.Write(s.d);
+        Console.Write(", e = ");
+        Console.Write(s.e);
+        Console.Write(", f = ");
+        Console.Write(s.f);
+        Console.WriteLine();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention1JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct s;
+            s.a = 1;
+            s.b = 2;
+            s.c = 3;
+            s.d = 4;
+            s.e = 5;
+            s.f = 6;
+            TestCallingConvention1Rev(s);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static TestStruct2 TestCallingConvention2()
     {
         TestStruct2 s;
@@ -153,12 +217,50 @@ static TestStruct2 TestCallingConvention2()
         return s;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static TestStruct2 TestCallingConvention2Rev()
+    {
+        TestStruct2 s;
+        s.a = 1;
+        s.b = 2;
+        return s;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention2JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct2 s = TestCallingConvention2Rev();
+            Console.WriteLine("TestCallingConvention2Rev: s = {0}, {1}", s.a, s.b);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector2 TestCallingConvention3()
     {
         Vector2 v = new Vector2(1, 2);
         return v;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector2 TestCallingConvention3Rev()
+    {
+        Vector2 v = new Vector2(1, 2);
+        return v;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention3JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            Vector2 v = TestCallingConvention3Rev();
+            Console.WriteLine("TestCallingConvention3Rev: v = {0}, {1}", v[0], v[1]);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static TestStruct TestCallingConvention4()
     {
         TestStruct s;
@@ -171,6 +273,30 @@ static TestStruct TestCallingConvention4()
         return s;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static TestStruct TestCallingConvention4Rev()
+    {
+        TestStruct s;
+        s.a = 1;
+        s.b = 2;
+        s.c = 3;
+        s.d = 4;
+        s.e = 5;
+        s.f = 6;
+        return s;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention4JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct s = TestCallingConvention4Rev();
+            Console.WriteLine("TestCallingConvention4Rev: s = {0}, {1}, {2}, {3}, {4}, {5}", s.a, s.b, s.c, s.d, s.e, s.f);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static TestStruct4ii TestCallingConvention5()
     {
         TestStruct4ii s;
@@ -181,6 +307,28 @@ static TestStruct4ii TestCallingConvention5()
         return s;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static TestStruct4ii TestCallingConvention5Rev()
+    {
+        TestStruct4ii s;
+        s.a = 1;
+        s.b = 2;
+        s.c = 3;
+        s.d = 4;
+        return s;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention5JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct4ii s = TestCallingConvention5Rev();
+            Console.WriteLine("TestCallingConvention5Rev: s = {0}, {1}, {2}, {3}", s.a, s.b, s.c, s.d);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static TestStruct4if TestCallingConvention6()
     {
         TestStruct4if s;
@@ -191,6 +339,28 @@ static TestStruct4if TestCallingConvention6()
         return s;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static TestStruct4if TestCallingConvention6Rev()
+    {
+        TestStruct4if s;
+        s.a = 1;
+        s.b = 2;
+        s.c = 3.0f;
+        s.d = 4.0f;
+        return s;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention6JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct4if s = TestCallingConvention6Rev();
+            Console.WriteLine("TestCallingConvention6Rev: s = {0}, {1}, {2}, {3}", s.a, s.b, s.c, s.d);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static TestStruct4fi TestCallingConvention7()
     {
         TestStruct4fi s;
@@ -201,6 +371,28 @@ static TestStruct4fi TestCallingConvention7()
         return s;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static TestStruct4fi TestCallingConvention7Rev()
+    {
+        TestStruct4fi s;
+        s.a = 1.0f;
+        s.b = 2.0f;
+        s.c = 3;
+        s.d = 4;
+        return s;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention7JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct4fi s = TestCallingConvention7Rev();
+            Console.WriteLine("TestCallingConvention7Rev: s = {0}, {1}, {2}, {3}", s.a, s.b, s.c, s.d);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static TestStruct4ff TestCallingConvention8()
     {
         TestStruct4ff s;
@@ -211,16 +403,93 @@ static TestStruct4ff TestCallingConvention8()
         return s;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static TestStruct4ff TestCallingConvention8Rev()
+    {
+        TestStruct4ff s;
+        s.a = 1.0f;
+        s.b = 2.0f;
+        s.c = 3.0f;
+        s.d = 4.0f;
+        return s;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention8JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct4ff s = TestCallingConvention8Rev();
+            Console.WriteLine("TestCallingConvention8Rev: s = {0}, {1}, {2}, {3}", s.a, s.b, s.c, s.d);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static void TestCallingConvention9(TestStruct4fi s)
     {
         Console.WriteLine("TestCallingConvention9: a = {0}, b = {1}, c = {2}, d = {3}", s.a, s.b, s.c, s.d);
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention9Rev(TestStruct4fi s)
+    {
+        Console.Write("TestCallingConvention9Rev: a = ");
+        Console.Write(s.a);
+        Console.Write(", b = ");
+        Console.Write(s.b);
+        Console.Write(", c = ");
+        Console.Write(s.c);
+        Console.Write(", d = ");
+        Console.Write(s.d);
+        Console.WriteLine();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention9JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct4fi s = new TestStruct4fi();
+            s.a = 1.0f;
+            s.b = 2.0f;
+            s.c = 3;
+            s.d = 4;
+            TestCallingConvention9Rev(s);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static void TestCallingConvention10(TestStruct3d s)
     {
         Console.WriteLine("TestCallingConvention10: a = {0}, b = {1}, c = {2}", s.a, s.b, s.c);
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention10Rev(TestStruct3d s)
+    {
+        Console.Write("TestCallingConvention10Rev: a = ");
+        Console.Write(s.a);
+        Console.Write(", b = ");
+        Console.Write(s.b);
+        Console.Write(", c = ");
+        Console.Write(s.c);
+        Console.WriteLine();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention10JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct3d s = new TestStruct3d();
+            s.a = 1.0f;
+            s.b = 2.0f;
+            s.c = 3.0f;
+            TestCallingConvention10Rev(s);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static TestStruct3d TestCallingConvention11()
     {
         TestStruct3d s;
@@ -230,11 +499,73 @@ static TestStruct3d TestCallingConvention11()
         return s;
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static TestStruct3d TestCallingConvention11Rev()
+    {
+        TestStruct3d s;
+        s.a = 1.0f;
+        s.b = 2.0f;
+        s.c = 3.0f;
+        return s;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention11JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestStruct3d s = TestCallingConvention11Rev();
+            Console.WriteLine("TestCallingConvention11Rev: s = {0}, {1}, {2}", s.a, s.b, s.c);
+        }
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static void TestCallingConvention12(byte a, byte b, byte c, byte d, byte e, byte f, byte g, byte h, byte i, char j, int k, int l, long m)
     {
         Console.WriteLine("TestCallingConvention12: a = {0}, b = {1}, c = {2}, d = {3}, e = {4}, f = {5}, g = {6}, h = {7}, i = {8}, j = {9}, k = {10}, l = {11}, m = {12}", a, b, c, d, e, f, g, h, i, j, k, l, m);
     }

+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention12Rev(byte a, byte b, byte c, byte d, byte e, byte f, byte g, byte h, byte i, char j, int k, int l, long m)
+    {
+        Console.Write("TestCallingConvention12Rev: a = ");
+        Console.Write(a);
+        Console.Write(", b = ");
+        Console.Write(b);
+        Console.Write(", c = ");
+        Console.Write(c);
+        Console.Write(", d = ");
+        Console.Write(d);
+        Console.Write(", e = ");
+        Console.Write(e);
+        Console.Write(", f = ");
+        Console.Write(f);
+        Console.Write(", g = ");
+        Console.Write(g);
+        Console.Write(", h = ");
+        Console.Write(h);
+        Console.Write(", i = ");
+        Console.Write(i);
+        Console.Write(", j = ");
+        Console.Write(j);
+        Console.Write(", k = ");
+        Console.Write(k);
+        Console.Write(", l = ");
+        Console.Write(l);
+        Console.Write(", m = ");
+        Console.Write(m);
+        Console.WriteLine();
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void TestCallingConvention12JitToInterpreter(bool init)
+    {
+        if (!init)
+        {
+            TestCallingConvention12Rev(1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 10, 11, 12);
+        }
+    }
+
     // This method is invoked before we start interpretting anything, so the methods invoked in it will be jitted.
     // This is necessary for the calling convention tests that test calls from the interpreter to the JITted code
     // to actually test things.
@@ -285,6 +616,20 @@ static void EnsureCallingConventionTestTargetMethodsAreJitted()
         Console.WriteLine(s11.c);

         TestCallingConvention12(1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 10, 11, 12);
+
+        TestCallingConvention0JitToInterpreter(true);
+        TestCallingConvention1JitToInterpreter(true);
+        TestCallingConvention2JitToInterpreter(true);
+        TestCallingConvention3JitToInterpreter(true);
+        TestCallingConvention4JitToInterpreter(true);
+        TestCallingConvention5JitToInterpreter(true);
+        TestCallingConvention6JitToInterpreter(true);
+        TestCallingConvention7JitToInterpreter(true);
+        TestCallingConvention8JitToInterpreter(true);
+        TestCallingConvention9JitToInterpreter(true);
+        TestCallingConvention10JitToInterpreter(true);
+        TestCallingConvention11JitToInterpreter(true);
+        TestCallingConvention12JitToInterpreter(true);
     }

     static int Main(string[] args)
@@ -301,6 +646,20 @@ static int Main(string[] args)
     [MethodImpl(MethodImplOptions.NoInlining)]
     public static void RunInterpreterTests()
     {
+        TestCallingConvention0JitToInterpreter(false);
+        TestCallingConvention1JitToInterpreter(false);
+        TestCallingConvention2JitToInterpreter(false);
+        TestCallingConvention3JitToInterpreter(false);
+        TestCallingConvention4JitToInterpreter(false);
+        TestCallingConvention5JitToInterpreter(false);
+        TestCallingConvention6JitToInterpreter(false);
+        TestCallingConvention7JitToInterpreter(false);
+        TestCallingConvention8JitToInterpreter(false);
+        TestCallingConvention9JitToInterpreter(false);
+        TestCallingConvention10JitToInterpreter(false);
+        TestCallingConvention11JitToInterpreter(false);
+        TestCallingConvention12JitToInterpreter(false);
+
         TestCallingConvention0(1, 2.0f, 3, 4.0, 5, 6.0);

         TestStruct s = new TestStruct();