/* * This file was generated automatically by gen-template.py for 'mips'. * * --> DO NOT EDIT <-- */ /* File: mips/header.S */ /* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #if defined(WITH_JIT) /* * This is a #include, not a %include, because we want the C pre-processor * to expand the macros into assembler assignment statements. */ #include "../../../mterp/common/asm-constants.h" #include "../../../mterp/common/mips-defines.h" #include "../../../mterp/common/jit-config.h" #include <asm/regdef.h> #include <asm/fpregdef.h> #ifdef __mips_hard_float #define HARD_FLOAT #else #define SOFT_FLOAT #endif /* MIPS definitions and declarations reg nick purpose s0 rPC interpreted program counter, used for fetching instructions s1 rFP interpreted frame pointer, used for accessing locals and args s2 rSELF pointer to thread s3 rIBASE interpreted instruction base pointer, used for computed goto s4 rINST first 16-bit code unit of current instruction */ /* register offsets */ #define r_ZERO 0 #define r_AT 1 #define r_V0 2 #define r_V1 3 #define r_A0 4 #define r_A1 5 #define r_A2 6 #define r_A3 7 #define r_T0 8 #define r_T1 9 #define r_T2 10 #define r_T3 11 #define r_T4 12 #define r_T5 13 #define r_T6 14 #define r_T7 15 #define r_S0 16 #define r_S1 17 #define r_S2 18 #define r_S3 19 #define r_S4 20 #define r_S5 21 #define r_S6 22 #define r_S7 23 #define r_T8 24 #define r_T9 25 #define r_K0 26 #define r_K1 27 #define r_GP 28 #define r_SP 29 #define r_FP 30 #define r_RA 31 #define r_F0 32 #define r_F1 33 #define r_F2 34 #define r_F3 35 #define r_F4 36 #define r_F5 37 #define r_F6 38 #define r_F7 39 #define r_F8 40 #define r_F9 41 #define r_F10 42 #define r_F11 43 #define r_F12 44 #define r_F13 45 #define r_F14 46 #define r_F15 47 #define r_F16 48 #define r_F17 49 #define r_F18 50 #define r_F19 51 #define r_F20 52 #define r_F21 53 #define r_F22 54 #define r_F23 55 #define r_F24 56 #define r_F25 57 #define r_F26 58 #define r_F27 59 #define r_F28 60 #define r_F29 61 #define r_F30 62 #define r_F31 63 /* single-purpose registers, given names for clarity */ #define rPC s0 #define rFP s1 #define rSELF s2 #define rIBASE s3 #define rINST s4 #define rOBJ s5 #define rBIX s6 #define rTEMP s7 /* The long arguments sent to function calls in Big-endian mode should be register swapped when sent to functions in little endian mode. 
In other words long variable sent as a0(MSW), a1(LSW) for a function call in LE mode should be sent as a1, a0 in Big Endian mode */ #ifdef HAVE_LITTLE_ENDIAN #define rARG0 a0 #define rARG1 a1 #define rARG2 a2 #define rARG3 a3 #define rRESULT0 v0 #define rRESULT1 v1 #else #define rARG0 a1 #define rARG1 a0 #define rARG2 a3 #define rARG3 a2 #define rRESULT0 v1 #define rRESULT1 v0 #endif /* save/restore the PC and/or FP from the thread struct */ #define LOAD_PC_FROM_SELF() lw rPC, offThread_pc(rSELF) #define SAVE_PC_TO_SELF() sw rPC, offThread_pc(rSELF) #define LOAD_FP_FROM_SELF() lw rFP, offThread_curFrame(rSELF) #define SAVE_FP_TO_SELF() sw rFP, offThread_curFrame(rSELF) #define EXPORT_PC() \ sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) #define SAVEAREA_FROM_FP(rd, _fpreg) \ subu rd, _fpreg, sizeofStackSaveArea #define FETCH_INST() lhu rINST, (rPC) #define FETCH_ADVANCE_INST(_count) lhu rINST, (_count*2)(rPC); \ addu rPC, rPC, (_count * 2) #define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \ lhu rINST, (rPC) #define FETCH(rd, _count) lhu rd, (_count * 2)(rPC) #define FETCH_S(rd, _count) lh rd, (_count * 2)(rPC) #ifdef HAVE_LITTLE_ENDIAN #define FETCH_B(rd, _count) lbu rd, (_count * 2)(rPC) #define FETCH_C(rd, _count) lbu rd, (_count * 2 + 1)(rPC) #else #define FETCH_B(rd, _count) lbu rd, (_count * 2 + 1)(rPC) #define FETCH_C(rd, _count) lbu rd, (_count * 2)(rPC) #endif #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF #define GOTO_OPCODE(rd) sll rd, rd, -1000; \ addu rd, rIBASE, rd; \ jr rd #define LOAD(rd, rbase) lw rd, 0(rbase) #define LOAD_F(rd, rbase) l.s rd, (rbase) #define STORE(rd, rbase) sw rd, 0(rbase) #define STORE_F(rd, rbase) s.s rd, (rbase) #define GET_VREG(rd, rix) LOAD_eas2(rd,rFP,rix) #define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \ .set noat; l.s rd, (AT); .set at #define SET_VREG(rd, rix) STORE_eas2(rd, rFP, rix) #define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \ sll dst, dst, -1000; \ addu dst, rIBASE, dst; \ sll t8, rix, 2; \ addu t8, t8, rFP; \ jr dst; \ sw rd, 0(t8); \ .set reorder #define SET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \ .set noat; s.s rd, (AT); .set at #define GET_OPA(rd) srl rd, rINST, 8 #ifndef MIPS32R2 #define GET_OPA4(rd) GET_OPA(rd); and rd, 0xf #else #define GET_OPA4(rd) ext rd, rd, 8, 4 #endif #define GET_OPB(rd) srl rd, rINST, 12 #define LOAD_rSELF_OFF(rd,off) lw rd, offThread_##off##(rSELF) #define LOAD_rSELF_method(rd) LOAD_rSELF_OFF(rd, method) #define LOAD_rSELF_methodClassDex(rd) LOAD_rSELF_OFF(rd, methodClassDex) #define LOAD_rSELF_interpStackEnd(rd) LOAD_rSELF_OFF(rd, interpStackEnd) #define LOAD_rSELF_retval(rd) LOAD_rSELF_OFF(rd, retval) #define LOAD_rSELF_pActiveProfilers(rd) LOAD_rSELF_OFF(rd, pActiveProfilers) #define LOAD_rSELF_bailPtr(rd) LOAD_rSELF_OFF(rd, bailPtr) #define GET_JIT_PROF_TABLE(rd) LOAD_rSELF_OFF(rd,pJitProfTable) #define GET_JIT_THRESHOLD(rd) LOAD_rSELF_OFF(rd,jitThreshold) /* * Form an Effective Address rd = rbase + roff<<n; * Uses reg AT */ #define EASN(rd,rbase,roff,rshift) .set noat; \ sll AT, roff, rshift; \ addu rd, rbase, AT; \ .set at #define EAS1(rd,rbase,roff) EASN(rd,rbase,roff,1) #define EAS2(rd,rbase,roff) EASN(rd,rbase,roff,2) #define EAS3(rd,rbase,roff) EASN(rd,rbase,roff,3) #define EAS4(rd,rbase,roff) EASN(rd,rbase,roff,4) /* * Form an Effective Shift Right rd = rbase + roff>>n; * Uses reg AT */ #define ESRN(rd,rbase,roff,rshift) .set noat; \ srl AT, roff, rshift; \ addu rd, rbase, AT; \ .set at #define LOAD_eas2(rd,rbase,roff) EAS2(AT, rbase, roff); \ .set noat; lw rd, 0(AT); .set 
at #define STORE_eas2(rd,rbase,roff) EAS2(AT, rbase, roff); \ .set noat; sw rd, 0(AT); .set at #define LOAD_RB_OFF(rd,rbase,off) lw rd, off(rbase) #define LOADu2_RB_OFF(rd,rbase,off) lhu rd, off(rbase) #define STORE_RB_OFF(rd,rbase,off) sw rd, off(rbase) #ifdef HAVE_LITTLE_ENDIAN #define STORE64_off(rlo,rhi,rbase,off) sw rlo, off(rbase); \ sw rhi, (off+4)(rbase) #define LOAD64_off(rlo,rhi,rbase,off) lw rlo, off(rbase); \ lw rhi, (off+4)(rbase) #define STORE64_off_F(rlo,rhi,rbase,off) s.s rlo, off(rbase); \ s.s rhi, (off+4)(rbase) #define LOAD64_off_F(rlo,rhi,rbase,off) l.s rlo, off(rbase); \ l.s rhi, (off+4)(rbase) #else #define STORE64_off(rlo,rhi,rbase,off) sw rlo, (off+4)(rbase); \ sw rhi, (off)(rbase) #define LOAD64_off(rlo,rhi,rbase,off) lw rlo, (off+4)(rbase); \ lw rhi, (off)(rbase) #define STORE64_off_F(rlo,rhi,rbase,off) s.s rlo, (off+4)(rbase); \ s.s rhi, (off)(rbase) #define LOAD64_off_F(rlo,rhi,rbase,off) l.s rlo, (off+4)(rbase); \ l.s rhi, (off)(rbase) #endif #define STORE64(rlo,rhi,rbase) STORE64_off(rlo,rhi,rbase,0) #define LOAD64(rlo,rhi,rbase) LOAD64_off(rlo,rhi,rbase,0) #define STORE64_F(rlo,rhi,rbase) STORE64_off_F(rlo,rhi,rbase,0) #define LOAD64_F(rlo,rhi,rbase) LOAD64_off_F(rlo,rhi,rbase,0) #define STORE64_lo(rd,rbase) sw rd, 0(rbase) #define STORE64_hi(rd,rbase) sw rd, 4(rbase) #define LOAD_offThread_exception(rd,rbase) LOAD_RB_OFF(rd,rbase,offThread_exception) #define LOAD_base_offArrayObject_length(rd,rbase) LOAD_RB_OFF(rd,rbase,offArrayObject_length) #define LOAD_base_offClassObject_accessFlags(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_accessFlags) #define LOAD_base_offClassObject_descriptor(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_descriptor) #define LOAD_base_offClassObject_super(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_super) #define LOAD_base_offClassObject_vtable(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_vtable) #define LOAD_base_offClassObject_vtableCount(rd,rbase) LOAD_RB_OFF(rd,rbase,offClassObject_vtableCount) #define LOAD_base_offDvmDex_pResClasses(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResClasses) #define LOAD_base_offDvmDex_pResFields(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResFields) #define LOAD_base_offDvmDex_pResMethods(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResMethods) #define LOAD_base_offDvmDex_pResStrings(rd,rbase) LOAD_RB_OFF(rd,rbase,offDvmDex_pResStrings) #define LOAD_base_offInstField_byteOffset(rd,rbase) LOAD_RB_OFF(rd,rbase,offInstField_byteOffset) #define LOAD_base_offStaticField_value(rd,rbase) LOAD_RB_OFF(rd,rbase,offStaticField_value) #define LOAD_base_offMethod_clazz(rd,rbase) LOAD_RB_OFF(rd,rbase,offMethod_clazz) #define LOAD_base_offMethod_name(rd,rbase) LOAD_RB_OFF(rd,rbase,offMethod_name) #define LOAD_base_offObject_clazz(rd,rbase) LOAD_RB_OFF(rd,rbase,offObject_clazz) #define LOADu2_offMethod_methodIndex(rd,rbase) LOADu2_RB_OFF(rd,rbase,offMethod_methodIndex) #define STORE_offThread_exception(rd,rbase) STORE_RB_OFF(rd,rbase,offThread_exception) #define STACK_STORE(rd,off) sw rd, off(sp) #define STACK_LOAD(rd,off) lw rd, off(sp) #define CREATE_STACK(n) subu sp, sp, n #define DELETE_STACK(n) addu sp, sp, n #define SAVE_RA(offset) STACK_STORE(ra, offset) #define LOAD_RA(offset) STACK_LOAD(ra, offset) #define LOAD_ADDR(dest,addr) la dest, addr #define LOAD_IMM(dest, imm) li dest, imm #define MOVE_REG(dest,src) move dest, src #define RETURN jr ra #define STACK_SIZE 128 #define STACK_OFFSET_ARG04 16 #define STACK_OFFSET_GP 84 #define STACK_OFFSET_rFP 112 /* This directive will make sure all subsequent jal 
restore gp at a known offset */ .cprestore STACK_OFFSET_GP #define JAL(func) move rTEMP, ra; \ jal func; \ move ra, rTEMP #define JALR(reg) move rTEMP, ra; \ jalr ra, reg; \ move ra, rTEMP #define BAL(n) bal n #define STACK_STORE_RA() CREATE_STACK(STACK_SIZE); \ STACK_STORE(gp, STACK_OFFSET_GP); \ STACK_STORE(ra, 124) #define STACK_STORE_S0() STACK_STORE_RA(); \ STACK_STORE(s0, 116) #define STACK_STORE_S0S1() STACK_STORE_S0(); \ STACK_STORE(s1, STACK_OFFSET_rFP) #define STACK_LOAD_RA() STACK_LOAD(ra, 124); \ STACK_LOAD(gp, STACK_OFFSET_GP); \ DELETE_STACK(STACK_SIZE) #define STACK_LOAD_S0() STACK_LOAD(s0, 116); \ STACK_LOAD_RA() #define STACK_LOAD_S0S1() STACK_LOAD(s1, STACK_OFFSET_rFP); \ STACK_LOAD_S0() #define STACK_STORE_FULL() CREATE_STACK(STACK_SIZE); \ STACK_STORE(ra, 124); \ STACK_STORE(fp, 120); \ STACK_STORE(s0, 116); \ STACK_STORE(s1, STACK_OFFSET_rFP); \ STACK_STORE(s2, 108); \ STACK_STORE(s3, 104); \ STACK_STORE(s4, 100); \ STACK_STORE(s5, 96); \ STACK_STORE(s6, 92); \ STACK_STORE(s7, 88); #define STACK_LOAD_FULL() STACK_LOAD(gp, STACK_OFFSET_GP); \ STACK_LOAD(s7, 88); \ STACK_LOAD(s6, 92); \ STACK_LOAD(s5, 96); \ STACK_LOAD(s4, 100); \ STACK_LOAD(s3, 104); \ STACK_LOAD(s2, 108); \ STACK_LOAD(s1, STACK_OFFSET_rFP); \ STACK_LOAD(s0, 116); \ STACK_LOAD(fp, 120); \ STACK_LOAD(ra, 124); \ DELETE_STACK(STACK_SIZE) /* * first 8 words are reserved for function calls * Maximum offset is STACK_OFFSET_SCRMX-STACK_OFFSET_SCR */ #define STACK_OFFSET_SCR 32 #define SCRATCH_STORE(r,off) \ STACK_STORE(r, STACK_OFFSET_SCR+off); #define SCRATCH_LOAD(r,off) \ STACK_LOAD(r, STACK_OFFSET_SCR+off); /* File: mips/platform.S */ /* * =========================================================================== * CPU-version-specific defines and utility * =========================================================================== */ .global dvmCompilerTemplateStart .type dvmCompilerTemplateStart, %function .section .data.rel.ro dvmCompilerTemplateStart: /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_CMP_LONG dvmCompiler_TEMPLATE_CMP_LONG: /* File: mips/TEMPLATE_CMP_LONG.S */ /* * Compare two 64-bit values * x = y return 0 * x < y return -1 * x > y return 1 * * I think I can improve on the ARM code by the following observation * slt t0, x.hi, y.hi; # (x.hi < y.hi) ? 1:0 * sgt t1, x.hi, y.hi; # (y.hi > x.hi) ? 1:0 * subu v0, t0, t1 # v0= -1:1:0 for [ < > = ] * * This code assumes the register pair ordering will depend on endianess (a1:a0 or a0:a1). * a1:a0 => vBB * a3:a2 => vCC */ /* cmp-long vAA, vBB, vCC */ slt t0, rARG1, rARG3 # compare hi sgt t1, rARG1, rARG3 subu v0, t1, t0 # v0<- (-1,1,0) bnez v0, .LTEMPLATE_CMP_LONG_finish # at this point x.hi==y.hi sltu t0, rARG0, rARG2 # compare lo sgtu t1, rARG0, rARG2 subu v0, t1, t0 # v0<- (-1,1,0) for [< > =] .LTEMPLATE_CMP_LONG_finish: RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_RETURN dvmCompiler_TEMPLATE_RETURN: /* File: mips/TEMPLATE_RETURN.S */ /* * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX. * If the stored value in returnAddr * is non-zero, the caller is compiled by the JIT thus return to the * address in the code cache following the invoke instruction. Otherwise * return to the special dvmJitToInterpNoChain entry point. 
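 *
 * Roughly, the decision made below corresponds to the following C sketch
 * (illustrative names only, not the real interpreter API):
 *
 *     if (saveArea->returnAddr != NULL) {
 *         branchTo(saveArea->returnAddr);     // caller is JIT code: resume
 *                                             // right after the invoke
 *     } else {
 *         branchTo(dvmJitToInterpNoChain);    // caller is interpreted
 *     }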
*/ #if defined(TEMPLATE_INLINE_PROFILING) # preserve a0-a2 and ra SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(ra, 12) # a0=rSELF move a0, rSELF la t9, dvmFastMethodTraceExit JALR(t9) lw gp, STACK_OFFSET_GP(sp) # restore a0-a2 and ra SCRATCH_LOAD(ra, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) #endif SAVEAREA_FROM_FP(a0, rFP) # a0<- saveArea (old) lw t0, offStackSaveArea_prevFrame(a0) # t0<- saveArea->prevFrame lbu t1, offThread_breakFlags(rSELF) # t1<- breakFlags lw rPC, offStackSaveArea_savedPc(a0) # rPC<- saveArea->savedPc #if !defined(WITH_SELF_VERIFICATION) lw t2, offStackSaveArea_returnAddr(a0) # t2<- chaining cell ret #else move t2, zero # disable chaining #endif lw a2, offStackSaveArea_method - sizeofStackSaveArea(t0) # a2<- method we're returning to #if !defined(WITH_SELF_VERIFICATION) beq a2, zero, 1f # bail to interpreter #else bne a2, zero, 2f JALR(ra) # punt to interpreter and compare state # DOUG: assume this does not return ??? 2: #endif la t4, .LdvmJitToInterpNoChainNoProfile # defined in footer.S lw a1, (t4) move rFP, t0 # publish new FP beq a2, zero, 4f lw t0, offMethod_clazz(a2) # t0<- method->clazz 4: sw a2, offThread_method(rSELF) # self->method = newSave->method lw a0, offClassObject_pDvmDex(t0) # a0<- method->clazz->pDvmDex sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp add rPC, rPC, 3*2 # publish new rPC sw a0, offThread_methodClassDex(rSELF) movn t2, zero, t1 # check the breadFlags and # clear the chaining cell address sw t2, offThread_inJitCodeCache(rSELF) # in code cache or not beq t2, zero, 3f # chaining cell exists? JALR(t2) # jump to the chaining cell # DOUG: assume this does not return ??? 3: #if defined(WITH_JIT_TUNING) li a0, kCallsiteInterpreted #endif j a1 # callsite is interpreted 1: sw zero, offThread_inJitCodeCache(rSELF) # reset inJitCodeCache SAVE_PC_TO_SELF() # SAVE_PC_FP_TO_SELF() SAVE_FP_TO_SELF() la t4, .LdvmMterpStdBail # defined in footer.S lw a2, (t4) move a0, rSELF # Expecting rSELF in a0 JALR(a2) # exit the interpreter # DOUG: assume this does not return ??? /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT: /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */ /* * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC * into rPC then jump to dvmJitToInterpNoChain to dispatch the * runtime-resolved callee. */ # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags move a3, a1 # a3<- returnCell SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) sub t0, t0, t6 # t0<- bottom (newsave-outsSize) bgeu t0, t9, 1f # bottom < interpStackEnd? RETURN # return to raise stack overflow excep. 
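/*
 * A C sketch of the frame computation and overflow check performed above
 * (illustrative; the types and helper names are not the real interpreter
 * API, and the offsets come from asm-constants.h at build time):
 *
 *     u4 *newFp  = (u4 *)SAVEAREA_FROM_FP(fp) - methodToCall->registersSize;
 *     u4 *bottom = (u4 *)SAVEAREA_FROM_FP(newFp) - methodToCall->outsSize;
 *     if (bottom < self->interpStackEnd)
 *         return;    // bail so the interpreter can raise StackOverflowError
 */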
1: # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz lw t0, offMethod_accessFlags(a0) # t0<- methodToCall->accessFlags sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns # set up newSaveArea sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) beqz t8, 2f # breakFlags != 0 RETURN # bail to the interpreter 2: and t6, t0, ACC_NATIVE beqz t6, 3f #if !defined(WITH_SELF_VERIFICATION) j .LinvokeNative #else RETURN # bail to the interpreter #endif 3: # continue executing the next instruction through the interpreter la t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S lw rTEMP, (t0) lw a3, offClassObject_pDvmDex(t9) # a3<- method->clazz->pDvmDex # Update "thread" values for the new method sw a0, offThread_method(rSELF) # self->method = methodToCall sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... move rFP, a1 # fp = newFp sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp #if defined(TEMPLATE_INLINE_PROFILING) # preserve a0-a3 SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(a3, 12) # a0=methodToCall, a1=rSELF move a1, rSELF la t9, dvmFastMethodTraceEnter JALR(t9) lw gp, STACK_OFFSET_GP(sp) # restore a0-a3 SCRATCH_LOAD(a3, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) #endif # Start executing the callee #if defined(WITH_JIT_TUNING) li a0, kInlineCacheMiss #endif jr rTEMP # dvmJitToInterpTraceSelectNoChain /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN: /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */ /* * For monomorphic callsite, setup the Dalvik frame and return to the * Thumb code through the link register to transfer control to the callee * method through a dedicated chaining cell. */ # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite # methodToCall is guaranteed to be non-native .LinvokeChain: lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags move a3, a1 # a3<- returnCell SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area add t2, ra, 8 # setup the punt-to-interp address # 8 bytes skips branch and delay slot sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) sub t0, t0, t6 # t0<- bottom (newsave-outsSize) bgeu t0, t9, 1f # bottom < interpStackEnd? jr t2 # return to raise stack overflow excep. 
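/*
 * The bail-out address t2 computed above is ra + 8.  Based on the comments
 * in this file ("8 bytes skips branch and delay slot", and the lr / lr+8
 * description in the predicted-chaining template), the call site is laid
 * out roughly like this illustrative struct:
 *
 *     struct ChainingCallSite {      // sketch only, not a real VM type
 *         u4 branch;                 // ra + 0: branch to the chained callee
 *         u4 delaySlot;              // ra + 4
 *         u4 puntToInterp[];         // ra + 8: target of the "jr t2" bails
 *     };
 */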
1: # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns # set up newSaveArea sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) beqz t8, 2f # breakFlags != 0 jr t2 # bail to the interpreter 2: lw a3, offClassObject_pDvmDex(t9) # a3<- methodToCall->clazz->pDvmDex # Update "thread" values for the new method sw a0, offThread_method(rSELF) # self->method = methodToCall sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... move rFP, a1 # fp = newFp sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp #if defined(TEMPLATE_INLINE_PROFILING) # preserve a0-a2 and ra SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(ra, 12) move a1, rSELF # a0=methodToCall, a1=rSELF la t9, dvmFastMethodTraceEnter jalr t9 lw gp, STACK_OFFSET_GP(sp) # restore a0-a2 and ra SCRATCH_LOAD(ra, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) #endif RETURN # return to the callee-chaining cell /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN: /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */ /* * For polymorphic callsite, check whether the cached class pointer matches * the current one. If so setup the Dalvik frame and return to the * Thumb code through the link register to transfer control to the callee * method through a dedicated chaining cell. * * The predicted chaining cell is declared in ArmLIR.h with the * following layout: * * typedef struct PredictedChainingCell { * u4 branch; * u4 delay_slot; * const ClassObject *clazz; * const Method *method; * u4 counter; * } PredictedChainingCell; * * Upon returning to the callsite: * - lr : to branch to the chaining cell * - lr+8 : to punt to the interpreter * - lr+16: to fully resolve the callee and may rechain. 
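 *
 * A C sketch of the class-prediction check performed below (illustrative):
 *
 *     if (this->clazz == cell->clazz) {
 *         // hit: fall through to .LinvokeChain and build the frame
 *     } else {
 *         // miss: if the cell was already initialized, decrement the
 *         // shared rechain count, then return to lr+16 so the caller
 *         // can fully resolve the callee (and possibly rechain)
 *     }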
* a3 <- class */ # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite lw a3, offObject_clazz(a0) # a3 <- this->class lw rIBASE, 8(a2) # t0 <- predictedChainCell->clazz lw a0, 12(a2) # a0 <- predictedChainCell->method lw t1, offThread_icRechainCount(rSELF) # t1 <- shared rechainCount #if defined(WITH_JIT_TUNING) la rINST, .LdvmICHitCount #add t2, t2, 1 bne a3, rIBASE, 1f nop lw t2, 0(rINST) add t2, t2, 1 sw t2, 0(rINST) 1: #add t2, t2, 1 #endif beq a3, rIBASE, .LinvokeChain # branch if predicted chain is valid lw rINST, offClassObject_vtable(a3) # rINST <- this->class->vtable beqz rIBASE, 2f # initialized class or not sub a1, t1, 1 # count-- sw a1, offThread_icRechainCount(rSELF) # write back to InterpState b 3f 2: move a1, zero 3: add ra, ra, 16 # return to fully-resolve landing pad /* * a1 <- count * a2 <- &predictedChainCell * a3 <- this->class * rPC <- dPC * rINST <- this->class->vtable */ RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE: /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */ # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags move a3, a1 # a3<- returnCell SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area bgeu t0, t9, 1f # bottom < interpStackEnd? RETURN # return to raise stack overflow excep. 1: # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns # set up newSaveArea sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) lw rTEMP, offMethod_nativeFunc(a0) # t9<- method->nativeFunc #if !defined(WITH_SELF_VERIFICATION) beqz t8, 2f # breakFlags != 0 RETURN # bail to the interpreter 2: #else RETURN # bail to the interpreter unconditionally #endif # go ahead and transfer control to the native code lw t6, offThread_jniLocal_topCookie(rSELF) # t6<- thread->localRef->... 
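/*
 * A sketch of what the next block does in C terms, following the argument
 * moves visible below (illustrative; field names follow the comments in
 * this file rather than the real headers):
 *
 *     newSave->localRefCookie = self->jniLocal.topCookie;
 *     self->curFrame = newFp;
 *     self->inJitCodeCache = NULL;
 *     (*methodToCall->nativeFunc)(newFp, &self->retval, methodToCall, self);
 */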
sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp sw zero, offThread_inJitCodeCache(rSELF) # not in the jit code cache sw t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1) # newFp->localRefCookie=top SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area move a2, a0 # a2<- methodToCall move a0, a1 # a0<- newFp add a1, rSELF, offThread_retval # a1<- &retval move a3, rSELF # a3<- self #if defined(TEMPLATE_INLINE_PROFILING) # a2: methodToCall # preserve a0-a3 SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(a3, 12) move a0, a2 move a1, rSELF # a0=JNIMethod, a1=rSELF la t9, dvmFastMethodTraceEnter JALR(t9) # off to the native code lw gp, STACK_OFFSET_GP(sp) # restore a0-a3 SCRATCH_LOAD(a3, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) move rOBJ, a2 # save a2 #endif JALR(rTEMP) # off to the native code lw gp, STACK_OFFSET_GP(sp) #if defined(TEMPLATE_INLINE_PROFILING) move a0, rOBJ move a1, rSELF # a0=JNIMethod, a1=rSELF la t9, dvmFastNativeMethodTraceExit JALR(t9) lw gp, STACK_OFFSET_GP(sp) #endif # native return; rBIX=newSaveArea # equivalent to dvmPopJniLocals lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top lw a1, offThread_exception(rSELF) # check for exception sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top lw a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) # a0 = dalvikCallsitePC bnez a1, .LhandleException # handle exception if any sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly beqz a2, 3f jr a2 # go if return chaining cell still exist 3: # continue executing the next instruction through the interpreter la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S lw a1, (a1) add rPC, a0, 3*2 # reconstruct new rPC (advance 3 dalvik instr) #if defined(WITH_JIT_TUNING) li a0, kCallsiteInterpreted #endif jr a1 /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_MUL_LONG dvmCompiler_TEMPLATE_MUL_LONG: /* File: mips/TEMPLATE_MUL_LONG.S */ /* * Signed 64-bit integer multiply. * * For JIT: op1 in a0/a1, op2 in a2/a3, return in v0/v1 * * Consider WXxYZ (a1a0 x a3a2) with a long multiply: * * a1 a0 * x a3 a2 * ------------- * a2a1 a2a0 * a3a0 * a3a1 (<= unused) * --------------- * v1 v0 * */ /* mul-long vAA, vBB, vCC */ mul rRESULT1,rARG3,rARG0 # v1= a3a0 multu rARG2,rARG0 mfhi t1 mflo rRESULT0 # v0= a2a0 mul t0,rARG2,rARG1 # t0= a2a1 addu rRESULT1,rRESULT1,t1 # v1= a3a0 + hi(a2a0) addu rRESULT1,rRESULT1,t0 # v1= a3a0 + hi(a2a0) + a2a1; RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_SHL_LONG dvmCompiler_TEMPLATE_SHL_LONG: /* File: mips/TEMPLATE_SHL_LONG.S */ /* * Long integer shift. This is different from the generic 32/64-bit * binary operations because vAA/vBB are 64-bit but vCC (the shift * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low * 6 bits. 
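 *
 * A C sketch of the word-by-word composition used below (illustrative; the
 * extra ">> 1" keeps the right-shift amount in 0..31 so a shift of exactly
 * 32 never occurs):
 *
 *     u4 n  = shift & 31;
 *     u4 lo = alo << n;
 *     u4 hi = (ahi << n) | ((alo >> 1) >> (31 - n));
 *     if (shift & 0x20) { hi = lo; lo = 0; }      // shift distances 32..63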
*/ /* shl-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */ sll rRESULT0, rARG0, a2 # rlo<- alo << (shift&31) not rRESULT1, a2 # rhi<- 31-shift (shift is 5b) srl rARG0, 1 srl rARG0, rRESULT1 # alo<- alo >> (32-(shift&31)) sll rRESULT1, rARG1, a2 # rhi<- ahi << (shift&31) or rRESULT1, rARG0 # rhi<- rhi | alo andi a2, 0x20 # shift< shift & 0x20 movn rRESULT1, rRESULT0, a2 # rhi<- rlo (if shift&0x20) movn rRESULT0, zero, a2 # rlo<- 0 (if shift&0x20) RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_SHR_LONG dvmCompiler_TEMPLATE_SHR_LONG: /* File: mips/TEMPLATE_SHR_LONG.S */ /* * Long integer shift. This is different from the generic 32/64-bit * binary operations because vAA/vBB are 64-bit but vCC (the shift * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low * 6 bits. */ /* shr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */ sra rRESULT1, rARG1, a2 # rhi<- ahi >> (shift&31) srl rRESULT0, rARG0, a2 # rlo<- alo >> (shift&31) sra a3, rARG1, 31 # a3<- sign(ah) not rARG0, a2 # alo<- 31-shift (shift is 5b) sll rARG1, 1 sll rARG1, rARG0 # ahi<- ahi << (32-(shift&31)) or rRESULT0, rARG1 # rlo<- rlo | ahi andi a2, 0x20 # shift & 0x20 movn rRESULT0, rRESULT1, a2 # rlo<- rhi (if shift&0x20) movn rRESULT1, a3, a2 # rhi<- sign(ahi) (if shift&0x20) RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_USHR_LONG dvmCompiler_TEMPLATE_USHR_LONG: /* File: mips/TEMPLATE_USHR_LONG.S */ /* * Long integer shift. This is different from the generic 32/64-bit * binary operations because vAA/vBB are 64-bit but vCC (the shift * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low * 6 bits. */ /* ushr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */ srl rRESULT1, rARG1, a2 # rhi<- ahi >> (shift&31) srl rRESULT0, rARG0, a2 # rlo<- alo >> (shift&31) not rARG0, a2 # alo<- 31-n (shift is 5b) sll rARG1, 1 sll rARG1, rARG0 # ahi<- ahi << (32-(shift&31)) or rRESULT0, rARG1 # rlo<- rlo | ahi andi a2, 0x20 # shift & 0x20 movn rRESULT0, rRESULT1, a2 # rlo<- rhi (if shift&0x20) movn rRESULT1, zero, a2 # rhi<- 0 (if shift&0x20) RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP dvmCompiler_TEMPLATE_ADD_FLOAT_VFP: /* File: mips/TEMPLATE_ADD_FLOAT_VFP.S */ /* File: mips/fbinop.S */ /* * Generic 32-bit binary float operation. a0 = a1 op a2. * * For: add-fp, sub-fp, mul-fp, div-fp * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. * */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vBB LOAD(a1, a2) # a1<- vCC .if 0 beqz a1, common_errDivideByZero # is second operand zero? .endif # optional op JAL(__addsf3) # v0 = result STORE(v0, rOBJ) # vAA <- v0 #else LOAD_F(fa0, a1) # fa0<- vBB LOAD_F(fa1, a2) # fa1<- vCC .if 0 # is second operand zero? li.s ft0, 0 c.eq.s fcc0, ft0, fa1 # condition bit and comparision with 0 bc1t fcc0, common_errDivideByZero .endif # optional op add.s fv0, fa0, fa1 # fv0 = result STORE_F(fv0, rOBJ) # vAA <- fv0 #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP dvmCompiler_TEMPLATE_SUB_FLOAT_VFP: /* File: mips/TEMPLATE_SUB_FLOAT_VFP.S */ /* File: mips/fbinop.S */ /* * Generic 32-bit binary float operation. a0 = a1 op a2. 
* * For: add-fp, sub-fp, mul-fp, div-fp * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. * */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vBB LOAD(a1, a2) # a1<- vCC .if 0 beqz a1, common_errDivideByZero # is second operand zero? .endif # optional op JAL(__subsf3) # v0 = result STORE(v0, rOBJ) # vAA <- v0 #else LOAD_F(fa0, a1) # fa0<- vBB LOAD_F(fa1, a2) # fa1<- vCC .if 0 # is second operand zero? li.s ft0, 0 c.eq.s fcc0, ft0, fa1 # condition bit and comparision with 0 bc1t fcc0, common_errDivideByZero .endif # optional op sub.s fv0, fa0, fa1 # fv0 = result STORE_F(fv0, rOBJ) # vAA <- fv0 #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP dvmCompiler_TEMPLATE_MUL_FLOAT_VFP: /* File: mips/TEMPLATE_MUL_FLOAT_VFP.S */ /* File: mips/fbinop.S */ /* * Generic 32-bit binary float operation. a0 = a1 op a2. * * For: add-fp, sub-fp, mul-fp, div-fp * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. * */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vBB LOAD(a1, a2) # a1<- vCC .if 0 beqz a1, common_errDivideByZero # is second operand zero? .endif # optional op JAL(__mulsf3) # v0 = result STORE(v0, rOBJ) # vAA <- v0 #else LOAD_F(fa0, a1) # fa0<- vBB LOAD_F(fa1, a2) # fa1<- vCC .if 0 # is second operand zero? li.s ft0, 0 c.eq.s fcc0, ft0, fa1 # condition bit and comparision with 0 bc1t fcc0, common_errDivideByZero .endif # optional op mul.s fv0, fa0, fa1 # fv0 = result STORE_F(fv0, rOBJ) # vAA <- fv0 #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP dvmCompiler_TEMPLATE_DIV_FLOAT_VFP: /* File: mips/TEMPLATE_DIV_FLOAT_VFP.S */ /* File: mips/fbinop.S */ /* * Generic 32-bit binary float operation. a0 = a1 op a2. * * For: add-fp, sub-fp, mul-fp, div-fp * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. * */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vBB LOAD(a1, a2) # a1<- vCC .if 0 beqz a1, common_errDivideByZero # is second operand zero? .endif # optional op JAL(__divsf3) # v0 = result STORE(v0, rOBJ) # vAA <- v0 #else LOAD_F(fa0, a1) # fa0<- vBB LOAD_F(fa1, a2) # fa1<- vCC .if 0 # is second operand zero? li.s ft0, 0 c.eq.s fcc0, ft0, fa1 # condition bit and comparision with 0 bc1t fcc0, common_errDivideByZero .endif # optional op div.s fv0, fa0, fa1 # fv0 = result STORE_F(fv0, rOBJ) # vAA <- fv0 #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP: /* File: mips/TEMPLATE_ADD_DOUBLE_VFP.S */ /* File: mips/fbinopWide.S */ /* * Generic 64-bit binary operation. Provide an "instr" line that * specifies an instruction that performs "result = a0-a1 op a2-a3". * This could be an MIPS instruction or a function call. * If "chkzero" is set to 1, we perform a divide-by-zero check on * vCC (a1). Useful for integer division and modulus. * * for: add-long, sub-long, div-long, rem-long, and-long, or-long, * xor-long, add-double, sub-double, mul-double, div-double, * rem-double * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. 
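 *
 * Each expansion of this template behaves roughly like the following C
 * (a sketch; "OP" stands for the per-template instruction or soft-float
 * helper such as __adddf3):
 *
 *     void fbinopWide(double *vAA, const double *vBB, const double *vCC) {
 *         *vAA = OP(*vBB, *vCC);
 *     }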
*/ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT move t0, a1 # save a1 move t1, a2 # save a2 LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 .if 0 or t0, rARG2, rARG3 # second arg (a2-a3) is zero? beqz t0, common_errDivideByZero .endif # optional op JAL(__adddf3) # result<- op, a0-a3 changed STORE64(rRESULT0, rRESULT1, rOBJ) #else LOAD64_F(fa0, fa0f, a1) LOAD64_F(fa1, fa1f, a2) .if 0 li.d ft0, 0 c.eq.d fcc0, fa1, ft0 bc1t fcc0, common_errDivideByZero .endif # optional op add.d fv0, fa0, fa1 STORE64_F(fv0, fv0f, rOBJ) #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP: /* File: mips/TEMPLATE_SUB_DOUBLE_VFP.S */ /* File: mips/fbinopWide.S */ /* * Generic 64-bit binary operation. Provide an "instr" line that * specifies an instruction that performs "result = a0-a1 op a2-a3". * This could be an MIPS instruction or a function call. * If "chkzero" is set to 1, we perform a divide-by-zero check on * vCC (a1). Useful for integer division and modulus. * * for: add-long, sub-long, div-long, rem-long, and-long, or-long, * xor-long, add-double, sub-double, mul-double, div-double, * rem-double * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT move t0, a1 # save a1 move t1, a2 # save a2 LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 .if 0 or t0, rARG2, rARG3 # second arg (a2-a3) is zero? beqz t0, common_errDivideByZero .endif # optional op JAL(__subdf3) # result<- op, a0-a3 changed STORE64(rRESULT0, rRESULT1, rOBJ) #else LOAD64_F(fa0, fa0f, a1) LOAD64_F(fa1, fa1f, a2) .if 0 li.d ft0, 0 c.eq.d fcc0, fa1, ft0 bc1t fcc0, common_errDivideByZero .endif # optional op sub.d fv0, fa0, fa1 STORE64_F(fv0, fv0f, rOBJ) #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP: /* File: mips/TEMPLATE_MUL_DOUBLE_VFP.S */ /* File: mips/fbinopWide.S */ /* * Generic 64-bit binary operation. Provide an "instr" line that * specifies an instruction that performs "result = a0-a1 op a2-a3". * This could be an MIPS instruction or a function call. * If "chkzero" is set to 1, we perform a divide-by-zero check on * vCC (a1). Useful for integer division and modulus. * * for: add-long, sub-long, div-long, rem-long, and-long, or-long, * xor-long, add-double, sub-double, mul-double, div-double, * rem-double * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT move t0, a1 # save a1 move t1, a2 # save a2 LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 .if 0 or t0, rARG2, rARG3 # second arg (a2-a3) is zero? 
beqz t0, common_errDivideByZero .endif # optional op JAL(__muldf3) # result<- op, a0-a3 changed STORE64(rRESULT0, rRESULT1, rOBJ) #else LOAD64_F(fa0, fa0f, a1) LOAD64_F(fa1, fa1f, a2) .if 0 li.d ft0, 0 c.eq.d fcc0, fa1, ft0 bc1t fcc0, common_errDivideByZero .endif # optional op mul.d fv0, fa0, fa1 STORE64_F(fv0, fv0f, rOBJ) #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP: /* File: mips/TEMPLATE_DIV_DOUBLE_VFP.S */ /* File: mips/fbinopWide.S */ /* * Generic 64-bit binary operation. Provide an "instr" line that * specifies an instruction that performs "result = a0-a1 op a2-a3". * This could be an MIPS instruction or a function call. * If "chkzero" is set to 1, we perform a divide-by-zero check on * vCC (a1). Useful for integer division and modulus. * * for: add-long, sub-long, div-long, rem-long, and-long, or-long, * xor-long, add-double, sub-double, mul-double, div-double, * rem-double * * On entry: * a0 = target dalvik register address * a1 = op1 address * a2 = op2 address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT move t0, a1 # save a1 move t1, a2 # save a2 LOAD64(rARG0, rARG1, t0) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, t1) # a2/a3<- vCC/vCC+1 .if 0 or t0, rARG2, rARG3 # second arg (a2-a3) is zero? beqz t0, common_errDivideByZero .endif # optional op JAL(__divdf3) # result<- op, a0-a3 changed STORE64(rRESULT0, rRESULT1, rOBJ) #else LOAD64_F(fa0, fa0f, a1) LOAD64_F(fa1, fa1f, a2) .if 0 li.d ft0, 0 c.eq.d fcc0, fa1, ft0 bc1t fcc0, common_errDivideByZero .endif # optional op div.d fv0, fa0, fa1 STORE64_F(fv0, fv0f, rOBJ) #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP: /* File: mips/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */ /* File: mips/funopNarrower.S */ /* * Generic 64bit-to-32bit unary operation. Provide an "instr" line * that specifies an instruction that performs "result = op a0/a1", where * "result" is a 32-bit quantity in a0. * * For: long-to-float, double-to-int, double-to-float * If hard floating point support is available, use fa0 as the parameter, except for * long-to-float opcode. * (This would work for long-to-int, but that instruction is actually * an exact match for OP_MOVE.) * * On entry: * a0 = target dalvik register address * a1 = src dalvik register address * */ move rINST, a0 # save a0 #ifdef SOFT_FLOAT move t0, a1 # save a1 LOAD64(rARG0, rARG1, t0) # a0/a1<- vB/vB+1 # optional op JAL(__truncdfsf2) # v0<- op, a0-a3 changed .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg: STORE(v0, rINST) # vA<- v0 #else LOAD64_F(fa0, fa0f, a1) # optional op cvt.s.d fv0,fa0 # fv0 = result .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg_f: STORE_F(fv0, rINST) # vA<- fv0 #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP: /* File: mips/TEMPLATE_DOUBLE_TO_INT_VFP.S */ /* File: mips/funopNarrower.S */ /* * Generic 64bit-to-32bit unary operation. Provide an "instr" line * that specifies an instruction that performs "result = op a0/a1", where * "result" is a 32-bit quantity in a0. * * For: long-to-float, double-to-int, double-to-float * If hard floating point support is available, use fa0 as the parameter, except for * long-to-float opcode. * (This would work for long-to-int, but that instruction is actually * an exact match for OP_MOVE.) 
* * On entry: * a0 = target dalvik register address * a1 = src dalvik register address * */ move rINST, a0 # save a0 #ifdef SOFT_FLOAT move t0, a1 # save a1 LOAD64(rARG0, rARG1, t0) # a0/a1<- vB/vB+1 # optional op b d2i_doconv # v0<- op, a0-a3 changed .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg: STORE(v0, rINST) # vA<- v0 #else LOAD64_F(fa0, fa0f, a1) # optional op b d2i_doconv # fv0 = result .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f: STORE_F(fv0, rINST) # vA<- fv0 #endif RETURN /* * Convert the double in a0/a1 to an int in a0. * * We have to clip values to int min/max per the specification. The * expected common case is a "reasonable" value that converts directly * to modest integer. The EABI convert function isn't doing this for us. * Use rBIX / rOBJ as global to hold arguments (they are not bound to a global var) */ d2i_doconv: #ifdef SOFT_FLOAT la t0, .LDOUBLE_TO_INT_max LOAD64(rARG2, rARG3, t0) move rBIX, rARG0 # save a0 move rOBJ, rARG1 # and a1 JAL(__gedf2) # is arg >= maxint? move t0, v0 li v0, ~0x80000000 # return maxint (7fffffff) bgez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # nonzero == yes move rARG0, rBIX # recover arg move rARG1, rOBJ la t0, .LDOUBLE_TO_INT_min LOAD64(rARG2, rARG3, t0) JAL(__ledf2) # is arg <= minint? move t0, v0 li v0, 0x80000000 # return minint (80000000) blez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # nonzero == yes move rARG0, rBIX # recover arg move rARG1, rOBJ move rARG2, rBIX # compare against self move rARG3, rOBJ JAL(__nedf2) # is arg == self? move t0, v0 # zero == no li v0, 0 bnez t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg # return zero for NaN move rARG0, rBIX # recover arg move rARG1, rOBJ JAL(__fixdfsi) # convert double to int b .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg #else la t0, .LDOUBLE_TO_INT_max LOAD64_F(fa1, fa1f, t0) c.ole.d fcc0, fa1, fa0 l.s fv0, .LDOUBLE_TO_INT_maxret bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f la t0, .LDOUBLE_TO_INT_min LOAD64_F(fa1, fa1f, t0) c.ole.d fcc0, fa0, fa1 l.s fv0, .LDOUBLE_TO_INT_minret bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f mov.d fa1, fa0 c.un.d fcc0, fa0, fa1 li.s fv0, 0 bc1t .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f trunc.w.d fv0, fa0 b .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f #endif .LDOUBLE_TO_INT_max: .dword 0x41dfffffffc00000 .LDOUBLE_TO_INT_min: .dword 0xc1e0000000000000 # minint, as a double (high word) .LDOUBLE_TO_INT_maxret: .word 0x7fffffff .LDOUBLE_TO_INT_minret: .word 0x80000000 /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP: /* File: mips/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */ /* File: mips/funopWider.S */ /* * Generic 32bit-to-64bit floating point unary operation. Provide an * "instr" line that specifies an instruction that performs "d0 = op s0". * * For: int-to-double, float-to-double * * On entry: * a0 = target dalvik register address * a1 = src dalvik register address */ /* unop vA, vB */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vB # optional op JAL(__extendsfdf2) # result<- op, a0-a3 changed .LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg: STORE64(rRESULT0, rRESULT1, rOBJ) # vA/vA+1<- v0/v1 #else LOAD_F(fa0, a1) # fa0<- vB # optional op cvt.d.s fv0, fa0 .LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg: STORE64_F(fv0, fv0f, rOBJ) # vA/vA+1<- fv0/fv0f #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP: /* File: mips/TEMPLATE_FLOAT_TO_INT_VFP.S */ /* File: mips/funop.S */ /* * Generic 32-bit unary operation. 
Provide an "instr" line that * specifies an instruction that performs "result = op a0". * This could be a MIPS instruction or a function call. * * for: int-to-float, float-to-int * * On entry: * a0 = target dalvik register address * a1 = src dalvik register address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. * */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vBB # optional op b f2i_doconv # v0<- op, a0-a3 changed .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg: STORE(v0, rOBJ) # vAA<- v0 #else LOAD_F(fa0, a1) # fa0<- vBB # optional op b f2i_doconv # fv0 = result .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f: STORE_F(fv0, rOBJ) # vAA <- fv0 #endif RETURN /* * Not an entry point as it is used only once !! */ f2i_doconv: #ifdef SOFT_FLOAT li a1, 0x4f000000 # (float)maxint move rBIX, a0 JAL(__gesf2) # is arg >= maxint? move t0, v0 li v0, ~0x80000000 # return maxint (7fffffff) bgez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg move a0, rBIX # recover arg li a1, 0xcf000000 # (float)minint JAL(__lesf2) move t0, v0 li v0, 0x80000000 # return minint (80000000) blez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg move a0, rBIX move a1, rBIX JAL(__nesf2) move t0, v0 li v0, 0 # return zero for NaN bnez t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg move a0, rBIX JAL(__fixsfsi) b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg #else l.s fa1, .LFLOAT_TO_INT_max c.ole.s fcc0, fa1, fa0 l.s fv0, .LFLOAT_TO_INT_ret_max bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f l.s fa1, .LFLOAT_TO_INT_min c.ole.s fcc0, fa0, fa1 l.s fv0, .LFLOAT_TO_INT_ret_min bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f mov.s fa1, fa0 c.un.s fcc0, fa0, fa1 li.s fv0, 0 bc1t .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f trunc.w.s fv0, fa0 b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f #endif .LFLOAT_TO_INT_max: .word 0x4f000000 .LFLOAT_TO_INT_min: .word 0xcf000000 .LFLOAT_TO_INT_ret_max: .word 0x7fffffff .LFLOAT_TO_INT_ret_min: .word 0x80000000 /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP: /* File: mips/TEMPLATE_INT_TO_DOUBLE_VFP.S */ /* File: mips/funopWider.S */ /* * Generic 32bit-to-64bit floating point unary operation. Provide an * "instr" line that specifies an instruction that performs "d0 = op s0". * * For: int-to-double, float-to-double * * On entry: * a0 = target dalvik register address * a1 = src dalvik register address */ /* unop vA, vB */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vB # optional op JAL(__floatsidf) # result<- op, a0-a3 changed .LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg: STORE64(rRESULT0, rRESULT1, rOBJ) # vA/vA+1<- v0/v1 #else LOAD_F(fa0, a1) # fa0<- vB # optional op cvt.d.w fv0, fa0 .LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg: STORE64_F(fv0, fv0f, rOBJ) # vA/vA+1<- fv0/fv0f #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP: /* File: mips/TEMPLATE_INT_TO_FLOAT_VFP.S */ /* File: mips/funop.S */ /* * Generic 32-bit unary operation. Provide an "instr" line that * specifies an instruction that performs "result = op a0". * This could be a MIPS instruction or a function call. * * for: int-to-float, float-to-int * * On entry: * a0 = target dalvik register address * a1 = src dalvik register address * * IMPORTANT: you may specify "chkzero" or "preinstr" but not both. 
* */ move rOBJ, a0 # save a0 #ifdef SOFT_FLOAT LOAD(a0, a1) # a0<- vBB # optional op JAL(__floatsisf) # v0<- op, a0-a3 changed .LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg: STORE(v0, rOBJ) # vAA<- v0 #else LOAD_F(fa0, a1) # fa0<- vBB # optional op cvt.s.w fv0, fa0 # fv0 = result .LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg_f: STORE_F(fv0, rOBJ) # vAA <- fv0 #endif RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP: /* File: mips/TEMPLATE_CMPG_DOUBLE_VFP.S */ /* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */ /* * Compare two double precision floating-point values. Puts 0, 1, or -1 into the * destination register based on the results of the comparison. * * Provide a "naninst" instruction that puts 1 or -1 into a1 depending * on what value we'd like to return when one of the operands is NaN. * * The operation we're implementing is: * if (x == y) * return 0; * else if (x < y) * return -1; * else if (x > y) * return 1; * else * return {-1,1}; // one or both operands was NaN * * On entry: * a0 = &op1 [vBB] * a1 = &op2 [vCC] * * for: cmpl-double, cmpg-double */ /* op vAA, vBB, vCC */ /* "clasic" form */ #ifdef SOFT_FLOAT move rOBJ, a0 # save a0 move rBIX, a1 # save a1 LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 JAL(__eqdf2) # v0<- (vBB == vCC) li rTEMP, 0 # vAA<- 0 beqz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 JAL(__ltdf2) # a0<- (vBB < vCC) li rTEMP, -1 # vAA<- -1 bltz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 JAL(__gtdf2) # v0<- (vBB > vCC) li rTEMP, 1 # vAA<- 1 bgtz v0, TEMPLATE_CMPG_DOUBLE_VFP_finish #else LOAD64_F(fs0, fs0f, a0) # fs0<- vBB LOAD64_F(fs1, fs1f, a1) # fs1<- vCC c.olt.d fcc0, fs0, fs1 # Is fs0 < fs1 li rTEMP, -1 bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish c.olt.d fcc0, fs1, fs0 li rTEMP, 1 bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish c.eq.d fcc0, fs0, fs1 li rTEMP, 0 bc1t fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish #endif li rTEMP, 1 TEMPLATE_CMPG_DOUBLE_VFP_finish: move v0, rTEMP # v0<- vAA RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP: /* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */ /* * Compare two double precision floating-point values. Puts 0, 1, or -1 into the * destination register based on the results of the comparison. * * Provide a "naninst" instruction that puts 1 or -1 into a1 depending * on what value we'd like to return when one of the operands is NaN. 
* * The operation we're implementing is: * if (x == y) * return 0; * else if (x < y) * return -1; * else if (x > y) * return 1; * else * return {-1,1}; // one or both operands was NaN * * On entry: * a0 = &op1 [vBB] * a1 = &op2 [vCC] * * for: cmpl-double, cmpg-double */ /* op vAA, vBB, vCC */ /* "clasic" form */ #ifdef SOFT_FLOAT move rOBJ, a0 # save a0 move rBIX, a1 # save a1 LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 JAL(__eqdf2) # v0<- (vBB == vCC) li rTEMP, 0 # vAA<- 0 beqz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 JAL(__ltdf2) # a0<- (vBB < vCC) li rTEMP, -1 # vAA<- -1 bltz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish LOAD64(rARG0, rARG1, rOBJ) # a0/a1<- vBB/vBB+1 LOAD64(rARG2, rARG3, rBIX) # a2/a3<- vCC/vCC+1 JAL(__gtdf2) # v0<- (vBB > vCC) li rTEMP, 1 # vAA<- 1 bgtz v0, TEMPLATE_CMPL_DOUBLE_VFP_finish #else LOAD64_F(fs0, fs0f, a0) # fs0<- vBB LOAD64_F(fs1, fs1f, a1) # fs1<- vCC c.olt.d fcc0, fs0, fs1 # Is fs0 < fs1 li rTEMP, -1 bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish c.olt.d fcc0, fs1, fs0 li rTEMP, 1 bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish c.eq.d fcc0, fs0, fs1 li rTEMP, 0 bc1t fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish #endif li rTEMP, -1 TEMPLATE_CMPL_DOUBLE_VFP_finish: move v0, rTEMP # v0<- vAA RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP: /* File: mips/TEMPLATE_CMPG_FLOAT_VFP.S */ /* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */ /* * Compare two floating-point values. Puts 0, 1, or -1 into the * destination register based on the results of the comparison. * * Provide a "naninst" instruction that puts 1 or -1 into a1 depending * on what value we'd like to return when one of the operands is NaN. * * The operation we're implementing is: * if (x == y) * return 0; * else if (x < y) * return -1; * else if (x > y) * return 1; * else * return {-1,1}; // one or both operands was NaN * * On entry: * a0 = &op1 [vBB] * a1 = &op2 [vCC] * * for: cmpl-float, cmpg-float */ /* op vAA, vBB, vCC */ /* "clasic" form */ #ifdef SOFT_FLOAT LOAD(rOBJ, a0) # rOBJ<- vBB LOAD(rBIX, a1) # rBIX<- vCC move a0, rOBJ # a0<- vBB move a1, rBIX # a1<- vCC JAL(__eqsf2) # v0<- (vBB == vCC) li rTEMP, 0 # vAA<- 0 beqz v0, TEMPLATE_CMPG_FLOAT_VFP_finish move a0, rOBJ # a0<- vBB move a1, rBIX # a1<- vCC JAL(__ltsf2) # a0<- (vBB < vCC) li rTEMP, -1 # vAA<- -1 bltz v0, TEMPLATE_CMPG_FLOAT_VFP_finish move a0, rOBJ # a0<- vBB move a1, rBIX # a1<- vCC JAL(__gtsf2) # v0<- (vBB > vCC) li rTEMP, 1 # vAA<- 1 bgtz v0, TEMPLATE_CMPG_FLOAT_VFP_finish #else LOAD_F(fs0, a0) # fs0<- vBB LOAD_F(fs1, a1) # fs1<- vCC c.olt.s fcc0, fs0, fs1 #Is fs0 < fs1 li rTEMP, -1 bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish c.olt.s fcc0, fs1, fs0 li rTEMP, 1 bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish c.eq.s fcc0, fs0, fs1 li rTEMP, 0 bc1t fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish #endif li rTEMP, 1 TEMPLATE_CMPG_FLOAT_VFP_finish: move v0, rTEMP # v0<- vAA RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP: /* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */ /* * Compare two floating-point values. Puts 0, 1, or -1 into the * destination register based on the results of the comparison. * * Provide a "naninst" instruction that puts 1 or -1 into a1 depending * on what value we'd like to return when one of the operands is NaN. 
* * The operation we're implementing is: * if (x == y) * return 0; * else if (x < y) * return -1; * else if (x > y) * return 1; * else * return {-1,1}; // one or both operands was NaN * * On entry: * a0 = &op1 [vBB] * a1 = &op2 [vCC] * * for: cmpl-float, cmpg-float */ /* op vAA, vBB, vCC */ /* "clasic" form */ #ifdef SOFT_FLOAT LOAD(rOBJ, a0) # rOBJ<- vBB LOAD(rBIX, a1) # rBIX<- vCC move a0, rOBJ # a0<- vBB move a1, rBIX # a1<- vCC JAL(__eqsf2) # v0<- (vBB == vCC) li rTEMP, 0 # vAA<- 0 beqz v0, TEMPLATE_CMPL_FLOAT_VFP_finish move a0, rOBJ # a0<- vBB move a1, rBIX # a1<- vCC JAL(__ltsf2) # a0<- (vBB < vCC) li rTEMP, -1 # vAA<- -1 bltz v0, TEMPLATE_CMPL_FLOAT_VFP_finish move a0, rOBJ # a0<- vBB move a1, rBIX # a1<- vCC JAL(__gtsf2) # v0<- (vBB > vCC) li rTEMP, 1 # vAA<- 1 bgtz v0, TEMPLATE_CMPL_FLOAT_VFP_finish #else LOAD_F(fs0, a0) # fs0<- vBB LOAD_F(fs1, a1) # fs1<- vCC c.olt.s fcc0, fs0, fs1 #Is fs0 < fs1 li rTEMP, -1 bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish c.olt.s fcc0, fs1, fs0 li rTEMP, 1 bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish c.eq.s fcc0, fs0, fs1 li rTEMP, 0 bc1t fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish #endif li rTEMP, -1 TEMPLATE_CMPL_FLOAT_VFP_finish: move v0, rTEMP # v0<- vAA RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP: /* File: mips/TEMPLATE_SQRT_DOUBLE_VFP.S */ /* * 64-bit floating point sqrt operation. * If the result is a NaN, bail out to library code to do * the right thing. * * On entry: * a2 src addr of op1 * On exit: * v0,v1/fv0 = res */ #ifdef SOFT_FLOAT LOAD64(rARG0, rARG1, a2) # a0/a1<- vBB/vBB+1 #else LOAD64_F(fa0, fa0f, a2) # fa0/fa0f<- vBB/vBB+1 sqrt.d fv0, fa0 c.eq.d fv0, fv0 bc1t 1f #endif JAL(sqrt) 1: RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON: /* File: mips/TEMPLATE_THROW_EXCEPTION_COMMON.S */ /* * Throw an exception from JIT'ed code. * On entry: * a0 Dalvik PC that raises the exception */ j .LhandleException /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_MEM_OP_DECODE dvmCompiler_TEMPLATE_MEM_OP_DECODE: /* File: mips/TEMPLATE_MEM_OP_DECODE.S */ #if defined(WITH_SELF_VERIFICATION) /* * This handler encapsulates heap memory ops for selfVerification mode. * * The call to the handler is inserted prior to a heap memory operation. * This handler then calls a function to decode the memory op, and process * it accordingly. Afterwards, the handler changes the return address to * skip the memory op so it never gets executed. 
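 *
 * The save/restore sequences below address each register at a negative
 * offset from the incoming sp before adjusting it; a sketch of the layout
 * (the r_* constants are the register numbers defined at the top of this
 * file):
 *
 *     // for each saved register i:  mem[sp - 4 * i] = reg[i];
 *     // then:                       sp -= 32 * 4;
 *     // dvmSelfVerificationMemOpDecode is then called with the original
 *     // return address in a0 and the adjusted stack pointer in a1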
*/ #ifdef HARD_FLOAT /* push f0-f31 onto stack */ sw f0, fr0*-4(sp) # push f0 sw f1, fr1*-4(sp) # push f1 sw f2, fr2*-4(sp) # push f2 sw f3, fr3*-4(sp) # push f3 sw f4, fr4*-4(sp) # push f4 sw f5, fr5*-4(sp) # push f5 sw f6, fr6*-4(sp) # push f6 sw f7, fr7*-4(sp) # push f7 sw f8, fr8*-4(sp) # push f8 sw f9, fr9*-4(sp) # push f9 sw f10, fr10*-4(sp) # push f10 sw f11, fr11*-4(sp) # push f11 sw f12, fr12*-4(sp) # push f12 sw f13, fr13*-4(sp) # push f13 sw f14, fr14*-4(sp) # push f14 sw f15, fr15*-4(sp) # push f15 sw f16, fr16*-4(sp) # push f16 sw f17, fr17*-4(sp) # push f17 sw f18, fr18*-4(sp) # push f18 sw f19, fr19*-4(sp) # push f19 sw f20, fr20*-4(sp) # push f20 sw f21, fr21*-4(sp) # push f21 sw f22, fr22*-4(sp) # push f22 sw f23, fr23*-4(sp) # push f23 sw f24, fr24*-4(sp) # push f24 sw f25, fr25*-4(sp) # push f25 sw f26, fr26*-4(sp) # push f26 sw f27, fr27*-4(sp) # push f27 sw f28, fr28*-4(sp) # push f28 sw f29, fr29*-4(sp) # push f29 sw f30, fr30*-4(sp) # push f30 sw f31, fr31*-4(sp) # push f31 sub sp, (32-0)*4 # adjust stack pointer #endif /* push gp registers (except zero, gp, sp, and fp) */ .set noat sw AT, r_AT*-4(sp) # push at .set at sw v0, r_V0*-4(sp) # push v0 sw v1, r_V1*-4(sp) # push v1 sw a0, r_A0*-4(sp) # push a0 sw a1, r_A1*-4(sp) # push a1 sw a2, r_A2*-4(sp) # push a2 sw a3, r_A3*-4(sp) # push a3 sw t0, r_T0*-4(sp) # push t0 sw t1, r_T1*-4(sp) # push t1 sw t2, r_T2*-4(sp) # push t2 sw t3, r_T3*-4(sp) # push t3 sw t4, r_T4*-4(sp) # push t4 sw t5, r_T5*-4(sp) # push t5 sw t6, r_T6*-4(sp) # push t6 sw t7, r_T7*-4(sp) # push t7 sw s0, r_S0*-4(sp) # push s0 sw s1, r_S1*-4(sp) # push s1 sw s2, r_S2*-4(sp) # push s2 sw s3, r_S3*-4(sp) # push s3 sw s4, r_S4*-4(sp) # push s4 sw s5, r_S5*-4(sp) # push s5 sw s6, r_S6*-4(sp) # push s6 sw s7, r_S7*-4(sp) # push s7 sw t8, r_T8*-4(sp) # push t8 sw t9, r_T9*-4(sp) # push t9 sw k0, r_K0*-4(sp) # push k0 sw k1, r_K1*-4(sp) # push k1 sw ra, r_RA*-4(sp) # push RA # Note: even if we don't save all 32 registers, we still need to # adjust SP by 32 registers due to the way we are storing # the registers on the stack. sub sp, (32-0)*4 # adjust stack pointer la a2, .LdvmSelfVerificationMemOpDecode # defined in footer.S lw a2, (a2) move a0, ra # a0<- link register move a1, sp # a1<- stack pointer JALR(a2) /* pop gp registers (except zero, gp, sp, and fp) */ # Note: even if we don't save all 32 registers, we still need to # adjust SP by 32 registers due to the way we are storing # the registers on the stack. 
add sp, (32-0)*4 # adjust stack pointer .set noat lw AT, r_AT*-4(sp) # pop at .set at lw v0, r_V0*-4(sp) # pop v0 lw v1, r_V1*-4(sp) # pop v1 lw a0, r_A0*-4(sp) # pop a0 lw a1, r_A1*-4(sp) # pop a1 lw a2, r_A2*-4(sp) # pop a2 lw a3, r_A3*-4(sp) # pop a3 lw t0, r_T0*-4(sp) # pop t0 lw t1, r_T1*-4(sp) # pop t1 lw t2, r_T2*-4(sp) # pop t2 lw t3, r_T3*-4(sp) # pop t3 lw t4, r_T4*-4(sp) # pop t4 lw t5, r_T5*-4(sp) # pop t5 lw t6, r_T6*-4(sp) # pop t6 lw t7, r_T7*-4(sp) # pop t7 lw s0, r_S0*-4(sp) # pop s0 lw s1, r_S1*-4(sp) # pop s1 lw s2, r_S2*-4(sp) # pop s2 lw s3, r_S3*-4(sp) # pop s3 lw s4, r_S4*-4(sp) # pop s4 lw s5, r_S5*-4(sp) # pop s5 lw s6, r_S6*-4(sp) # pop s6 lw s7, r_S7*-4(sp) # pop s7 lw t8, r_T8*-4(sp) # pop t8 lw t9, r_T9*-4(sp) # pop t9 lw k0, r_K0*-4(sp) # pop k0 lw k1, r_K1*-4(sp) # pop k1 lw ra, r_RA*-4(sp) # pop RA #ifdef HARD_FLOAT /* pop f0-f31 from stack */ add sp, (32-0)*4 # adjust stack pointer lw f0, fr0*-4(sp) # pop f0 lw f1, fr1*-4(sp) # pop f1 lw f2, fr2*-4(sp) # pop f2 lw f3, fr3*-4(sp) # pop f3 lw f4, fr4*-4(sp) # pop f4 lw f5, fr5*-4(sp) # pop f5 lw f6, fr6*-4(sp) # pop f6 lw f7, fr7*-4(sp) # pop f7 lw f8, fr8*-4(sp) # pop f8 lw f9, fr9*-4(sp) # pop f9 lw f10, fr10*-4(sp) # pop f10 lw f11, fr11*-4(sp) # pop f11 lw f12, fr12*-4(sp) # pop f12 lw f13, fr13*-4(sp) # pop f13 lw f14, fr14*-4(sp) # pop f14 lw f15, fr15*-4(sp) # pop f15 lw f16, fr16*-4(sp) # pop f16 lw f17, fr17*-4(sp) # pop f17 lw f18, fr18*-4(sp) # pop f18 lw f19, fr19*-4(sp) # pop f19 lw f20, fr20*-4(sp) # pop f20 lw f21, fr21*-4(sp) # pop f21 lw f22, fr22*-4(sp) # pop f22 lw f23, fr23*-4(sp) # pop f23 lw f24, fr24*-4(sp) # pop f24 lw f25, fr25*-4(sp) # pop f25 lw f26, fr26*-4(sp) # pop f26 lw f27, fr27*-4(sp) # pop f27 lw f28, fr28*-4(sp) # pop f28 lw f29, fr29*-4(sp) # pop f29 lw f30, fr30*-4(sp) # pop f30 lw f31, fr31*-4(sp) # pop f31 #endif RETURN #endif /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_STRING_COMPARETO dvmCompiler_TEMPLATE_STRING_COMPARETO: /* File: mips/TEMPLATE_STRING_COMPARETO.S */ /* * String's compareTo. * * Requires a0/a1 to have been previously checked for null. Will * return negative if this's string is < comp, 0 if they are the * same and positive if >. * * IMPORTANT NOTE: * * This code relies on hard-coded offsets for string objects, and must be * kept in sync with definitions in UtfString.h. See asm-constants.h * * On entry: * a0: this object pointer * a1: comp object pointer * */ subu v0, a0, a1 # Same? bnez v0, 1f RETURN 1: lw t0, STRING_FIELDOFF_OFFSET(a0) lw t1, STRING_FIELDOFF_OFFSET(a1) lw t2, STRING_FIELDOFF_COUNT(a0) lw a2, STRING_FIELDOFF_COUNT(a1) lw a0, STRING_FIELDOFF_VALUE(a0) lw a1, STRING_FIELDOFF_VALUE(a1) /* * At this point, we have this/comp: * offset: t0/t1 * count: t2/a2 * value: a0/a1 * We're going to compute * a3 <- countDiff * a2 <- minCount */ subu a3, t2, a2 # a3<- countDiff sleu t7, t2, a2 movn a2, t2, t7 # a2<- minCount /* * Note: data pointers point to first element. 
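 *
 * Roughly, in C terms (fields follow the STRING_FIELDOFF_* constants used
 * above; locals are illustrative only):
 *
 *     countDiff = this->count - comp->count;
 *     minCount  = (this->count < comp->count) ? this->count : comp->count;
 *     for (i = 0; i < minCount; i++) {
 *         d = this->value[this->offset + i] - comp->value[comp->offset + i];
 *         if (d != 0)
 *             return d;
 *     }
 *     return countDiff;
 *
 * The loops below unroll this, and strings with more than ~28 chars left to
 * compare are handed off to __memcmp16 instead.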
*/ addu a0, 16 # point to contents[0] addu a1, 16 # point to contents[0] /* Now, build pointers to the string data */ sll t7, t0, 1 # multiply offset by 2 addu a0, a0, t7 sll t7, t1, 1 # multiply offset by 2 addu a1, a1, t7 /* * At this point we have: * a0: *this string data * a1: *comp string data * a2: iteration count for comparison * a3: value to return if the first part of the string is equal * v0: reserved for result * t0-t5 available for loading string data */ subu a2, 2 bltz a2, do_remainder2 /* * Unroll the first two checks so we can quickly catch early mismatch * on long strings (but preserve incoming alignment) */ lhu t0, 0(a0) lhu t1, 0(a1) subu v0, t0, t1 beqz v0, 1f RETURN 1: lhu t2, 2(a0) lhu t3, 2(a1) subu v0, t2, t3 beqz v0, 2f RETURN 2: addu a0, 4 # offset to contents[2] addu a1, 4 # offset to contents[2] li t7, 28 bgt a2, t7, do_memcmp16 subu a2, 3 bltz a2, do_remainder loopback_triple: lhu t0, 0(a0) lhu t1, 0(a1) subu v0, t0, t1 beqz v0, 1f RETURN 1: lhu t2, 2(a0) lhu t3, 2(a1) subu v0, t2, t3 beqz v0, 2f RETURN 2: lhu t4, 4(a0) lhu t5, 4(a1) subu v0, t4, t5 beqz v0, 3f RETURN 3: addu a0, 6 # offset to contents[i+3] addu a1, 6 # offset to contents[i+3] subu a2, 3 bgez a2, loopback_triple do_remainder: addu a2, 3 beqz a2, returnDiff loopback_single: lhu t0, 0(a0) lhu t1, 0(a1) subu v0, t0, t1 bnez v0, 1f addu a0, 2 # offset to contents[i+1] addu a1, 2 # offset to contents[i+1] subu a2, 1 bnez a2, loopback_single returnDiff: move v0, a3 1: RETURN do_remainder2: addu a2, 2 bnez a2, loopback_single move v0, a3 RETURN /* Long string case */ do_memcmp16: move rOBJ, a3 # save return value if strings are equal JAL(__memcmp16) seq t0, v0, zero movn v0, rOBJ, t0 # overwrite return value if strings are equal RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_STRING_INDEXOF dvmCompiler_TEMPLATE_STRING_INDEXOF: /* File: mips/TEMPLATE_STRING_INDEXOF.S */ /* * String's indexOf. * * Requires a0 to have been previously checked for null. Will * return index of match of a1 in v0. 
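 *
 * Roughly equivalent C (illustrative only; chars are 16-bit code units):
 *
 *     if (start < 0)     start = 0;
 *     if (start > count) start = count;
 *     for (i = start; i < count; i++)
 *         if (value[offset + i] == ch)
 *             return i;
 *     return -1;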
 *
 * IMPORTANT NOTE:
 *
 * This code relies on hard-coded offsets for string objects, and must be
 * kept in sync with definitions in UtfString.h.  See asm-constants.h
 *
 * On entry:
 *    a0:   string object pointer
 *    a1:   char to match
 *    a2:   Starting offset in string data
 */

    lw    t0, STRING_FIELDOFF_OFFSET(a0)
    lw    t1, STRING_FIELDOFF_COUNT(a0)
    lw    v0, STRING_FIELDOFF_VALUE(a0)

    /*
     * At this point, we have:
     *    v0: object pointer
     *    a1: char to match
     *    a2: starting offset
     *    t0: offset
     *    t1: string length
     */

    /* Point to first element */
    addu  v0, 16                        # point to contents[0]

    /* Build pointer to start of string data */
    sll   t7, t0, 1                     # multiply offset by 2
    addu  v0, v0, t7

    /* Save a copy of starting data in v1 */
    move  v1, v0

    /* Clamp start to [0..count] */
    slt   t7, a2, zero
    movn  a2, zero, t7
    sgt   t7, a2, t1
    movn  a2, t1, t7

    /* Build pointer to start of data to compare */
    sll   t7, a2, 1                     # multiply offset by 2
    addu  v0, v0, t7

    /* Compute iteration count */
    subu  a3, t1, a2

    /*
     * At this point we have:
     *    v0: start of data to test
     *    a1: char to compare
     *    a3: iteration count
     *    v1: original start of string
     *    t0-t7 available for loading string data
     */
    subu  a3, 4
    bltz  a3, indexof_remainder

indexof_loop4:
    lhu   t0, 0(v0)
    beq   t0, a1, match_0
    lhu   t0, 2(v0)
    beq   t0, a1, match_1
    lhu   t0, 4(v0)
    beq   t0, a1, match_2
    lhu   t0, 6(v0)
    beq   t0, a1, match_3
    addu  v0, 8                         # offset to contents[i+4]
    subu  a3, 4
    bgez  a3, indexof_loop4

indexof_remainder:
    addu  a3, 4
    beqz  a3, indexof_nomatch

indexof_loop1:
    lhu   t0, 0(v0)
    beq   t0, a1, match_0
    addu  v0, 2                         # offset to contents[i+1]
    subu  a3, 1
    bnez  a3, indexof_loop1

indexof_nomatch:
    li    v0, -1
    RETURN

match_0:
    subu  v0, v1
    sra   v0, v0, 1                     # divide by 2
    RETURN
match_1:
    addu  v0, 2
    subu  v0, v1
    sra   v0, v0, 1                     # divide by 2
    RETURN
match_2:
    addu  v0, 4
    subu  v0, v1
    sra   v0, v0, 1                     # divide by 2
    RETURN
match_3:
    addu  v0, 6
    subu  v0, v1
    sra   v0, v0, 1                     # divide by 2
    RETURN

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INTERPRET
dvmCompiler_TEMPLATE_INTERPRET:
/* File: mips/TEMPLATE_INTERPRET.S */
    /*
     * This handler transfers control to the interpreter without performing
     * any lookups.  It may be called either as part of a normal chaining
     * operation, or from the transition code in header.S.  We distinguish
     * the two cases by looking at the link register.  If called from a
     * translation chain, it will point to the chaining Dalvik PC.
     * On entry:
     *    ra - if NULL:
     *        a1 - the Dalvik PC to begin interpretation.
     *    else
     *        [ra] contains Dalvik PC to begin interpretation
     *    rSELF - pointer to thread
     *    rFP - Dalvik frame pointer
     */
    la    t0, dvmJitToInterpPunt
    move  a0, a1
    beq   ra, zero, 1f
    lw    a0, 0(ra)
1:
    jr    t0                            # doesn't return

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
dvmCompiler_TEMPLATE_MONITOR_ENTER:
/* File: mips/TEMPLATE_MONITOR_ENTER.S */
    /*
     * Call out to the runtime to lock an object.  Because this thread
     * may have been suspended in THREAD_MONITOR state and the Jit's
     * translation cache subsequently cleared, we cannot return directly.
     * Instead, unconditionally transition to the interpreter to resume.
     *
     * On entry:
     *    a0 - self pointer
     *    a1 - the object (which has already been null-checked by the caller)
     *    rPC - the Dalvik PC of the following instruction.
*/ la a2, .LdvmLockObject lw t9, (a2) sw zero, offThread_inJitCodeCache(a0) # record that we're not returning JALR(t9) # dvmLockObject(self, obj) lw gp, STACK_OFFSET_GP(sp) la a2, .LdvmJitToInterpNoChain lw a2, (a2) # Bail to interpreter - no chain [note - rPC still contains dPC] #if defined(WITH_JIT_TUNING) li a0, kHeavyweightMonitor #endif jr a2 /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG: /* File: mips/TEMPLATE_MONITOR_ENTER_DEBUG.S */ /* * To support deadlock prediction, this version of MONITOR_ENTER * will always call the heavyweight dvmLockObject, check for an * exception and then bail out to the interpreter. * * On entry: * a0 - self pointer * a1 - the object (which has already been null-checked by the caller * rPC - the Dalvik PC of the following instruction. * */ la a2, .LdvmLockObject lw t9, (a2) sw zero, offThread_inJitCodeCache(a0) # record that we're not returning JALR(t9) # dvmLockObject(self, obj) lw gp, STACK_OFFSET_GP(sp) # test for exception lw a1, offThread_exception(rSELF) beqz a1, 1f sub a0, rPC, 2 # roll dPC back to this monitor instruction j .LhandleException 1: # Bail to interpreter - no chain [note - rPC still contains dPC] #if defined(WITH_JIT_TUNING) li a0, kHeavyweightMonitor #endif la a2, .LdvmJitToInterpNoChain lw a2, (a2) jr a2 /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_RESTORE_STATE dvmCompiler_TEMPLATE_RESTORE_STATE: /* File: mips/TEMPLATE_RESTORE_STATE.S */ /* * This handler restores state following a selfVerification memory access. * On entry: * a0 - offset from rSELF to the 1st element of the coreRegs save array. * Note: the following registers are not restored * zero, AT, gp, sp, fp, ra */ add a0, a0, rSELF # pointer to heapArgSpace.coreRegs[0] #if 0 lw zero, r_ZERO*4(a0) # restore zero #endif .set noat lw AT, r_AT*4(a0) # restore at .set at lw v0, r_V0*4(a0) # restore v0 lw v1, r_V1*4(a0) # restore v1 lw a1, r_A1*4(a0) # restore a1 lw a2, r_A2*4(a0) # restore a2 lw a3, r_A3*4(a0) # restore a3 lw t0, r_T0*4(a0) # restore t0 lw t1, r_T1*4(a0) # restore t1 lw t2, r_T2*4(a0) # restore t2 lw t3, r_T3*4(a0) # restore t3 lw t4, r_T4*4(a0) # restore t4 lw t5, r_T5*4(a0) # restore t5 lw t6, r_T6*4(a0) # restore t6 lw t7, r_T7*4(a0) # restore t7 lw s0, r_S0*4(a0) # restore s0 lw s1, r_S1*4(a0) # restore s1 lw s2, r_S2*4(a0) # restore s2 lw s3, r_S3*4(a0) # restore s3 lw s4, r_S4*4(a0) # restore s4 lw s5, r_S5*4(a0) # restore s5 lw s6, r_S6*4(a0) # restore s6 lw s7, r_S7*4(a0) # restore s7 lw t8, r_T8*4(a0) # restore t8 lw t9, r_T9*4(a0) # restore t9 lw k0, r_K0*4(a0) # restore k0 lw k1, r_K1*4(a0) # restore k1 #if 0 lw gp, r_GP*4(a0) # restore gp lw sp, r_SP*4(a0) # restore sp lw fp, r_FP*4(a0) # restore fp lw ra, r_RA*4(a0) # restore ra #endif /* #ifdef HARD_FLOAT */ #if 0 lw f0, fr0*4(a0) # restore f0 lw f1, fr1*4(a0) # restore f1 lw f2, fr2*4(a0) # restore f2 lw f3, fr3*4(a0) # restore f3 lw f4, fr4*4(a0) # restore f4 lw f5, fr5*4(a0) # restore f5 lw f6, fr6*4(a0) # restore f6 lw f7, fr7*4(a0) # restore f7 lw f8, fr8*4(a0) # restore f8 lw f9, fr9*4(a0) # restore f9 lw f10, fr10*4(a0) # restore f10 lw f11, fr11*4(a0) # restore f11 lw f12, fr12*4(a0) # restore f12 lw f13, fr13*4(a0) # restore f13 lw f14, fr14*4(a0) # restore f14 lw f15, fr15*4(a0) # restore f15 lw f16, fr16*4(a0) # restore f16 lw f17, fr17*4(a0) # restore f17 lw f18, fr18*4(a0) # restore f18 lw f19, fr19*4(a0) # restore f19 lw f20, fr20*4(a0) # restore f20 lw f21, 
fr21*4(a0) # restore f21 lw f22, fr22*4(a0) # restore f22 lw f23, fr23*4(a0) # restore f23 lw f24, fr24*4(a0) # restore f24 lw f25, fr25*4(a0) # restore f25 lw f26, fr26*4(a0) # restore f26 lw f27, fr27*4(a0) # restore f27 lw f28, fr28*4(a0) # restore f28 lw f29, fr29*4(a0) # restore f29 lw f30, fr30*4(a0) # restore f30 lw f31, fr31*4(a0) # restore f31 #endif lw a0, r_A1*4(a0) # restore a0 RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_SAVE_STATE dvmCompiler_TEMPLATE_SAVE_STATE: /* File: mips/TEMPLATE_SAVE_STATE.S */ /* * This handler performs a register save for selfVerification mode. * On entry: * Top of stack + 4: a1 value to save * Top of stack + 0: a0 value to save * a0 - offset from rSELF to the beginning of the heapArgSpace record * a1 - the value of regMap * * The handler must save regMap, r0-r31, f0-f31 if FPU, and then return with * r0-r31 with their original values (note that this means a0 and a1 must take * the values on the stack - not the ones in those registers on entry. * Finally, the two registers previously pushed must be popped. * Note: the following registers are not saved * zero, AT, gp, sp, fp, ra */ add a0, a0, rSELF # pointer to heapArgSpace sw a1, 0(a0) # save regMap add a0, a0, 4 # pointer to coreRegs #if 0 sw zero, r_ZERO*4(a0) # save zero #endif .set noat sw AT, r_AT*4(a0) # save at .set at sw v0, r_V0*4(a0) # save v0 sw v1, r_V1*4(a0) # save v1 lw a1, 0(sp) # recover a0 value sw a1, r_A0*4(a0) # save a0 lw a1, 4(sp) # recover a1 value sw a1, r_A1*4(a0) # save a1 sw a2, r_A2*4(a0) # save a2 sw a3, r_A3*4(a0) # save a3 sw t0, r_T0*4(a0) # save t0 sw t1, r_T1*4(a0) # save t1 sw t2, r_T2*4(a0) # save t2 sw t3, r_T3*4(a0) # save t3 sw t4, r_T4*4(a0) # save t4 sw t5, r_T5*4(a0) # save t5 sw t6, r_T6*4(a0) # save t6 sw t7, r_T7*4(a0) # save t7 sw s0, r_S0*4(a0) # save s0 sw s1, r_S1*4(a0) # save s1 sw s2, r_S2*4(a0) # save s2 sw s3, r_S3*4(a0) # save s3 sw s4, r_S4*4(a0) # save s4 sw s5, r_S5*4(a0) # save s5 sw s6, r_S6*4(a0) # save s6 sw s7, r_S7*4(a0) # save s7 sw t8, r_T8*4(a0) # save t8 sw t9, r_T9*4(a0) # save t9 sw k0, r_K0*4(a0) # save k0 sw k1, r_K1*4(a0) # save k1 #if 0 sw gp, r_GP*4(a0) # save gp sw sp, r_SP*4(a0) # save sp (need to adjust??? 
) sw fp, r_FP*4(a0) # save fp sw ra, r_RA*4(a0) # save ra #endif /* #ifdef HARD_FLOAT */ #if 0 sw f0, fr0*4(a0) # save f0 sw f1, fr1*4(a0) # save f1 sw f2, fr2*4(a0) # save f2 sw f3, fr3*4(a0) # save f3 sw f4, fr4*4(a0) # save f4 sw f5, fr5*4(a0) # save f5 sw f6, fr6*4(a0) # save f6 sw f7, fr7*4(a0) # save f7 sw f8, fr8*4(a0) # save f8 sw f9, fr9*4(a0) # save f9 sw f10, fr10*4(a0) # save f10 sw f11, fr11*4(a0) # save f11 sw f12, fr12*4(a0) # save f12 sw f13, fr13*4(a0) # save f13 sw f14, fr14*4(a0) # save f14 sw f15, fr15*4(a0) # save f15 sw f16, fr16*4(a0) # save f16 sw f17, fr17*4(a0) # save f17 sw f18, fr18*4(a0) # save f18 sw f19, fr19*4(a0) # save f19 sw f20, fr20*4(a0) # save f20 sw f21, fr21*4(a0) # save f21 sw f22, fr22*4(a0) # save f22 sw f23, fr23*4(a0) # save f23 sw f24, fr24*4(a0) # save f24 sw f25, fr25*4(a0) # save f25 sw f26, fr26*4(a0) # save f26 sw f27, fr27*4(a0) # save f27 sw f28, fr28*4(a0) # save f28 sw f29, fr29*4(a0) # save f29 sw f30, fr30*4(a0) # save f30 sw f31, fr31*4(a0) # save f31 #endif lw a1, 0(sp) # recover a0 value lw a1, 4(sp) # recover a1 value sub sp, sp, 8 # adjust stack ptr RETURN /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING dvmCompiler_TEMPLATE_PERIODIC_PROFILING: /* File: mips/TEMPLATE_PERIODIC_PROFILING.S */ /* * Increment profile counter for this trace, and decrement * sample counter. If sample counter goes below zero, turn * off profiling. * * On entry * (ra-16) is address of pointer to counter. Note: the counter * actually exists 16 bytes before the return target for mips. * - 4 bytes for prof count addr. * - 4 bytes for chain cell offset (2bytes 32 bit aligned). * - 4 bytes for call TEMPLATE_PERIODIC_PROFILING. * - 4 bytes for call delay slot. */ lw a0, -16(ra) lw a1, offThread_pProfileCountdown(rSELF) lw a2, 0(a0) # get counter lw a3, 0(a1) # get countdown timer addu a2, 1 sub a3, 1 # FIXME - bug in ARM code??? bltz a3, .LTEMPLATE_PERIODIC_PROFILING_disable_profiling sw a2, 0(a0) sw a3, 0(a1) RETURN .LTEMPLATE_PERIODIC_PROFILING_disable_profiling: move rTEMP, ra # preserve ra la a0, dvmJitTraceProfilingOff JALR(a0) jr rTEMP /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_RETURN_PROF dvmCompiler_TEMPLATE_RETURN_PROF: /* File: mips/TEMPLATE_RETURN_PROF.S */ #define TEMPLATE_INLINE_PROFILING /* File: mips/TEMPLATE_RETURN.S */ /* * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX. * If the stored value in returnAddr * is non-zero, the caller is compiled by the JIT thus return to the * address in the code cache following the invoke instruction. Otherwise * return to the special dvmJitToInterpNoChain entry point. 
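 *
 * In outline (illustrative C, not the exact instruction order):
 *
 *     saveArea = fp - sizeof(StackSaveArea);
 *     retAddr  = saveArea->returnAddr;      // chaining cell, or 0
 *     pc       = saveArea->savedPc + 3;     // step past the invoke (3 code units)
 *     fp       = saveArea->prevFrame;
 *     if (breakFlags != 0) retAddr = 0;     // force a trip through the interpreter
 *     if (retAddr != 0)  jump to retAddr;
 *     else if (previous frame has no method)  bail out via dvmMterpStdBail;
 *     else  dvmJitToInterpNoChain(pc);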
*/ #if defined(TEMPLATE_INLINE_PROFILING) # preserve a0-a2 and ra SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(ra, 12) # a0=rSELF move a0, rSELF la t9, dvmFastMethodTraceExit JALR(t9) lw gp, STACK_OFFSET_GP(sp) # restore a0-a2 and ra SCRATCH_LOAD(ra, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) #endif SAVEAREA_FROM_FP(a0, rFP) # a0<- saveArea (old) lw t0, offStackSaveArea_prevFrame(a0) # t0<- saveArea->prevFrame lbu t1, offThread_breakFlags(rSELF) # t1<- breakFlags lw rPC, offStackSaveArea_savedPc(a0) # rPC<- saveArea->savedPc #if !defined(WITH_SELF_VERIFICATION) lw t2, offStackSaveArea_returnAddr(a0) # t2<- chaining cell ret #else move t2, zero # disable chaining #endif lw a2, offStackSaveArea_method - sizeofStackSaveArea(t0) # a2<- method we're returning to #if !defined(WITH_SELF_VERIFICATION) beq a2, zero, 1f # bail to interpreter #else bne a2, zero, 2f JALR(ra) # punt to interpreter and compare state # DOUG: assume this does not return ??? 2: #endif la t4, .LdvmJitToInterpNoChainNoProfile # defined in footer.S lw a1, (t4) move rFP, t0 # publish new FP beq a2, zero, 4f lw t0, offMethod_clazz(a2) # t0<- method->clazz 4: sw a2, offThread_method(rSELF) # self->method = newSave->method lw a0, offClassObject_pDvmDex(t0) # a0<- method->clazz->pDvmDex sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp add rPC, rPC, 3*2 # publish new rPC sw a0, offThread_methodClassDex(rSELF) movn t2, zero, t1 # check the breadFlags and # clear the chaining cell address sw t2, offThread_inJitCodeCache(rSELF) # in code cache or not beq t2, zero, 3f # chaining cell exists? JALR(t2) # jump to the chaining cell # DOUG: assume this does not return ??? 3: #if defined(WITH_JIT_TUNING) li a0, kCallsiteInterpreted #endif j a1 # callsite is interpreted 1: sw zero, offThread_inJitCodeCache(rSELF) # reset inJitCodeCache SAVE_PC_TO_SELF() # SAVE_PC_FP_TO_SELF() SAVE_FP_TO_SELF() la t4, .LdvmMterpStdBail # defined in footer.S lw a2, (t4) move a0, rSELF # Expecting rSELF in a0 JALR(a2) # exit the interpreter # DOUG: assume this does not return ??? #undef TEMPLATE_INLINE_PROFILING /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF: /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */ #define TEMPLATE_INLINE_PROFILING /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */ /* * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC * into rPC then jump to dvmJitToInterpNoChain to dispatch the * runtime-resolved callee. */ # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags move a3, a1 # a3<- returnCell SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) sub t0, t0, t6 # t0<- bottom (newsave-outsSize) bgeu t0, t9, 1f # bottom < interpStackEnd? RETURN # return to raise stack overflow excep. 
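    /*
     * Frame math used above (sketch): the callee frame is carved out below the
     * caller's, with
     *
     *     newFp  = oldFp - sizeof(StackSaveArea) - 4 * methodToCall->registersSize
     *     bottom = newFp - sizeof(StackSaveArea) - 4 * methodToCall->outsSize
     *
     * The bgeu above only reaches the label below when bottom >= interpStackEnd;
     * otherwise we RETURN so the stack overflow exception can be raised.
     */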
1: # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz lw t0, offMethod_accessFlags(a0) # t0<- methodToCall->accessFlags sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns # set up newSaveArea sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) beqz t8, 2f # breakFlags != 0 RETURN # bail to the interpreter 2: and t6, t0, ACC_NATIVE beqz t6, 3f #if !defined(WITH_SELF_VERIFICATION) j .LinvokeNative #else RETURN # bail to the interpreter #endif 3: # continue executing the next instruction through the interpreter la t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S lw rTEMP, (t0) lw a3, offClassObject_pDvmDex(t9) # a3<- method->clazz->pDvmDex # Update "thread" values for the new method sw a0, offThread_method(rSELF) # self->method = methodToCall sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... move rFP, a1 # fp = newFp sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp #if defined(TEMPLATE_INLINE_PROFILING) # preserve a0-a3 SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(a3, 12) # a0=methodToCall, a1=rSELF move a1, rSELF la t9, dvmFastMethodTraceEnter JALR(t9) lw gp, STACK_OFFSET_GP(sp) # restore a0-a3 SCRATCH_LOAD(a3, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) #endif # Start executing the callee #if defined(WITH_JIT_TUNING) li a0, kInlineCacheMiss #endif jr rTEMP # dvmJitToInterpTraceSelectNoChain #undef TEMPLATE_INLINE_PROFILING /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF: /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */ #define TEMPLATE_INLINE_PROFILING /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */ /* * For monomorphic callsite, setup the Dalvik frame and return to the * Thumb code through the link register to transfer control to the callee * method through a dedicated chaining cell. */ # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite # methodToCall is guaranteed to be non-native .LinvokeChainProf: lh t7, offMethod_registersSize(a0) # t7<- methodToCall->regsSize lh a2, offMethod_outsSize(a0) # a2<- methodToCall->outsSize lw t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd lbu t8, offThread_breakFlags(rSELF) # t8<- breakFlags move a3, a1 # a3<- returnCell SAVEAREA_FROM_FP(a1, rFP) # a1<- stack save area sll t6, t7, 2 # multiply regsSize by 4 (4 bytes per reg) sub a1, a1, t6 # a1<- newFp(old savearea-regsSize) SAVEAREA_FROM_FP(t0, a1) # t0<- stack save area add t2, ra, 8 # setup the punt-to-interp address # 8 bytes skips branch and delay slot sll t6, a2, 2 # multiply outsSize by 4 (4 bytes per reg) sub t0, t0, t6 # t0<- bottom (newsave-outsSize) bgeu t0, t9, 1f # bottom < interpStackEnd? jr t2 # return to raise stack overflow excep. 
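    /*
     * Same stack-overflow check as the non-chained case, except that on failure
     * we bail through t2 = ra + 8 computed above, which skips the chaining
     * branch and its delay slot and lands on the chaining cell's
     * punt-to-interpreter path.
     */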
1: # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite lw t9, offMethod_clazz(a0) # t9<- methodToCall->clazz sw rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) sw rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1) lw rPC, offMethod_insns(a0) # rPC<- methodToCall->insns # set up newSaveArea sw rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1) sw a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1) sw a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1) beqz t8, 2f # breakFlags != 0 jr t2 # bail to the interpreter 2: lw a3, offClassObject_pDvmDex(t9) # a3<- methodToCall->clazz->pDvmDex # Update "thread" values for the new method sw a0, offThread_method(rSELF) # self->method = methodToCall sw a3, offThread_methodClassDex(rSELF) # self->methodClassDex = ... move rFP, a1 # fp = newFp sw rFP, offThread_curFrame(rSELF) # self->curFrame = newFp #if defined(TEMPLATE_INLINE_PROFILING) # preserve a0-a2 and ra SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(ra, 12) move a1, rSELF # a0=methodToCall, a1=rSELF la t9, dvmFastMethodTraceEnter jalr t9 lw gp, STACK_OFFSET_GP(sp) # restore a0-a2 and ra SCRATCH_LOAD(ra, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) #endif RETURN # return to the callee-chaining cell #undef TEMPLATE_INLINE_PROFILING /* ------------------------------ */ .balign 4 .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF: /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */ #define TEMPLATE_INLINE_PROFILING /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */ /* * For polymorphic callsite, check whether the cached class pointer matches * the current one. If so setup the Dalvik frame and return to the * Thumb code through the link register to transfer control to the callee * method through a dedicated chaining cell. * * The predicted chaining cell is declared in ArmLIR.h with the * following layout: * * typedef struct PredictedChainingCell { * u4 branch; * u4 delay_slot; * const ClassObject *clazz; * const Method *method; * u4 counter; * } PredictedChainingCell; * * Upon returning to the callsite: * - lr : to branch to the chaining cell * - lr+8 : to punt to the interpreter * - lr+16: to fully resolve the callee and may rechain. 
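 *
 * In outline: this->clazz is compared against the clazz cached in the cell; on
 * a match we branch straight to the monomorphic invoke path
 * (.LinvokeChainProf).  On a miss the shared rechain counter is decremented
 * (when the cell holds an initialized class) and we return to lr+16 so the
 * callsite can be fully re-resolved and possibly rechained, with: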
 *    a3 <- class
 */
    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
    lw    a3, offObject_clazz(a0)       # a3 <- this->class
    lw    rIBASE, 8(a2)                 # rIBASE <- predictedChainCell->clazz
    lw    a0, 12(a2)                    # a0 <- predictedChainCell->method
    lw    t1, offThread_icRechainCount(rSELF) # t1 <- shared rechainCount

#if defined(WITH_JIT_TUNING)
    la    rINST, .LdvmICHitCount
    #add  t2, t2, 1
    bne   a3, rIBASE, 1f
    nop
    lw    t2, 0(rINST)
    add   t2, t2, 1
    sw    t2, 0(rINST)
1:
    #add  t2, t2, 1
#endif

    beq   a3, rIBASE, .LinvokeChainProf # branch if predicted chain is valid
    lw    rINST, offClassObject_vtable(a3) # rINST <- this->class->vtable
    beqz  rIBASE, 2f                    # initialized class or not
    sub   a1, t1, 1                     # count--
    sw    a1, offThread_icRechainCount(rSELF) # write back to InterpState
    b     3f
2:
    move  a1, zero
3:
    add   ra, ra, 16                    # return to fully-resolve landing pad
    /*
     * a1 <- count
     * a2 <- &predictedChainCell
     * a3 <- this->class
     * rPC <- dPC
     * rINST <- this->class->vtable
     */
    RETURN

#undef TEMPLATE_INLINE_PROFILING

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh    t7, offMethod_registersSize(a0)     # t7<- methodToCall->regsSize
    lw    t9, offThread_interpStackEnd(rSELF) # t9<- interpStackEnd
    lbu   t8, offThread_breakFlags(rSELF)     # t8<- breakFlags
    move  a3, a1                              # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                 # a1<- stack save area
    sll   t6, t7, 2                           # multiply regsSize by 4 (4 bytes per reg)
    sub   a1, a1, t6                          # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                  # t0<- stack save area
    bgeu  t0, t9, 1f                          # bottom < interpStackEnd?
    RETURN                                    # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    sw    rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw    rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw    rPC, offMethod_insns(a0)            # rPC<- methodToCall->insns

    # set up newSaveArea
    sw    rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw    a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw    a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    lw    rTEMP, offMethod_nativeFunc(a0)     # rTEMP<- method->nativeFunc
#if !defined(WITH_SELF_VERIFICATION)
    beqz  t8, 2f                              # breakFlags != 0
    RETURN                                    # bail to the interpreter
2:
#else
    RETURN                                    # bail to the interpreter unconditionally
#endif

    # go ahead and transfer control to the native code
    lw    t6, offThread_jniLocal_topCookie(rSELF) # t6<- thread->localRef->...
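    # The a0-a3 setup below follows the native-call bridge convention, roughly
    # nativeFunc(args = newFp, pResult = &self->retval, methodToCall, self).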
sw a1, offThread_curFrame(rSELF) # self->curFrame = newFp sw zero, offThread_inJitCodeCache(rSELF) # not in the jit code cache sw t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1) # newFp->localRefCookie=top SAVEAREA_FROM_FP(rBIX, a1) # rBIX<- new stack save area move a2, a0 # a2<- methodToCall move a0, a1 # a0<- newFp add a1, rSELF, offThread_retval # a1<- &retval move a3, rSELF # a3<- self #if defined(TEMPLATE_INLINE_PROFILING) # a2: methodToCall # preserve a0-a3 SCRATCH_STORE(a0, 0) SCRATCH_STORE(a1, 4) SCRATCH_STORE(a2, 8) SCRATCH_STORE(a3, 12) move a0, a2 move a1, rSELF # a0=JNIMethod, a1=rSELF la t9, dvmFastMethodTraceEnter JALR(t9) # off to the native code lw gp, STACK_OFFSET_GP(sp) # restore a0-a3 SCRATCH_LOAD(a3, 12) SCRATCH_LOAD(a2, 8) SCRATCH_LOAD(a1, 4) SCRATCH_LOAD(a0, 0) move rOBJ, a2 # save a2 #endif JALR(rTEMP) # off to the native code lw gp, STACK_OFFSET_GP(sp) #if defined(TEMPLATE_INLINE_PROFILING) move a0, rOBJ move a1, rSELF # a0=JNIMethod, a1=rSELF la t9, dvmFastNativeMethodTraceExit JALR(t9) lw gp, STACK_OFFSET_GP(sp) #endif # native return; rBIX=newSaveArea # equivalent to dvmPopJniLocals lw a2, offStackSaveArea_returnAddr(rBIX) # a2 = chaining cell ret addr lw a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top lw a1, offThread_exception(rSELF) # check for exception sw rFP, offThread_curFrame(rSELF) # self->curFrame = fp sw a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top lw a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP) # a0 = dalvikCallsitePC bnez a1, .LhandleException # handle exception if any sw a2, offThread_inJitCodeCache(rSELF) # set the mode properly beqz a2, 3f jr a2 # go if return chaining cell still exist 3: # continue executing the next instruction through the interpreter la a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S lw a1, (a1) add rPC, a0, 3*2 # reconstruct new rPC (advance 3 dalvik instr) #if defined(WITH_JIT_TUNING) li a0, kCallsiteInterpreted #endif jr a1 #undef TEMPLATE_INLINE_PROFILING .size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart /* File: mips/footer.S */ /* * =========================================================================== * Common subroutines and data * =========================================================================== */ .section .data.rel.ro .align 4 .LinvokeNative: # Prep for the native call # a1 = newFP, a0 = methodToCall lw t9, offThread_jniLocal_topCookie(rSELF) # t9<- thread->localRef->... 
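    # Below, thread->subMode is tested for kSubModeMethodTrace: if tracing is
    # active the native call is bracketed by dvmFastMethodTraceEnter and
    # dvmFastNativeMethodTraceExit, otherwise the branch to 121f calls
    # methodToCall->nativeFunc directly; both paths rejoin at 212f.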
    sw    zero, offThread_inJitCodeCache(rSELF) # not in jit code cache
    sw    a1, offThread_curFrame(rSELF)         # self->curFrame = newFp
    sw    t9, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
                                                # newFp->localRefCookie=top
    lhu   ra, offThread_subMode(rSELF)
    SAVEAREA_FROM_FP(rBIX, a1)                  # rBIX<- new stack save area

    move  a2, a0                                # a2<- methodToCall
    move  a0, a1                                # a0<- newFp
    add   a1, rSELF, offThread_retval           # a1<- &retval
    move  a3, rSELF                             # a3<- self
    andi  ra, kSubModeMethodTrace
    beqz  ra, 121f
    # a2: methodToCall
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)
    move  rTEMP, a2                             # preserve a2

    move  a0, rTEMP
    move  a1, rSELF
    la    t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw    gp, STACK_OFFSET_GP(sp)

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)

    lw    t9, offMethod_nativeFunc(a2)
    JALR(t9)                                    # call methodToCall->nativeFunc
    lw    gp, STACK_OFFSET_GP(sp)

    move  a0, rTEMP
    move  a1, rSELF
    la    t9, dvmFastNativeMethodTraceExit
    JALR(t9)
    lw    gp, STACK_OFFSET_GP(sp)
    b     212f
121:
    lw    t9, offMethod_nativeFunc(a2)
    JALR(t9)                                    # call methodToCall->nativeFunc
    lw    gp, STACK_OFFSET_GP(sp)
212:
    # native return; rBIX=newSaveArea
    # equivalent to dvmPopJniLocals
    lw    a2, offStackSaveArea_returnAddr(rBIX)   # a2 = chaining cell ret addr
    lw    a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    lw    a1, offThread_exception(rSELF)        # check for exception
    sw    rFP, offThread_curFrame(rSELF)        # self->curFrame = fp
    sw    a0, offThread_jniLocal_topCookie(rSELF) # new top <- old top
    lw    a0, offStackSaveArea_savedPc(rBIX)    # reload rPC

    # a0 = dalvikCallsitePC
    bnez  a1, .LhandleException                 # handle exception if any

    sw    a2, offThread_inJitCodeCache(rSELF)   # set the mode properly
    beqz  a2, 3f
    jr    a2                                    # go if return chaining cell still exists
3:
    # continue executing the next instruction through the interpreter
    la    a1, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
    lw    a1, (a1)
    add   rPC, a0, 3*2                          # reconstruct new rPC

#if defined(WITH_JIT_TUNING)
    li    a0, kCallsiteInterpreted
#endif

    jr    a1

/*
 * On entry:
 * a0  Faulting Dalvik PC
 */
.LhandleException:
#if defined(WITH_SELF_VERIFICATION)
    la    t0, .LdeadFood
    lw    t0, (t0)                  # should not see this under self-verification mode
    jr    t0
.LdeadFood:
    .word 0xdeadf00d
#endif
    sw    zero, offThread_inJitCodeCache(rSELF) # in interpreter land
    la    a1, .LdvmMterpCommonExceptionThrown   # PIC way of getting &func
    lw    a1, (a1)
    la    rIBASE, .LdvmAsmInstructionStart      # PIC way of getting &func
    lw    rIBASE, (rIBASE)
    move  rPC, a0                               # reload the faulting Dalvik address
    jr    a1                                    # branch to dvmMterpCommonExceptionThrown

    .align 4
.LdvmAsmInstructionStart:
    .word dvmAsmInstructionStart
.LdvmJitToInterpNoChainNoProfile:
    .word dvmJitToInterpNoChainNoProfile
.LdvmJitToInterpTraceSelectNoChain:
    .word dvmJitToInterpTraceSelectNoChain
.LdvmJitToInterpNoChain:
    .word dvmJitToInterpNoChain
.LdvmMterpStdBail:
    .word dvmMterpStdBail
.LdvmMterpCommonExceptionThrown:
    .word dvmMterpCommonExceptionThrown
.LdvmLockObject:
    .word dvmLockObject
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
    .word gDvmICHitCount
#endif
#if defined(WITH_SELF_VERIFICATION)
.LdvmSelfVerificationMemOpDecode:
    .word dvmSelfVerificationMemOpDecode
#endif

    .global dmvCompilerTemplateEnd
dmvCompilerTemplateEnd:

#endif /* WITH_JIT */