/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <sys/mman.h> #include "Dalvik.h" #include "libdex/DexOpcodes.h" #include "compiler/Compiler.h" #include "compiler/CompilerIR.h" #include "interp/Jit.h" #include "libdex/DexFile.h" #include "Lower.h" #include "NcgAot.h" #include "compiler/codegen/CompilerCodegen.h" /* Init values when a predicted chain is initially assembled */ /* E7FE is branch to self */ #define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe /* Target-specific save/restore */ extern "C" void dvmJitCalleeSave(double *saveArea); extern "C" void dvmJitCalleeRestore(double *saveArea); /* * Determine the initial instruction set to be used for this trace. * Later components may decide to change this. */ //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit) JitInstructionSetType dvmCompilerInstructionSet(void) { return DALVIK_JIT_IA32; } JitInstructionSetType dvmCompilerGetInterpretTemplateSet() { return DALVIK_JIT_IA32; } /* we don't use template for IA32 */ void *dvmCompilerGetInterpretTemplate() { return NULL; } /* Track the number of times that the code cache is patched */ #if defined(WITH_JIT_TUNING) #define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++) #else #define UPDATE_CODE_CACHE_PATCHES() #endif bool dvmCompilerArchInit() { /* Target-specific configuration */ gDvmJit.jitTableSize = 1 << 12; gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1; gDvmJit.threshold = 255; gDvmJit.codeCacheSize = 512*1024; gDvmJit.optLevel = kJitOptLevelO1; #if defined(WITH_SELF_VERIFICATION) /* Force into blocking mode */ gDvmJit.blockingMode = true; gDvm.nativeDebuggerActive = true; #endif // Make sure all threads have current values dvmJitUpdateThreadStateAll(); return true; } void dvmCompilerPatchInlineCache(void) { int i; PredictedChainingCell *minAddr, *maxAddr; /* Nothing to be done */ if (gDvmJit.compilerICPatchIndex == 0) return; /* * Since all threads are already stopped we don't really need to acquire * the lock. But race condition can be easily introduced in the future w/o * paying attention so we still acquire the lock here. 
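 * (Editor's note for context: the work orders drained here are queued by
 * inlineCachePatchEnqueue() below when a predicted chaining cell cannot be
 * patched inline; they are only applied at a safe point such as this one,
 * while all mutator threads are stopped.)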
*/ dvmLockMutex(&gDvmJit.compilerICPatchLock); UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex); /* Initialize the min/max address range */ minAddr = (PredictedChainingCell *) ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize); maxAddr = (PredictedChainingCell *) gDvmJit.codeCache; for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) { ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i]; PredictedChainingCell *cellAddr = workOrder->cellAddr; PredictedChainingCell *cellContent = &workOrder->cellContent; ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor, workOrder->classLoader); assert(clazz->serialNumber == workOrder->serialNumber); /* Use the newly resolved clazz pointer */ cellContent->clazz = clazz; if (cellAddr->clazz == NULL) { COMPILER_TRACE_CHAINING( ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized", cellAddr, cellContent->clazz->descriptor, cellContent->method->name)); } else { COMPILER_TRACE_CHAINING( ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) " "patched", cellAddr, cellAddr->clazz->descriptor, cellContent->clazz->descriptor, cellContent->method->name)); } /* Patch the chaining cell */ *cellAddr = *cellContent; minAddr = (cellAddr < minAddr) ? cellAddr : minAddr; maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr; } PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); gDvmJit.compilerICPatchIndex = 0; dvmUnlockMutex(&gDvmJit.compilerICPatchLock); } /* Target-specific cache clearing */ void dvmCompilerCacheClear(char *start, size_t size) { /* "0xFF 0xFF" is an invalid opcode for x86. */ memset(start, 0xFF, size); } /* for JIT debugging, to be implemented */ void dvmJitCalleeSave(double *saveArea) { } void dvmJitCalleeRestore(double *saveArea) { } void dvmJitToInterpSingleStep() { } JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc, const JitEntry *knownEntry) { return NULL; } void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c { } void dvmCompilerArchDump(void) { } char *getTraceBase(const JitEntry *p) { return NULL; } void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info) { } void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress) { } void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit) { // Method-based JIT not supported for x86. } void dvmJitScanAllClassPointers(void (*callback)(void *)) { } /* Handy function to retrieve the profile count */ static inline int getProfileCount(const JitEntry *entry) { if (entry->dPC == 0 || entry->codeAddress == 0) return 0; u4 *pExecutionCount = (u4 *) getTraceBase(entry); return pExecutionCount ? *pExecutionCount : 0; } /* qsort callback function */ static int sortTraceProfileCount(const void *entry1, const void *entry2) { const JitEntry *jitEntry1 = (const JitEntry *)entry1; const JitEntry *jitEntry2 = (const JitEntry *)entry2; JitTraceCounter_t count1 = getProfileCount(jitEntry1); JitTraceCounter_t count2 = getProfileCount(jitEntry2); return (count1 == count2) ? 0 : ((count1 > count2) ? 
-1 : 1); } /* Sort the trace profile counts and dump them */ void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c { JitEntry *sortedEntries; int numTraces = 0; unsigned long counts = 0; unsigned int i; /* Make sure that the table is not changing */ dvmLockMutex(&gDvmJit.tableLock); /* Sort the entries by descending order */ sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize); if (sortedEntries == NULL) goto done; memcpy(sortedEntries, gDvmJit.pJitEntryTable, sizeof(JitEntry) * gDvmJit.jitTableSize); qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry), sortTraceProfileCount); /* Dump the sorted entries */ for (i=0; i < gDvmJit.jitTableSize; i++) { if (sortedEntries[i].dPC != 0) { numTraces++; } } if (numTraces == 0) numTraces = 1; ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces)); free(sortedEntries); done: dvmUnlockMutex(&gDvmJit.tableLock); return; } void jumpWithRelOffset(char* instAddr, int relOffset) { stream = instAddr; OpndSize immSize = estOpndSizeFromImm(relOffset); relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond); dump_imm(Mnemonic_JMP, immSize, relOffset); } // works whether instructions for target basic block are generated or not LowOp* jumpToBasicBlock(char* instAddr, int targetId) { stream = instAddr; bool unknown; OpndSize size; int relativeNCG = targetId; relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size); unconditional_jump_int(relativeNCG, size); return NULL; } LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) { stream = instAddr; bool unknown; OpndSize size; int relativeNCG = targetId; relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size); conditional_jump_int(cc, relativeNCG, size); return NULL; } /* * Attempt to enqueue a work order to patch an inline cache for a predicted * chaining cell for virtual/interface calls. */ static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr, PredictedChainingCell *newContent) { bool result = true; /* * Make sure only one thread gets here since updating the cell (ie fast * path and queueing the request (ie the queued path) have to be done * in an atomic fashion. */ dvmLockMutex(&gDvmJit.compilerICPatchLock); /* Fast path for uninitialized chaining cell */ if (cellAddr->clazz == NULL && cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) { UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); cellAddr->method = newContent->method; cellAddr->branch = newContent->branch; cellAddr->branch2 = newContent->branch2; /* * The update order matters - make sure clazz is updated last since it * will bring the uninitialized chaining cell to life. 
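 * The release store below guarantees that the method/branch words written
 * above are visible to other threads before the non-NULL clazz is published.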
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(IA_JIT_TUNING)
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purpose only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name));
    } else {
        /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);

    return result;
}

/*
 * This method is called from the invoke templates for virtual and interface
 * methods to speculatively set up a chain to the callee. The templates are
 * written in assembly and have set up method, cell, and clazz at r0, r2, and
 * r3 respectively, so there is an unused argument in the list. Upon return,
 * one of the following three results may happen:
 *   1) Chain is not set up because the callee is native. Reset the rechain
 *      count to a big number so that it will take a long time before the next
 *      rechain attempt happens.
 *   2) Chain is not set up because the callee has not been created yet. Reset
 *      the rechain count to a small number and retry in the near future.
 *   3) Ask all other threads to stop before patching this chaining cell.
 *      This is required because another thread may have passed the class check
 *      but hasn't reached the chaining cell yet to follow the chain. If we
 *      patch the content before halting the other thread, there is a small
 *      window in which that thread may follow the new but wrong chain and
 *      invoke a different method.
 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    {
    int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

    /*
     * The callee has not been compiled yet. Reset the counter to a small
     * value and come back to check soon.
     */
    if ((tgtAddr == 0) ||
        ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                  cell, method->clazz->descriptor, method->name));
        goto done;
    }

    PredictedChainingCell newCell;

    if (cell->clazz == NULL) {
        newRechainCount = self->icRechainCount;
    }

    int relOffset = (int) tgtAddr - (int)cell;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
    relOffset -= jumpSize;
    COMPILER_TRACE_CHAINING(
        ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
              cell, method->clazz->descriptor, method->name, jumpSize));
    //can't use stream here since it is used by the compilation thread
    dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch

    newCell.clazz = clazz;
    newCell.method = method;

    /*
     * Enter the work order into the queue; the chaining cell will be patched
     * the next time a safe point is entered.
     *
     * If the enqueuing fails, reset the rechain count to a normal value so
     * that it won't get indefinitely delayed.
     */
    inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}

/*
 * Unchain a trace given the starting address of the translation
 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
 * For ARM, it returns the address following the last cell unchained.
 * For IA, it returns NULL since cacheflush is not required for IA.
 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, so is chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
          (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i,j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* for hot, normal, singleton chaining: nop //padding.
jmp 0 mov imm32, reg1 mov imm32, reg2 call reg2 after chaining: nop jmp imm mov imm32, reg1 mov imm32, reg2 call reg2 after unchaining: nop jmp 0 mov imm32, reg1 mov imm32, reg2 call reg2 Space occupied by the chaining cell in bytes: nop is for padding, jump 0, the target 0 is 4 bytes aligned. Space for predicted chaining: 5 words = 20 bytes */ int elemSize = 0; if (i == kChainingCellInvokePredicted) { elemSize = 20; } COMPILER_TRACE_CHAINING( ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i])); for (j = 0; j < pChainCellCounts->u.count[i]; j++) { switch(i) { case kChainingCellNormal: case kChainingCellHot: case kChainingCellInvokeSingleton: case kChainingCellBackwardBranch: COMPILER_TRACE_CHAINING( ALOGI("Jit Runtime: unchaining of normal, hot, or singleton")); pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03)); elemSize = 4+5+5+2; memset(pChainCells, 0, 4); break; case kChainingCellInvokePredicted: COMPILER_TRACE_CHAINING( ALOGI("Jit Runtime: unchaining of predicted")); /* 4-byte aligned */ padding = (4 - ((u4)pChainCells & 3)) & 3; pChainCells += padding; predChainCell = (PredictedChainingCell *) pChainCells; /* * There could be a race on another mutator thread to use * this particular predicted cell and the check has passed * the clazz comparison. So we cannot safely wipe the * method and branch but it is safe to clear the clazz, * which serves as the key. */ predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT; break; default: ALOGE("Unexpected chaining type: %d", i); dvmAbort(); // dvmAbort OK here - can't safely recover } COMPILER_TRACE_CHAINING( ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells)); pChainCells += elemSize; /* Advance by a fixed number of bytes */ } } return NULL; } /* Unchain all translation in the cache. */ void dvmJitUnchainAll() { ALOGV("Jit Runtime: unchaining all"); if (gDvmJit.pJitEntryTable != NULL) { COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all")); dvmLockMutex(&gDvmJit.tableLock); UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); for (size_t i = 0; i < gDvmJit.jitTableSize; i++) { if (gDvmJit.pJitEntryTable[i].dPC && !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry && gDvmJit.pJitEntryTable[i].codeAddress) { dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress); } } PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); dvmUnlockMutex(&gDvmJit.tableLock); gDvmJit.translationChains = 0; } gDvmJit.hasNewChain = false; } #define P_GPR_1 PhysicalReg_EBX /* Add an additional jump instruction, keep jump target 4 bytes aligned.*/ static void insertJumpHelp() { int rem = (uint)stream % 4; int nop_size = 3 - rem; dump_nop(nop_size); unconditional_jump_int(0, OpndSize_32); return; } /* Chaining cell for code that may need warmup. */ /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?) blx r0 data 0xb23a //bytecode address: 0x5115b23a data 0x5115 IA32 assembly: jmp 0 //5 bytes movl address, %ebx movl dvmJitToInterpNormal, %eax call %eax <-- return address */ static void handleNormalChainingCell(CompilationUnit *cUnit, unsigned int offset, int blockId, LowOpBlockLabel* labelList) { ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x", cUnit->method->name, blockId, offset, stream - streamMethodStart); if(dump_x86_inst) ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p", offset, stream - streamMethodStart, stream); /* Add one additional "jump 0" instruction, it may be modified during jit chaining. 
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/*
 * Chaining cell for instructions that immediately follow already translated
 * code.
 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId,
                                  LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction, it may be modified during jit chaining.
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                             unsigned int offset, int blockId,
                                             LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction, it may be modified during jit chaining.
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId,
                                              LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction, it may be modified during jit chaining.
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
#undef P_GPR_1

/* Chaining cell for polymorphic (predicted) method invocations.
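 * The cell is emitted below as five 32-bit words (20 bytes, see the
 * streamData[] stores):
 *   word 0: branch placeholder (PREDICTED_CHAIN_BX_PAIR_INIT)
 *   word 1: second branch word (initialized to 0)
 *   word 2: predicted class key (PREDICTED_CHAIN_CLAZZ_INIT until patched)
 *   word 3: target method       (PREDICTED_CHAIN_METHOD_INIT until patched)
 *   word 4: rechain counter     (PREDICTED_CHAIN_COUNTER_INIT, so the first
 *           invocation of this callsite triggers chaining)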
*/ static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId) { if(dump_x86_inst) ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p", blockId, stream - streamMethodStart, stream); #ifndef PREDICTED_CHAINING //assume rPC for callee->insns in %ebx scratchRegs[0] = PhysicalReg_EAX; call_dvmJitToInterpTraceSelectNoChain(); #else /* make sure section for predicited chaining cell is 4-byte aligned */ //int padding = (4 - ((u4)stream & 3)) & 3; //stream += padding; int* streamData = (int*)stream; /* Should not be executed in the initial state */ streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT; streamData[1] = 0; /* To be filled: class */ streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT; /* To be filled: method */ streamData[3] = PREDICTED_CHAIN_METHOD_INIT; /* * Rechain count. The initial value of 0 here will trigger chaining upon * the first invocation of this callsite. */ streamData[4] = PREDICTED_CHAIN_COUNTER_INIT; #if 0 ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)), *((int*)(stream+8)), *((int*)(stream+12))); #endif stream += 20; //5 *4 #endif } /* Load the Dalvik PC into r0 and jump to the specified target */ static void handlePCReconstruction(CompilationUnit *cUnit, LowOpBlockLabel *targetLabel) { #if 0 LowOp **pcrLabel = (LowOp **) cUnit->pcReconstructionList.elemList; int numElems = cUnit->pcReconstructionList.numUsed; int i; for (i = 0; i < numElems; i++) { dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); /* r0 = dalvik PC */ loadConstant(cUnit, r0, pcrLabel[i]->operands[0]); genUnconditionalBranch(cUnit, targetLabel); } #endif } //use O0 code generator for hoisted checks outside of the loop /* * vA = arrayReg; * vB = idxReg; * vC = endConditionReg; * arg[0] = maxC * arg[1] = minC * arg[2] = loopBranchConditionCode */ #define P_GPR_1 PhysicalReg_EBX #define P_GPR_2 PhysicalReg_ECX static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) { /* * NOTE: these synthesized blocks don't have ssa names assigned * for Dalvik registers. However, because they dominate the following * blocks we can simply use the Dalvik name w/ subscript 0 as the * ssa name. */ DecodedInstruction *dInsn = &mir->dalvikInsn; const int maxC = dInsn->arg[0]; /* assign array in virtual register to P_GPR_1 */ get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); /* assign index in virtual register to P_GPR_2 */ get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true); export_pc(); compare_imm_reg(OpndSize_32, 0, P_GPR_1, true); condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId); int delta = maxC; /* * If the loop end condition is ">=" instead of ">", then the largest value * of the index is "endCondition - 1". 
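 * Example: a loop that exits on (idx >= end) and accesses array[idx + maxC]
 * touches at most index (end - 1) + maxC, so the single hoisted test below
 * adds (maxC - 1) to the end condition and compares the sum against the
 * array length once, outside the loop.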
*/ if (dInsn->arg[2] == OP_IF_GE) { delta--; } if (delta < 0) { //+delta //if P_GPR_2 is mapped to a VR, we can't do this alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true); } else if(delta > 0) { alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true); } compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true); condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId); } /* * vA = arrayReg; * vB = idxReg; * vC = endConditionReg; * arg[0] = maxC * arg[1] = minC * arg[2] = loopBranchConditionCode */ static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) { DecodedInstruction *dInsn = &mir->dalvikInsn; const int maxC = dInsn->arg[0]; /* assign array in virtual register to P_GPR_1 */ get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); /* assign index in virtual register to P_GPR_2 */ get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true); export_pc(); compare_imm_reg(OpndSize_32, 0, P_GPR_1, true); condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId); if (maxC < 0) { //if P_GPR_2 is mapped to a VR, we can't do this alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true); } else if(maxC > 0) { alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true); } compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true); condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId); } #undef P_GPR_1 #undef P_GPR_2 /* * vA = idxReg; * vB = minC; */ #define P_GPR_1 PhysicalReg_ECX static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir) { DecodedInstruction *dInsn = &mir->dalvikInsn; const int minC = dInsn->vB; get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //array export_pc(); compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true); condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId); } #undef P_GPR_1 #ifdef WITH_JIT_INLINING static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir) { CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo; if(gDvm.executionMode == kExecutionModeNcgO0) { get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true); move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true); compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true); export_pc(); //use %edx conditional_jump_global_API(, Condition_E, "common_errNullObject", false); move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true); compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true); } else { get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false); move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false); nullCheck(5, false, 1, mir->dalvikInsn.vC); move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false); compare_reg_reg(4, false, 6, false); } //immdiate will be updated later in genLandingPadForMispredictedCallee streamMisPred = stream; callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8); } #endif /* Extended MIR instructions like PHI */ void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir) { ExecutionMode origMode = gDvm.executionMode; gDvm.executionMode = kExecutionModeNcgO0; switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) { case kMirOpPhi: { break; } case kMirOpNullNRangeUpCheck: { genHoistedChecksForCountUpLoop(cUnit, mir); break; } case kMirOpNullNRangeDownCheck: { genHoistedChecksForCountDownLoop(cUnit, mir); break; } case kMirOpLowerBound: { 
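            /* Hoisted check that idx + minC >= 0 (vA = index VR, vB = constant minC) */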
genHoistedLowerBoundCheck(cUnit, mir); break; } case kMirOpPunt: { break; } #ifdef WITH_JIT_INLINING case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c genValidationForPredictedInline(cUnit, mir); break; } #endif default: break; } gDvm.executionMode = origMode; } static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry, int bodyId) { /* * Next, create two branches - one branch over to the loop body and the * other branch to the PCR cell to punt. */ //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId); //setupResourceMasks(branchToBody); //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody); #if 0 LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true); branchToPCR->opCode = kThumbBUncond; branchToPCR->generic.target = (LIR *) pcrLabel; setupResourceMasks(branchToPCR); cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR; #endif } /* check whether we can merge the block at index i with its target block */ bool mergeBlock(BasicBlock *bb) { if(bb->blockType == kDalvikByteCode && bb->firstMIRInsn != NULL && (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 || bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO || bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) && bb->fallThrough == NULL) {// && //cUnit->hasLoop) { //ALOGI("merge blocks ending with goto at index %d", i); MIR* prevInsn = bb->lastMIRInsn->prev; if(bb->taken == NULL) return false; MIR* mergeInsn = bb->taken->firstMIRInsn; if(mergeInsn == NULL) return false; if(prevInsn == NULL) {//the block has a single instruction bb->firstMIRInsn = mergeInsn; } else { prevInsn->next = mergeInsn; //remove goto from the chain } mergeInsn->prev = prevInsn; bb->lastMIRInsn = bb->taken->lastMIRInsn; bb->taken->firstMIRInsn = NULL; //block being merged in bb->fallThrough = bb->taken->fallThrough; bb->taken = bb->taken->taken; return true; } return false; } static int genTraceProfileEntry(CompilationUnit *cUnit) { cUnit->headerSize = 6; if ((gDvmJit.profileMode == kTraceProfilingContinuous) || (gDvmJit.profileMode == kTraceProfilingDisabled)) { return 12; } else { return 4; } } #define PRINT_BUFFER_LEN 1024 /* Print the code block in code cache in the range of [startAddr, endAddr) * in readable format. 
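 * Two passes: the raw bytes are dumped first when gDvmJit.printBinary is set,
 * followed by a per-instruction disassembly; byte sequences the disassembler
 * rejects are matched against the common 1-, 2-, and 3-byte NOP encodings.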
*/ void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr) { char strbuf[PRINT_BUFFER_LEN]; unsigned char *addr; unsigned char *next_addr; int n; if (gDvmJit.printBinary) { // print binary in bytes n = 0; for (addr = startAddr; addr < endAddr; addr++) { n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr); if (n > PRINT_BUFFER_LEN - 10) { ALOGD("## %s", strbuf); n = 0; } } if (n > 0) ALOGD("## %s", strbuf); } // print disassembled instructions addr = startAddr; while (addr < endAddr) { next_addr = reinterpret_cast<unsigned char*> (decoder_disassemble_instr(reinterpret_cast<char*>(addr), strbuf, PRINT_BUFFER_LEN)); if (addr != next_addr) { ALOGD("** %p: %s", addr, strbuf); } else { // check whether this is nop padding if (addr[0] == 0x90) { ALOGD("** %p: NOP (1 byte)", addr); next_addr += 1; } else if (addr[0] == 0x66 && addr[1] == 0x90) { ALOGD("** %p: NOP (2 bytes)", addr); next_addr += 2; } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) { ALOGD("** %p: NOP (3 bytes)", addr); next_addr += 3; } else { ALOGD("** unable to decode binary at %p", addr); break; } } addr = next_addr; } } /* 4 is the number of additional bytes needed for chaining information for trace: * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */ #define EXTRA_BYTES_FOR_CHAINING 4 /* Entry function to invoke the backend of the JIT compiler */ void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info) { dump_x86_inst = cUnit->printMe; /* Used to hold the labels of each block */ LowOpBlockLabel *labelList = (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c LowOp *headLIR = NULL; GrowableList chainingListByType[kChainingCellLast]; unsigned int i, padding; /* * Initialize various types chaining lists. */ for (i = 0; i < kChainingCellLast; i++) { dvmInitGrowableList(&chainingListByType[i], 2); } /* Clear the visited flag for each block */ dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag, kAllNodes, false /* isIterative */); GrowableListIterator iterator; dvmGrowableListIteratorInit(&cUnit->blockList, &iterator); /* Traces start with a profiling entry point. Generate it here */ cUnit->profileCodeSize = genTraceProfileEntry(cUnit); //BasicBlock **blockList = cUnit->blockList; GrowableList *blockList = &cUnit->blockList; BasicBlock *bb; info->codeAddress = NULL; stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed; // TODO: compile into a temporary buffer and then copy into the code cache. // That would let us leave the code cache unprotected for a shorter time. size_t unprotected_code_cache_bytes = gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING; UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); streamStart = stream; /* trace start before alignment */ stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. 
Add the bytes before the alignment */ stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */ streamMethodStart = stream; /* code start */ for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) { labelList[i].lop.generic.offset = -1; } cUnit->exceptionBlockId = -1; for (i = 0; i < blockList->numUsed; i++) { bb = (BasicBlock *) blockList->elemList[i]; if(bb->blockType == kExceptionHandling) cUnit->exceptionBlockId = i; } startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit); if(gDvm.executionMode == kExecutionModeNcgO1) { //merge blocks ending with "goto" with the fall through block if (cUnit->jitMode != kJitLoop) for (i = 0; i < blockList->numUsed; i++) { bb = (BasicBlock *) blockList->elemList[i]; bool merged = mergeBlock(bb); while(merged) merged = mergeBlock(bb); } for (i = 0; i < blockList->numUsed; i++) { bb = (BasicBlock *) blockList->elemList[i]; if(bb->blockType == kDalvikByteCode && bb->firstMIRInsn != NULL) { preprocessingBB(bb); } } preprocessingTrace(); } /* Handle the content in each basic block */ for (i = 0; ; i++) { MIR *mir; bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator); if (bb == NULL) break; if (bb->visited == true) continue; labelList[i].immOpnd.value = bb->startOffset; if (bb->blockType >= kChainingCellLast) { /* * Append the label pseudo LIR first. Chaining cells will be handled * separately afterwards. */ dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]); } if (bb->blockType == kEntryBlock) { labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK; if (bb->firstMIRInsn == NULL) { continue; } else { setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id); //&labelList[blockList[i]->fallThrough->id]); } } else if (bb->blockType == kExitBlock) { labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK; labelList[i].lop.generic.offset = (stream - streamMethodStart); goto gen_fallthrough; } else if (bb->blockType == kDalvikByteCode) { if (bb->hidden == true) continue; labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL; /* Reset the register state */ #if 0 resetRegisterScoreboard(cUnit); #endif } else { switch (bb->blockType) { case kChainingCellNormal: labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL; /* handle the codegen later */ dvmInsertGrowableList( &chainingListByType[kChainingCellNormal], i); break; case kChainingCellInvokeSingleton: labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON; labelList[i].immOpnd.value = (int) bb->containingMethod; /* handle the codegen later */ dvmInsertGrowableList( &chainingListByType[kChainingCellInvokeSingleton], i); break; case kChainingCellInvokePredicted: labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED; /* * Move the cached method pointer from operand 1 to 0. * Operand 0 was clobbered earlier in this routine to store * the block starting offset, which is not applicable to * predicted chaining cell. 
*/ //TODO //labelList[i].operands[0] = labelList[i].operands[1]; /* handle the codegen later */ dvmInsertGrowableList( &chainingListByType[kChainingCellInvokePredicted], i); break; case kChainingCellHot: labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_HOT; /* handle the codegen later */ dvmInsertGrowableList( &chainingListByType[kChainingCellHot], i); break; case kPCReconstruction: /* Make sure exception handling block is next */ labelList[i].lop.opCode2 = ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL; //assert (i == cUnit->numBlocks - 2); labelList[i].lop.generic.offset = (stream - streamMethodStart); handlePCReconstruction(cUnit, &labelList[cUnit->puntBlock->id]); break; case kExceptionHandling: labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL; labelList[i].lop.generic.offset = (stream - streamMethodStart); //if (cUnit->pcReconstructionList.numUsed) { scratchRegs[0] = PhysicalReg_EAX; jumpToInterpPunt(); //call_dvmJitToInterpPunt(); //} break; case kChainingCellBackwardBranch: labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH; /* handle the codegen later */ dvmInsertGrowableList( &chainingListByType[kChainingCellBackwardBranch], i); break; default: break; } continue; } { //LowOp *headLIR = NULL; const DexCode *dexCode = dvmGetMethodCode(cUnit->method); const u2 *startCodePtr = dexCode->insns; const u2 *codePtr; labelList[i].lop.generic.offset = (stream - streamMethodStart); ALOGV("get ready to handle JIT bb %d type %d hidden %d", bb->id, bb->blockType, bb->hidden); for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) { bb = nextBB; bb->visited = true; cUnit->nextCodegenBlock = NULL; if(gDvm.executionMode == kExecutionModeNcgO1 && bb->blockType != kEntryBlock && bb->firstMIRInsn != NULL) { startOfBasicBlock(bb); int cg_ret = codeGenBasicBlockJit(cUnit->method, bb); endOfBasicBlock(bb); if(cg_ret < 0) { endOfTrace(true/*freeOnly*/); cUnit->baseAddr = NULL; ALOGI("codeGenBasicBlockJit returns negative number"); PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); return; } } else { for (mir = bb->firstMIRInsn; mir; mir = mir->next) { startOfBasicBlock(bb); //why here for O0 Opcode dalvikOpCode = mir->dalvikInsn.opcode; if((int)dalvikOpCode >= (int)kMirOpFirst) { handleExtendedMIR(cUnit, mir); continue; } InstructionFormat dalvikFormat = dexGetFormatFromOpcode(dalvikOpCode); ALOGV("ready to handle bytecode at offset %x: opcode %d format %d", mir->offset, dalvikOpCode, dalvikFormat); LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset); /* Remember the first LIR for this block */ if (headLIR == NULL) { headLIR = (LowOp*)boundaryLIR; } bool notHandled = true; /* * Debugging: screen the opcode first to see if it is in the * do[-not]-compile list */ bool singleStepMe = gDvmJit.includeSelectedOp != ((gDvmJit.opList[dalvikOpCode >> 3] & (1 << (dalvikOpCode & 0x7))) != 0); if (singleStepMe || cUnit->allSingleStep) { } else { codePtr = startCodePtr + mir->offset; //lower each byte code, update LIR notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir); if(gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart)); gDvmJit.codeCacheFull = true; cUnit->baseAddr = NULL; endOfTrace(true/*freeOnly*/); PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); return; } } if (notHandled) { ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled", mir->offset, 
dalvikOpCode, dexGetOpcodeName(dalvikOpCode), dalvikFormat); dvmAbort(); break; } } // end for } // end else //JIT + O0 code generator } } // end for /* Eliminate redundant loads/stores and delay stores into later slots */ #if 0 dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR, cUnit->lastLIRInsn); #endif if (headLIR) headLIR = NULL; gen_fallthrough: /* * Check if the block is terminated due to trace length constraint - * insert an unconditional branch to the chaining cell. */ if (bb->needFallThroughBranch) { jumpToBasicBlock(stream, bb->fallThrough->id); } } char* streamChainingStart = (char*)stream; /* Handle the chaining cells in predefined order */ for (i = 0; i < kChainingCellGap; i++) { size_t j; int *blockIdList = (int *) chainingListByType[i].elemList; cUnit->numChainingCells[i] = chainingListByType[i].numUsed; /* No chaining cells of this type */ if (cUnit->numChainingCells[i] == 0) continue; /* Record the first LIR for a new type of chaining cell */ cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]]; for (j = 0; j < chainingListByType[i].numUsed; j++) { int blockId = blockIdList[j]; BasicBlock *chainingBlock = (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, blockId); labelList[blockId].lop.generic.offset = (stream - streamMethodStart); /* Align this chaining cell first */ #if 0 newLIR0(cUnit, ATOM_PSEUDO_ALIGN4); #endif /* Insert the pseudo chaining instruction */ dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]); switch (chainingBlock->blockType) { case kChainingCellNormal: handleNormalChainingCell(cUnit, chainingBlock->startOffset, blockId, labelList); break; case kChainingCellInvokeSingleton: handleInvokeSingletonChainingCell(cUnit, chainingBlock->containingMethod, blockId, labelList); break; case kChainingCellInvokePredicted: handleInvokePredictedChainingCell(cUnit, blockId); break; case kChainingCellHot: handleHotChainingCell(cUnit, chainingBlock->startOffset, blockId, labelList); break; case kChainingCellBackwardBranch: handleBackwardBranchChainingCell(cUnit, chainingBlock->startOffset, blockId, labelList); break; default: ALOGE("Bad blocktype %d", chainingBlock->blockType); dvmAbort(); break; } if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart)); gDvmJit.codeCacheFull = true; cUnit->baseAddr = NULL; endOfTrace(true); /* need to free structures */ PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); return; } } } #if 0 dvmCompilerApplyGlobalOptimizations(cUnit); #endif endOfTrace(false); if (gDvmJit.codeCacheFull) { /* We hit code cache size limit inside endofTrace(false). * Bail out for this trace! 
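         * baseAddr is left NULL so no translation is installed for this trace.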
*/ ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart)); cUnit->baseAddr = NULL; PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); return; } /* dump section for chaining cell counts, make sure it is 4-byte aligned */ padding = (4 - ((u4)stream & 3)) & 3; stream += padding; ChainCellCounts chainCellCounts; /* Install the chaining cell counts */ for (i=0; i< kChainingCellGap; i++) { chainCellCounts.u.count[i] = cUnit->numChainingCells[i]; } char* streamCountStart = (char*)stream; memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts)); stream += sizeof(chainCellCounts); cUnit->baseAddr = streamMethodStart; cUnit->totalSize = (stream - streamStart); if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart)); gDvmJit.codeCacheFull = true; cUnit->baseAddr = NULL; PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); return; } /* write chaining cell count offset & chaining cell offset */ u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */ *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */ pOffset[1] = streamChainingStart - streamMethodStart; PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); gDvmJit.codeCacheByteUsed += (stream - streamStart); if (cUnit->printMe) { unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr; unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed; ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p", cUnit->method->clazz->descriptor, cUnit->method->name, codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache); ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor, cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset); printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext); } ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr, (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed, cUnit->totalSize, gDvmJit.codeCache); gDvmJit.numCompilations++; info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize; } /* * Perform translation chain operation. */ void* dvmJitChain(void* tgtAddr, u4* branchAddr) { #ifdef JIT_CHAIN int relOffset = (int) tgtAddr - (int)branchAddr; if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) && (gDvmJit.codeCacheFull == false)) { gDvmJit.translationChains++; //OpndSize immSize = estOpndSizeFromImm(relOffset); //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond); /* Hard coded the jump opnd size to 32 bits, This instruction will replace the "jump 0" in * the original code sequence. */ OpndSize immSize = OpndSize_32; relOffset -= 5; //can't use stream here since it is used by the compilation thread UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); gDvmJit.hasNewChain = true; COMPILER_TRACE_CHAINING( ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x", (int) branchAddr, tgtAddr, relOffset)); } #endif return tgtAddr; } /* * Accept the work and start compiling. Returns true if compilation * is attempted. 
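 * Work order kinds handled below: kWorkOrderTrace and kWorkOrderTraceDebug
 * compile a trace (the debug variant forces gDvmJit.printMe for this one
 * compilation), while kWorkOrderProfileMode only switches the trace profiling
 * mode and does not compile.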
*/ bool dvmCompilerDoWork(CompilerWorkOrder *work) { JitTraceDescription *desc; bool isCompile; bool success = true; if (gDvmJit.codeCacheFull) { return false; } switch (work->kind) { case kWorkOrderTrace: isCompile = true; /* Start compilation with maximally allowed trace length */ desc = (JitTraceDescription *)work->info; success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, work->bailPtr, 0 /* no hints */); break; case kWorkOrderTraceDebug: { bool oldPrintMe = gDvmJit.printMe; gDvmJit.printMe = true; isCompile = true; /* Start compilation with maximally allowed trace length */ desc = (JitTraceDescription *)work->info; success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, work->bailPtr, 0 /* no hints */); gDvmJit.printMe = oldPrintMe; break; } case kWorkOrderProfileMode: dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info); isCompile = false; break; default: isCompile = false; ALOGE("Jit: unknown work order type"); assert(0); // Bail if debug build, discard otherwise } if (!success) work->result.codeAddress = NULL; return isCompile; } void dvmCompilerCacheFlush(long start, long end, long flags) { /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */ } //#endif