/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Dalvik.h"
#include "vm/compiler/CompilerInternals.h"
#include "MipsLIR.h"

/*
 * Identify unconditional branches that jump to the immediate successor of the
 * branch itself.  Such a branch is redundant: execution would fall through to
 * the same place anyway, so the branch is marked as a nop (it is not unlinked
 * from the LIR list; later passes skip isNop instructions).
 */
static void applyRedundantBranchElimination(CompilationUnit *cUnit)
{
    MipsLIR *thisLIR;

    for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
         thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
         thisLIR = NEXT_LIR(thisLIR)) {

        /* Only consider live unconditional branches (kMipsB) */
        if (!thisLIR->flags.isNop && thisLIR->opcode == kMipsB) {
            MipsLIR *nextLIR = thisLIR;

            /* Walk forward over pseudo-ops looking for the branch target */
            while (true) {
                nextLIR = NEXT_LIR(nextLIR);

                /*
                 * Is the branch target the next instruction?  If so the
                 * branch is redundant and can be nop'ed out.
                 */
                if (nextLIR == (MipsLIR *) thisLIR->generic.target) {
                    thisLIR->flags.isNop = true;
                    break;
                }

                /*
                 * Found real useful stuff between the branch and the target.
                 * Need to explicitly check the lastLIRInsn here since with
                 * method-based JIT the branch might be the last real
                 * instruction.
                 *
                 * BUGFIX: this was previously an assignment
                 * (nextLIR = (MipsLIR *) cUnit->lastLIRInsn), which is
                 * always true since lastLIRInsn is non-NULL.  That made the
                 * scan abort on the first pseudo-op, so branches that jump
                 * over labels/pseudo-ops to the next real instruction were
                 * never eliminated.  The comparison below is the intended
                 * end-of-list check.
                 */
                if (!isPseudoOpCode(nextLIR->opcode) ||
                    (nextLIR == (MipsLIR *) cUnit->lastLIRInsn))
                    break;
            }
        }
    }
}

/*
 * Do a simple form of copy propagation and elimination.
 */
static void applyCopyPropagation(CompilationUnit *cUnit)
{
    MipsLIR *thisLIR;

    /* look for copies to possibly eliminate */
    for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
         thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
         thisLIR = NEXT_LIR(thisLIR)) {
        if (thisLIR->flags.isNop || thisLIR->opcode != kMipsMove)
            continue;

        /* at most max_insns uses of the copy's dest will be rewritten */
        const int max_insns = 10;
        MipsLIR *savedLIR[max_insns];
        int srcRedefined = 0;
        int insnCount = 0;
        MipsLIR *nextLIR;

        /* look for and record all uses of reg defined by the copy */
        for (nextLIR = (MipsLIR *) NEXT_LIR(thisLIR);
             nextLIR != (MipsLIR *) cUnit->lastLIRInsn;
             nextLIR = NEXT_LIR(nextLIR)) {
            if (nextLIR->flags.isNop || nextLIR->opcode == kMips32BitData)
                continue;

            if (isPseudoOpCode(nextLIR->opcode)) {
                if (nextLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
                    nextLIR->opcode == kMipsPseudoBarrier ||
                    nextLIR->opcode == kMipsPseudoExtended ||
                    nextLIR->opcode == kMipsPseudoSSARep)
                    continue; /* these pseudos don't pose problems */
                else if (nextLIR->opcode == kMipsPseudoTargetLabel ||
                         nextLIR->opcode == kMipsPseudoEntryBlock ||
                         nextLIR->opcode == kMipsPseudoExitBlock)
                    insnCount = 0; /* give up for these pseudos */
                /* NOTE: this break is intentionally unconditional -- any
                   pseudo-op not whitelisted above ends the scan (with the
                   uses recorded so far kept, unless zeroed just above) */
                break; /* reached end for copy propagation */
            }

            /* Since instructions with IS_BRANCH flag set will have its
               useMask and defMask set to ENCODE_ALL, any checking of
               these flags must come after the branching checks. */

            /* don't propagate across branch/jump and link case
               or jump via register */
            if (EncodingMap[nextLIR->opcode].flags & REG_DEF_LR ||
                nextLIR->opcode == kMipsJalr ||
                nextLIR->opcode == kMipsJr) {
                insnCount = 0;
                break;
            }

            /* branches with certain targets ok while others aren't */
            if (EncodingMap[nextLIR->opcode].flags & IS_BRANCH) {
                MipsLIR *targetLIR = (MipsLIR *) nextLIR->generic.target;
                if (targetLIR->opcode != kMipsPseudoEHBlockLabel &&
                    targetLIR->opcode != kMipsPseudoChainingCellHot &&
                    targetLIR->opcode != kMipsPseudoChainingCellNormal &&
                    targetLIR->opcode != kMipsPseudoChainingCellInvokePredicted &&
                    targetLIR->opcode != kMipsPseudoChainingCellInvokeSingleton &&
                    targetLIR->opcode != kMipsPseudoPCReconstructionBlockLabel &&
                    targetLIR->opcode != kMipsPseudoPCReconstructionCell) {
                    insnCount = 0;
                    break;
                }
                /* FIXME - for now don't propagate across any branch/jump. */
                insnCount = 0;
                break;
            }

            /* copy def reg used here, so record insn for copy propagation */
            if (thisLIR->defMask & nextLIR->useMask) {
                if (insnCount == max_insns || srcRedefined) {
                    insnCount = 0;
                    break; /* just give up if too many or not possible */
                }
                savedLIR[insnCount++] = nextLIR;
            }

            /* copy's dest reg redefined here -- the copy is dead beyond this
               point, so stop scanning */
            if (thisLIR->defMask & nextLIR->defMask) {
                if (nextLIR->opcode == kMipsMovz)
                    insnCount = 0; /* movz relies on thisLIR setting dst reg
                                      so abandon propagation */
                break;
            }

            /* copy src reg redefined here, so can't propagate further */
            if (thisLIR->useMask & nextLIR->defMask) {
                if (insnCount == 0)
                    break; /* nothing to propagate */
                srcRedefined = 1;
            }
        }

        /* conditions allow propagation and copy elimination */
        if (insnCount) {
            int i;
            for (i = 0; i < insnCount; i++) {
                int flags = EncodingMap[savedLIR[i]->opcode].flags;
                /* retarget the use masks from the copy's dest to its src */
                savedLIR[i]->useMask &= ~(1 << thisLIR->operands[0]);
                savedLIR[i]->useMask |= 1 << thisLIR->operands[1];
                /* rewrite whichever operand slots actually read a register */
                if ((flags & REG_USE0) &&
                    savedLIR[i]->operands[0] == thisLIR->operands[0])
                    savedLIR[i]->operands[0] = thisLIR->operands[1];
                if ((flags & REG_USE1) &&
                    savedLIR[i]->operands[1] == thisLIR->operands[0])
                    savedLIR[i]->operands[1] = thisLIR->operands[1];
                if ((flags & REG_USE2) &&
                    savedLIR[i]->operands[2] == thisLIR->operands[0])
                    savedLIR[i]->operands[2] = thisLIR->operands[1];
                if ((flags & REG_USE3) &&
                    savedLIR[i]->operands[3] == thisLIR->operands[0])
                    savedLIR[i]->operands[3] = thisLIR->operands[1];
            }
            /* every recorded use now reads the source directly; the copy
               itself is dead */
            thisLIR->flags.isNop = true;
        }
    }
}

#ifdef __mips_hard_float
/*
 * Look for pairs of mov.s instructions that can be combined into mov.d
 */
static void mergeMovs(CompilationUnit *cUnit)
{
    MipsLIR *movsLIR = NULL; /* pending unpaired mov.s candidate */
    MipsLIR *thisLIR;

    for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
         thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
         thisLIR = NEXT_LIR(thisLIR)) {
        if (thisLIR->flags.isNop)
            continue;
        if (isPseudoOpCode(thisLIR->opcode)) {
            if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
                thisLIR->opcode == kMipsPseudoExtended ||
                thisLIR->opcode == kMipsPseudoSSARep)
                continue; /* ok to move across these pseudos */
            movsLIR = NULL; /* don't merge across other pseudos */
            continue;
        }
        /* merge pairs of mov.s instructions */
        if (thisLIR->opcode == kMipsFmovs) {
            if (movsLIR == NULL)
                movsLIR = thisLIR;
            else if (((movsLIR->operands[0] & 1) == 0) &&
                     ((movsLIR->operands[1] & 1) == 0) &&
                     ((movsLIR->operands[0] + 1) == thisLIR->operands[0]) &&
                     ((movsLIR->operands[1] + 1) == thisLIR->operands[1])) {
                /* movsLIR is handling even register - upgrade to mov.d */
                movsLIR->opcode = kMipsFmovd;
                movsLIR->operands[0] = S2D(movsLIR->operands[0],
                                           movsLIR->operands[0]+1);
                movsLIR->operands[1] = S2D(movsLIR->operands[1],
                                           movsLIR->operands[1]+1);
                thisLIR->flags.isNop = true;
                movsLIR = NULL;
            }
            else if (((movsLIR->operands[0] & 1) == 1) &&
                     ((movsLIR->operands[1] & 1) == 1) &&
                     ((movsLIR->operands[0] - 1) == thisLIR->operands[0]) &&
                     ((movsLIR->operands[1] - 1) == thisLIR->operands[1])) {
                /* thisLIR is handling even register - upgrade to mov.d */
                thisLIR->opcode = kMipsFmovd;
                thisLIR->operands[0] = S2D(thisLIR->operands[0],
                                           thisLIR->operands[0]+1);
                thisLIR->operands[1] = S2D(thisLIR->operands[1],
                                           thisLIR->operands[1]+1);
                movsLIR->flags.isNop = true;
                movsLIR = NULL;
            }
            else
                /* carry on searching from here */
                movsLIR = thisLIR;
            continue;
        }

        /* intervening instruction - start search from scratch */
        movsLIR = NULL;
    }
}
#endif

/*
 * Look back first and then ahead to try to find an instruction to move into
 * the branch delay slot.  If the analysis can be done cheaply enough, it may
 * be possible to tune this routine to be more beneficial (e.g., being more
 * particular about what instruction is speculated).
 *
 * Returns a freshly-allocated MipsLIR for the delay slot: either a copy of a
 * moved/speculated instruction, a prefetch, or a plain nop as a fallback.
 */
static MipsLIR *delaySlotLIR(MipsLIR *firstLIR, MipsLIR *branchLIR)
{
    int isLoad;
    int loadVisited = 0;
    int isStore;
    int storeVisited = 0;
    /* accumulated def/use constraints, seeded from the branch itself */
    u8 useMask = branchLIR->useMask;
    u8 defMask = branchLIR->defMask;
    MipsLIR *thisLIR;
    MipsLIR *newLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true);

    /* first: scan backwards for an instruction that can be sunk into the
       slot without violating any def/use or load/store ordering */
    for (thisLIR = PREV_LIR(branchLIR);
         thisLIR != firstLIR;
         thisLIR = PREV_LIR(thisLIR)) {
        if (thisLIR->flags.isNop)
            continue;
        if (isPseudoOpCode(thisLIR->opcode)) {
            if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
                thisLIR->opcode == kMipsPseudoExtended ||
                thisLIR->opcode == kMipsPseudoSSARep)
                continue; /* ok to move across these pseudos */
            break; /* don't move across all other pseudos */
        }

        /* give up on moving previous instruction down into slot */
        if (thisLIR->opcode == kMipsNop ||
            thisLIR->opcode == kMips32BitData ||
            EncodingMap[thisLIR->opcode].flags & IS_BRANCH)
            break;

        /* don't reorder loads/stores (the alias info could
           possibly be used to allow as a future enhancement) */
        isLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD;
        isStore = EncodingMap[thisLIR->opcode].flags & IS_STORE;

        if (!(thisLIR->useMask & defMask) &&
            !(thisLIR->defMask & useMask) &&
            !(thisLIR->defMask & defMask) &&
            !(isLoad && storeVisited) &&
            !(isStore && loadVisited) &&
            !(isStore && storeVisited)) {
            *newLIR = *thisLIR;
            thisLIR->flags.isNop = true;
            return newLIR; /* move into delay slot succeeded */
        }

        loadVisited |= isLoad;
        storeVisited |= isStore;

        /* accumulate def/use constraints */
        useMask |= thisLIR->useMask;
        defMask |= thisLIR->defMask;
    }

    /* for unconditional branches try to copy the instruction at the
       branch target up into the delay slot and adjust the branch */
    if (branchLIR->opcode == kMipsB) {
        MipsLIR *targetLIR;
        for (targetLIR = (MipsLIR *) branchLIR->generic.target;
             targetLIR;
             targetLIR = NEXT_LIR(targetLIR)) {
            if (!targetLIR->flags.isNop &&
                (!isPseudoOpCode(targetLIR->opcode) ||
                 /* can't pull predicted up */
                 targetLIR->opcode == kMipsPseudoChainingCellInvokePredicted))
                break; /* try to get to next real op at branch target */
        }
        if (targetLIR && !isPseudoOpCode(targetLIR->opcode) &&
            !(EncodingMap[targetLIR->opcode].flags & IS_BRANCH)) {
            *newLIR = *targetLIR;
            /* slot holds the old target insn; branch past it */
            branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR);
            return newLIR;
        }
    } else if (branchLIR->opcode >= kMipsBeq && branchLIR->opcode <= kMipsBne) {
        /* for conditional branches try to fill branch delay slot
           via speculative execution when safe */
        MipsLIR *targetLIR;
        for (targetLIR = (MipsLIR *) branchLIR->generic.target;
             targetLIR;
             targetLIR = NEXT_LIR(targetLIR)) {
            if (!targetLIR->flags.isNop && !isPseudoOpCode(targetLIR->opcode))
                break; /* try to get to next real op at branch target */
        }

        MipsLIR *nextLIR;
        for (nextLIR = NEXT_LIR(branchLIR);
             nextLIR;
             nextLIR = NEXT_LIR(nextLIR)) {
            if (!nextLIR->flags.isNop && !isPseudoOpCode(nextLIR->opcode))
                break; /* try to get to next real op for fall thru */
        }

        if (nextLIR && targetLIR) {
            int flags = EncodingMap[nextLIR->opcode].flags;
            int isLoad = flags & IS_LOAD;

            /* common branch and fall thru to normal chaining cells case:
               both paths start with the identical load, so it is safe to
               hoist it into the slot unconditionally */
            if (isLoad && nextLIR->opcode == targetLIR->opcode &&
                nextLIR->operands[0] == targetLIR->operands[0] &&
                nextLIR->operands[1] == targetLIR->operands[1] &&
                nextLIR->operands[2] == targetLIR->operands[2]) {
                *newLIR = *targetLIR;
                branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR);
                return newLIR;
            }

            /* try prefetching (maybe try speculating instructions
               along the trace like dalvik frame load which is
               common and may be safe) */
            int isStore = flags & IS_STORE;
            if (isLoad || isStore) {
                newLIR->opcode = kMipsPref;
                /* pref hint: 0 = load expected, 1 = store expected */
                newLIR->operands[0] = isLoad ? 0 : 1;
                newLIR->operands[1] = nextLIR->operands[1];
                newLIR->operands[2] = nextLIR->operands[2];
                newLIR->defMask = nextLIR->defMask;
                newLIR->useMask = nextLIR->useMask;
                return newLIR;
            }
        }
    }

    /* couldn't find a useful instruction to move into the delay slot */
    newLIR->opcode = kMipsNop;
    return newLIR;
}

/*
 * The branch delay slot has been ignored thus far.  This is the point where
 * a useful instruction is moved into it or a nop is inserted.  Leave existing
 * NOPs alone -- these came from sparse and packed switch ops and are needed
 * to maintain the proper offset to the jump table.
 */
static void introduceBranchDelaySlot(CompilationUnit *cUnit)
{
    MipsLIR *thisLIR;
    MipsLIR *firstLIR =(MipsLIR *) cUnit->firstLIRInsn;
    MipsLIR *lastLIR =(MipsLIR *) cUnit->lastLIRInsn;

    /* walk backwards so inserted slot insns aren't revisited */
    for (thisLIR = lastLIR; thisLIR != firstLIR; thisLIR = PREV_LIR(thisLIR)) {
        if (thisLIR->flags.isNop ||
            isPseudoOpCode(thisLIR->opcode) ||
            !(EncodingMap[thisLIR->opcode].flags & IS_BRANCH)) {
            continue;
        } else if (thisLIR == lastLIR) {
            dvmCompilerAppendLIR(cUnit,
                (LIR *) delaySlotLIR(firstLIR, thisLIR));
        } else if (NEXT_LIR(thisLIR)->opcode != kMipsNop) {
            dvmCompilerInsertLIRAfter((LIR *) thisLIR,
                (LIR *) delaySlotLIR(firstLIR, thisLIR));
        }
    }

    /* the loop above stops with thisLIR == firstLIR; if the very first LIR
       is itself a branch it still needs a delay slot -- insert a nop since
       there is nothing before it to move */
    if (!thisLIR->flags.isNop &&
        !isPseudoOpCode(thisLIR->opcode) &&
        EncodingMap[thisLIR->opcode].flags & IS_BRANCH) {
        /* nothing available to move, so insert nop */
        MipsLIR *nopLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true);
        nopLIR->opcode = kMipsNop;
        dvmCompilerInsertLIRAfter((LIR *) thisLIR, (LIR *) nopLIR);
    }
}

/*
 * Entry point: run the global LIR optimization passes in order.  Delay slot
 * introduction must run last since the earlier passes may nop-out or retarget
 * branches.
 */
void dvmCompilerApplyGlobalOptimizations(CompilationUnit *cUnit)
{
    applyRedundantBranchElimination(cUnit);
    applyCopyPropagation(cUnit);
#ifdef __mips_hard_float
    mergeMovs(cUnit);
#endif
    introduceBranchDelaySlot(cUnit);
}