/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Dalvik.h"
#include "vm/compiler/CompilerInternals.h"
#include "MipsLIR.h"
/*
* Identify unconditional branches that jump to the immediate successor of the
* branch itself.
*/
static void applyRedundantBranchElimination(CompilationUnit *cUnit)
{
MipsLIR *thisLIR;
for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
thisLIR = NEXT_LIR(thisLIR)) {
/* Branch to the next instruction */
if (!thisLIR->flags.isNop && thisLIR->opcode == kMipsB) {
MipsLIR *nextLIR = thisLIR;
while (true) {
nextLIR = NEXT_LIR(nextLIR);
/*
* Is the branch target the next instruction?
*/
if (nextLIR == (MipsLIR *) thisLIR->generic.target) {
thisLIR->flags.isNop = true;
break;
}
/*
* Found real useful stuff between the branch and the target.
* Need to explicitly check the lastLIRInsn here since with
* method-based JIT the branch might be the last real
* instruction.
*/
if (!isPseudoOpCode(nextLIR->opcode) ||
(nextLIR = (MipsLIR *) cUnit->lastLIRInsn))
break;
}
}
}
}
/*
* Do simple a form of copy propagation and elimination.
*/
static void applyCopyPropagation(CompilationUnit *cUnit)
{
MipsLIR *thisLIR;
/* look for copies to possibly eliminate */
for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
thisLIR = NEXT_LIR(thisLIR)) {
if (thisLIR->flags.isNop || thisLIR->opcode != kMipsMove)
continue;
const int max_insns = 10;
MipsLIR *savedLIR[max_insns];
int srcRedefined = 0;
int insnCount = 0;
MipsLIR *nextLIR;
/* look for and record all uses of reg defined by the copy */
for (nextLIR = (MipsLIR *) NEXT_LIR(thisLIR);
nextLIR != (MipsLIR *) cUnit->lastLIRInsn;
nextLIR = NEXT_LIR(nextLIR)) {
if (nextLIR->flags.isNop || nextLIR->opcode == kMips32BitData)
continue;
if (isPseudoOpCode(nextLIR->opcode)) {
if (nextLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
nextLIR->opcode == kMipsPseudoBarrier ||
nextLIR->opcode == kMipsPseudoExtended ||
nextLIR->opcode == kMipsPseudoSSARep)
continue; /* these pseudos don't pose problems */
else if (nextLIR->opcode == kMipsPseudoTargetLabel ||
nextLIR->opcode == kMipsPseudoEntryBlock ||
nextLIR->opcode == kMipsPseudoExitBlock)
insnCount = 0; /* give up for these pseudos */
break; /* reached end for copy propagation */
}
/* Since instructions with IS_BRANCH flag set will have its */
/* useMask and defMask set to ENCODE_ALL, any checking of */
/* these flags must come after the branching checks. */
/* don't propagate across branch/jump and link case
or jump via register */
if (EncodingMap[nextLIR->opcode].flags & REG_DEF_LR ||
nextLIR->opcode == kMipsJalr ||
nextLIR->opcode == kMipsJr) {
insnCount = 0;
break;
}
/* branches with certain targets ok while others aren't */
if (EncodingMap[nextLIR->opcode].flags & IS_BRANCH) {
MipsLIR *targetLIR = (MipsLIR *) nextLIR->generic.target;
if (targetLIR->opcode != kMipsPseudoEHBlockLabel &&
targetLIR->opcode != kMipsPseudoChainingCellHot &&
targetLIR->opcode != kMipsPseudoChainingCellNormal &&
targetLIR->opcode != kMipsPseudoChainingCellInvokePredicted &&
targetLIR->opcode != kMipsPseudoChainingCellInvokeSingleton &&
targetLIR->opcode != kMipsPseudoPCReconstructionBlockLabel &&
targetLIR->opcode != kMipsPseudoPCReconstructionCell) {
insnCount = 0;
break;
}
/* FIXME - for now don't propagate across any branch/jump. */
insnCount = 0;
break;
}
/* copy def reg used here, so record insn for copy propagation */
if (thisLIR->defMask & nextLIR->useMask) {
if (insnCount == max_insns || srcRedefined) {
insnCount = 0;
break; /* just give up if too many or not possible */
}
savedLIR[insnCount++] = nextLIR;
}
if (thisLIR->defMask & nextLIR->defMask) {
if (nextLIR->opcode == kMipsMovz)
insnCount = 0; /* movz relies on thisLIR setting dst reg so abandon propagation*/
break;
}
/* copy src reg redefined here, so can't propagate further */
if (thisLIR->useMask & nextLIR->defMask) {
if (insnCount == 0)
break; /* nothing to propagate */
srcRedefined = 1;
}
}
/* conditions allow propagation and copy elimination */
if (insnCount) {
int i;
for (i = 0; i < insnCount; i++) {
int flags = EncodingMap[savedLIR[i]->opcode].flags;
savedLIR[i]->useMask &= ~(1 << thisLIR->operands[0]);
savedLIR[i]->useMask |= 1 << thisLIR->operands[1];
if ((flags & REG_USE0) &&
savedLIR[i]->operands[0] == thisLIR->operands[0])
savedLIR[i]->operands[0] = thisLIR->operands[1];
if ((flags & REG_USE1) &&
savedLIR[i]->operands[1] == thisLIR->operands[0])
savedLIR[i]->operands[1] = thisLIR->operands[1];
if ((flags & REG_USE2) &&
savedLIR[i]->operands[2] == thisLIR->operands[0])
savedLIR[i]->operands[2] = thisLIR->operands[1];
if ((flags & REG_USE3) &&
savedLIR[i]->operands[3] == thisLIR->operands[0])
savedLIR[i]->operands[3] = thisLIR->operands[1];
}
thisLIR->flags.isNop = true;
}
}
}
#ifdef __mips_hard_float
/*
* Look for pairs of mov.s instructions that can be combined into mov.d
*/
static void mergeMovs(CompilationUnit *cUnit)
{
MipsLIR *movsLIR = NULL;
MipsLIR *thisLIR;
for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
thisLIR = NEXT_LIR(thisLIR)) {
if (thisLIR->flags.isNop)
continue;
if (isPseudoOpCode(thisLIR->opcode)) {
if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
thisLIR->opcode == kMipsPseudoExtended ||
thisLIR->opcode == kMipsPseudoSSARep)
continue; /* ok to move across these pseudos */
movsLIR = NULL; /* don't merge across other pseudos */
continue;
}
/* merge pairs of mov.s instructions */
if (thisLIR->opcode == kMipsFmovs) {
if (movsLIR == NULL)
movsLIR = thisLIR;
else if (((movsLIR->operands[0] & 1) == 0) &&
((movsLIR->operands[1] & 1) == 0) &&
((movsLIR->operands[0] + 1) == thisLIR->operands[0]) &&
((movsLIR->operands[1] + 1) == thisLIR->operands[1])) {
/* movsLIR is handling even register - upgrade to mov.d */
movsLIR->opcode = kMipsFmovd;
movsLIR->operands[0] = S2D(movsLIR->operands[0], movsLIR->operands[0]+1);
movsLIR->operands[1] = S2D(movsLIR->operands[1], movsLIR->operands[1]+1);
thisLIR->flags.isNop = true;
movsLIR = NULL;
}
else if (((movsLIR->operands[0] & 1) == 1) &&
((movsLIR->operands[1] & 1) == 1) &&
((movsLIR->operands[0] - 1) == thisLIR->operands[0]) &&
((movsLIR->operands[1] - 1) == thisLIR->operands[1])) {
/* thissLIR is handling even register - upgrade to mov.d */
thisLIR->opcode = kMipsFmovd;
thisLIR->operands[0] = S2D(thisLIR->operands[0], thisLIR->operands[0]+1);
thisLIR->operands[1] = S2D(thisLIR->operands[1], thisLIR->operands[1]+1);
movsLIR->flags.isNop = true;
movsLIR = NULL;
}
else
/* carry on searching from here */
movsLIR = thisLIR;
continue;
}
/* intervening instruction - start search from scratch */
movsLIR = NULL;
}
}
#endif
/*
* Look back first and then ahead to try to find an instruction to move into
* the branch delay slot. If the analysis can be done cheaply enough, it may be
* be possible to tune this routine to be more beneficial (e.g., being more
* particular about what instruction is speculated).
*/
static MipsLIR *delaySlotLIR(MipsLIR *firstLIR, MipsLIR *branchLIR)
{
int isLoad;
int loadVisited = 0;
int isStore;
int storeVisited = 0;
u8 useMask = branchLIR->useMask;
u8 defMask = branchLIR->defMask;
MipsLIR *thisLIR;
MipsLIR *newLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true);
for (thisLIR = PREV_LIR(branchLIR);
thisLIR != firstLIR;
thisLIR = PREV_LIR(thisLIR)) {
if (thisLIR->flags.isNop)
continue;
if (isPseudoOpCode(thisLIR->opcode)) {
if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
thisLIR->opcode == kMipsPseudoExtended ||
thisLIR->opcode == kMipsPseudoSSARep)
continue; /* ok to move across these pseudos */
break; /* don't move across all other pseudos */
}
/* give up on moving previous instruction down into slot */
if (thisLIR->opcode == kMipsNop ||
thisLIR->opcode == kMips32BitData ||
EncodingMap[thisLIR->opcode].flags & IS_BRANCH)
break;
/* don't reorder loads/stores (the alias info could
possibly be used to allow as a future enhancement) */
isLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD;
isStore = EncodingMap[thisLIR->opcode].flags & IS_STORE;
if (!(thisLIR->useMask & defMask) &&
!(thisLIR->defMask & useMask) &&
!(thisLIR->defMask & defMask) &&
!(isLoad && storeVisited) &&
!(isStore && loadVisited) &&
!(isStore && storeVisited)) {
*newLIR = *thisLIR;
thisLIR->flags.isNop = true;
return newLIR; /* move into delay slot succeeded */
}
loadVisited |= isLoad;
storeVisited |= isStore;
/* accumulate def/use constraints */
useMask |= thisLIR->useMask;
defMask |= thisLIR->defMask;
}
/* for unconditional branches try to copy the instruction at the
branch target up into the delay slot and adjust the branch */
if (branchLIR->opcode == kMipsB) {
MipsLIR *targetLIR;
for (targetLIR = (MipsLIR *) branchLIR->generic.target;
targetLIR;
targetLIR = NEXT_LIR(targetLIR)) {
if (!targetLIR->flags.isNop &&
(!isPseudoOpCode(targetLIR->opcode) || /* can't pull predicted up */
targetLIR->opcode == kMipsPseudoChainingCellInvokePredicted))
break; /* try to get to next real op at branch target */
}
if (targetLIR && !isPseudoOpCode(targetLIR->opcode) &&
!(EncodingMap[targetLIR->opcode].flags & IS_BRANCH)) {
*newLIR = *targetLIR;
branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR);
return newLIR;
}
} else if (branchLIR->opcode >= kMipsBeq && branchLIR->opcode <= kMipsBne) {
/* for conditional branches try to fill branch delay slot
via speculative execution when safe */
MipsLIR *targetLIR;
for (targetLIR = (MipsLIR *) branchLIR->generic.target;
targetLIR;
targetLIR = NEXT_LIR(targetLIR)) {
if (!targetLIR->flags.isNop && !isPseudoOpCode(targetLIR->opcode))
break; /* try to get to next real op at branch target */
}
MipsLIR *nextLIR;
for (nextLIR = NEXT_LIR(branchLIR);
nextLIR;
nextLIR = NEXT_LIR(nextLIR)) {
if (!nextLIR->flags.isNop && !isPseudoOpCode(nextLIR->opcode))
break; /* try to get to next real op for fall thru */
}
if (nextLIR && targetLIR) {
int flags = EncodingMap[nextLIR->opcode].flags;
int isLoad = flags & IS_LOAD;
/* common branch and fall thru to normal chaining cells case */
if (isLoad && nextLIR->opcode == targetLIR->opcode &&
nextLIR->operands[0] == targetLIR->operands[0] &&
nextLIR->operands[1] == targetLIR->operands[1] &&
nextLIR->operands[2] == targetLIR->operands[2]) {
*newLIR = *targetLIR;
branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR);
return newLIR;
}
/* try prefetching (maybe try speculating instructions along the
trace like dalvik frame load which is common and may be safe) */
int isStore = flags & IS_STORE;
if (isLoad || isStore) {
newLIR->opcode = kMipsPref;
newLIR->operands[0] = isLoad ? 0 : 1;
newLIR->operands[1] = nextLIR->operands[1];
newLIR->operands[2] = nextLIR->operands[2];
newLIR->defMask = nextLIR->defMask;
newLIR->useMask = nextLIR->useMask;
return newLIR;
}
}
}
/* couldn't find a useful instruction to move into the delay slot */
newLIR->opcode = kMipsNop;
return newLIR;
}
/*
* The branch delay slot has been ignored thus far. This is the point where
* a useful instruction is moved into it or a nop is inserted. Leave existing
* NOPs alone -- these came from sparse and packed switch ops and are needed
* to maintain the proper offset to the jump table.
*/
static void introduceBranchDelaySlot(CompilationUnit *cUnit)
{
MipsLIR *thisLIR;
MipsLIR *firstLIR =(MipsLIR *) cUnit->firstLIRInsn;
MipsLIR *lastLIR =(MipsLIR *) cUnit->lastLIRInsn;
for (thisLIR = lastLIR; thisLIR != firstLIR; thisLIR = PREV_LIR(thisLIR)) {
if (thisLIR->flags.isNop ||
isPseudoOpCode(thisLIR->opcode) ||
!(EncodingMap[thisLIR->opcode].flags & IS_BRANCH)) {
continue;
} else if (thisLIR == lastLIR) {
dvmCompilerAppendLIR(cUnit,
(LIR *) delaySlotLIR(firstLIR, thisLIR));
} else if (NEXT_LIR(thisLIR)->opcode != kMipsNop) {
dvmCompilerInsertLIRAfter((LIR *) thisLIR,
(LIR *) delaySlotLIR(firstLIR, thisLIR));
}
}
if (!thisLIR->flags.isNop &&
!isPseudoOpCode(thisLIR->opcode) &&
EncodingMap[thisLIR->opcode].flags & IS_BRANCH) {
/* nothing available to move, so insert nop */
MipsLIR *nopLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true);
nopLIR->opcode = kMipsNop;
dvmCompilerInsertLIRAfter((LIR *) thisLIR, (LIR *) nopLIR);
}
}
void dvmCompilerApplyGlobalOptimizations(CompilationUnit *cUnit)
{
applyRedundantBranchElimination(cUnit);
applyCopyPropagation(cUnit);
#ifdef __mips_hard_float
mergeMovs(cUnit);
#endif
introduceBranchDelaySlot(cUnit);
}