//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Top-level implementation for the Cell SPU target.
//
//===----------------------------------------------------------------------===//
#include "SPU.h"
#include "SPUFrameLowering.h"
#include "SPUInstrBuilder.h"
#include "SPUInstrInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// SPUFrameLowering:
//===----------------------------------------------------------------------===//
SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
Subtarget(sti) {
LR[0].first = SPU::R0;
LR[0].second = 16;
}
//--------------------------------------------------------------------------
// hasFP - Return true if the specified function actually has a dedicated frame
// pointer register. This is true if the function needs a frame pointer and has
// a non-zero stack size.
bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getStackSize() &&
(DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
}
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize = MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
unsigned TargetAlign = getStackAlignment();
unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
unsigned AlignMask = Align - 1;
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
// that allocations will be aligned.
if (MFI->hasVarSizedObjects())
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
// Make sure the frame is aligned.
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
MFI->setStackSize(FrameSize);
}
void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const SPUInstrInfo &TII =
*static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
MachineModuleInfo &MMI = MF.getMMI();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Prepare for debug frame info.
bool hasDebugInfo = MMI.hasDebugInfo();
MCSymbol *FrameLabel = 0;
// Move MBBI back to the beginning of the function.
MBBI = MBB.begin();
// Work out frame sizes.
determineFrameLayout(MF);
int FrameSize = MFI->getStackSize();
assert((FrameSize & 0xf) == 0
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
// the "empty" frame size is 16 - just the register scavenger spill slot
if (FrameSize > 16 || MFI->adjustsStack()) {
FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
}
// Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
// for the ABI
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
.addReg(SPU::R1);
if (isInt<10>(FrameSize)) {
// Spill $sp to adjusted $sp
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
.addReg(SPU::R1);
// Adjust $sp by required amout
BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
.addImm(FrameSize);
} else if (isInt<16>(FrameSize)) {
// Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
// $r2 to adjust $sp:
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
.addImm(-16)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
.addImm(FrameSize);
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
.addReg(SPU::R2)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
.addReg(SPU::R1)
.addReg(SPU::R2);
BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
.addReg(SPU::R2)
.addImm(16);
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
.addReg(SPU::R2)
.addReg(SPU::R1);
} else {
report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
}
if (hasDebugInfo) {
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
// Show update of SP.
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
unsigned Reg = CSI[I].getReg();
if (Reg == SPU::R0) continue;
MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
MachineLocation CSSrc(Reg);
Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
}
// Mark effective beginning of when frame pointer is ready.
MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(SPU::R1);
MachineLocation FPSrc(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
}
}
}
void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
const SPUInstrInfo &TII =
*static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
const MachineFrameInfo *MFI = MF.getFrameInfo();
int FrameSize = MFI->getStackSize();
int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
DebugLoc dl = MBBI->getDebugLoc();
assert(MBBI->getOpcode() == SPU::RET &&
"Can only insert epilog into returning blocks");
assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
// the "empty" frame size is 16 - just the register scavenger spill slot
if (FrameSize > 16 || MFI->adjustsStack()) {
FrameSize = FrameSize + SPUFrameLowering::minStackSize();
if (isInt<10>(FrameSize + LinkSlotOffset)) {
// Reload $lr, adjust $sp by required amount
// Note: We do this to slightly improve dual issue -- not by much, but it
// is an opportunity for dual issue.
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
.addImm(FrameSize + LinkSlotOffset)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
.addReg(SPU::R1)
.addImm(FrameSize);
} else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
// Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
// $r2 to adjust $sp:
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
.addImm(16)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
.addImm(FrameSize);
BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
.addReg(SPU::R1)
.addReg(SPU::R2);
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
.addImm(16)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
addReg(SPU::R2)
.addImm(16);
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
.addReg(SPU::R2)
.addReg(SPU::R1);
} else {
report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
}
}
}
void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const{
// Mark LR and SP unused, since the prolog spills them to stack and
// we don't want anyone else to spill them for us.
//
// Also, unless R2 is really used someday, don't spill it automatically.
MF.getRegInfo().setPhysRegUnused(SPU::R0);
MF.getRegInfo().setPhysRegUnused(SPU::R1);
MF.getRegInfo().setPhysRegUnused(SPU::R2);
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetRegisterClass *RC = &SPU::R32CRegClass;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}