//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the SystemZ target.
//
//===----------------------------------------------------------------------===//

#include "SystemZ.h"
#include "SystemZTargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's
  /// instead of register numbers for the leaves of the matched tree.
  struct SystemZRRIAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDValue Reg;
      int FrameIndex;
    } Base;

    SDValue IndexReg;
    int64_t Disp;
    bool isRI;

    SystemZRRIAddressMode(bool RI = false)
      : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
    }

    void dump() {
      errs() << "SystemZRRIAddressMode " << this << '\n';
      if (BaseType == RegBase) {
        errs() << "Base.Reg ";
        if (Base.Reg.getNode() != 0)
          Base.Reg.getNode()->dump();
        else
          errs() << "nul";
        errs() << '\n';
      } else {
        errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
      }
      if (!isRI) {
        errs() << "IndexReg ";
        if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
        else errs() << "nul";
      }
      errs() << " Disp " << Disp << '\n';
    }
  };
}

/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine
/// instructions for SelectionDAG operations.
///
namespace {
  class SystemZDAGToDAGISel : public SelectionDAGISel {
    const SystemZTargetLowering &Lowering;
    const SystemZSubtarget &Subtarget;

    void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
                            SDValue &Base, SDValue &Disp);
    void getAddressOperands(const SystemZRRIAddressMode &AM,
                            SDValue &Base, SDValue &Disp,
                            SDValue &Index);

  public:
    SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(TM, OptLevel),
        Lowering(*TM.getTargetLowering()),
        Subtarget(*TM.getSubtargetImpl()) { }

    virtual const char *getPassName() const {
      return "SystemZ DAG->DAG Pattern Instruction Selection";
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(uint64_t Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(uint64_t Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(uint64_t Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    // Include the pieces autogenerated from the target description.
    #include "SystemZGenDAGISel.inc"

  private:
    bool SelectAddrRI12Only(SDValue& Addr,
                            SDValue &Base, SDValue &Disp);
    bool SelectAddrRI12(SDValue& Addr,
                        SDValue &Base, SDValue &Disp,
                        bool is12BitOnly = false);
    bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
    bool SelectAddrRRI12(SDValue Addr,
                         SDValue &Base, SDValue &Disp, SDValue &Index);
    bool SelectAddrRRI20(SDValue Addr,
                         SDValue &Base, SDValue &Disp, SDValue &Index);
    bool SelectLAAddr(SDValue Addr,
                      SDValue &Base, SDValue &Disp, SDValue &Index);

    SDNode *Select(SDNode *Node);

    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Disp, SDValue &Index);

    bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
                      bool is12Bit, unsigned Depth = 0);
    bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
  };
}  // end anonymous namespace

/// createSystemZISelDag - This pass converts a legalized DAG into a
/// SystemZ-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new SystemZDAGToDAGISel(TM, OptLevel);
}

/// isImmSExt20 - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 20-bit value. If so, this returns true and the
/// immediate.
static bool isImmSExt20(int64_t Val, int64_t &Imm) {
  if (Val >= -524288 && Val <= 524287) {
    Imm = Val;
    return true;
  }
  return false;
}

/// isImmZExt12 - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// zero extension from a 12-bit value. If so, this returns true and the
/// immediate.
static bool isImmZExt12(int64_t Val, int64_t &Imm) {
  if (Val >= 0 && Val <= 0xFFF) {
    Imm = Val;
    return true;
  }
  return false;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.  This just pattern matches for the
/// addressing mode.
bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
                                       bool is12Bit, unsigned Depth) {
  DebugLoc dl = N.getDebugLoc();
  DEBUG(errs() << "MatchAddress: "; AM.dump());
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  // FIXME: We can perform better here. If we have something like
  // (shift (add A, imm), N), we can try to reassociate stuff and fold shift of
  // imm into addressing mode.
  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    int64_t Imm = 0;
    bool Match = (is12Bit ?
                  isImmZExt12(AM.Disp + Val, Imm) :
                  isImmSExt20(AM.Disp + Val, Imm));
    if (Match) {
      AM.Disp = Imm;
      return false;
    }
    break;
  }

  case ISD::FrameIndex:
    if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
        AM.Base.Reg.getNode() == 0) {
      AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if a has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Test if the LHS of the sub can be folded.
    SystemZRRIAddressMode Backup = AM;
    if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRI) {
      AM = Backup;
      break;
    }

    // If the base is a register with multiple uses, this transformation may
    // save a mov. Otherwise it's probably better not to do it.
    if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
        (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue RHS = N.getNode()->getOperand(1);
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;

    // Insert the new nodes into the topological ordering.
    if (Zero.getNode()->getNodeId() == -1 ||
        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    if (Neg.getNode()->getNodeId() == -1 ||
        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    return false;
  }

  case ISD::ADD: {
    SystemZRRIAddressMode Backup = AM;
    if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
        !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
      return false;
    AM = Backup;
    if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
        !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (!AM.isRI &&
        AM.BaseType == SystemZRRIAddressMode::RegBase &&
        !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
      AM.Base.Reg = N.getNode()->getOperand(0);
      AM.IndexReg = N.getNode()->getOperand(1);
      return false;
    }
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      SystemZRRIAddressMode Backup = AM;
      int64_t Offset = CN->getSExtValue();
      int64_t Imm = 0;
      bool MatchOffset = (is12Bit ?
                          isImmZExt12(AM.Disp + Offset, Imm) :
                          isImmSExt20(AM.Disp + Offset, Imm));
      // The resultant disp must fit in 12 or 20-bits.
      if (MatchOffset &&
          // LHS should be an addr mode.
          !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
        AM.Disp = Imm;
        return false;
      }
      AM = Backup;
    }
    break;
  }

  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
                                           SystemZRRIAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
    // If so, check to see if the index register is set.
    if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
      AM.IndexReg = N;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = SystemZRRIAddressMode::RegBase;
  AM.Base.Reg = N;
  return false;
}

void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
                                               SDValue &Base, SDValue &Disp) {
  if (AM.BaseType == SystemZRRIAddressMode::RegBase)
    Base = AM.Base.Reg;
  else
    Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
  Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
}

void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
                                             SDValue &Base, SDValue &Disp,
                                             SDValue &Index) {
  getAddressOperandsRI(AM, Base, Disp);
  Index = AM.IndexReg;
}

/// Returns true if the address can be represented by a base register plus
/// an unsigned 12-bit displacement [r+imm].
bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
                                             SDValue &Base, SDValue &Disp) {
  return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
}

bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
                                         SDValue &Base, SDValue &Disp,
                                         bool is12BitOnly) {
  SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
  bool Done = false;

  if (!Addr.hasOneUse()) {
    unsigned Opcode = Addr.getOpcode();
    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
      // If we are able to fold N into addressing mode, then we'll allow it even
      // if N has multiple uses. In general, addressing computation is used as
      // addresses by all of its uses. But watch out for CopyToReg uses, that
      // means the address computation is liveout. It will be computed by a LA
      // so we want to avoid computing the address twice.
      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
        if (UI->getOpcode() == ISD::CopyToReg) {
          MatchAddressBase(Addr, AM12);
          Done = true;
          break;
        }
      }
    }
  }
  if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
    return false;

  // Check, whether we can match stuff using 20-bit displacements
  if (!Done && !is12BitOnly &&
      !MatchAddress(Addr, AM20, /* is12Bit */ false))
    if (AM12.Disp == 0 && AM20.Disp != 0)
      return false;

  DEBUG(errs() << "MatchAddress (final): "; AM12.dump());

  EVT VT = Addr.getValueType();
  if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
    if (!AM12.Base.Reg.getNode())
      AM12.Base.Reg = CurDAG->getRegister(0, VT);
  }

  assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");

  getAddressOperandsRI(AM12, Base, Disp);

  return true;
}

/// Returns true if the address can be represented by a base register plus
/// a signed 20-bit displacement [r+imm].
bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
                                       SDValue &Base, SDValue &Disp) {
  SystemZRRIAddressMode AM(/*isRI*/true);
  bool Done = false;

  if (!Addr.hasOneUse()) {
    unsigned Opcode = Addr.getOpcode();
    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
      // If we are able to fold N into addressing mode, then we'll allow it even
      // if N has multiple uses. In general, addressing computation is used as
      // addresses by all of its uses. But watch out for CopyToReg uses, that
      // means the address computation is liveout. It will be computed by a LA
      // so we want to avoid computing the address twice.
      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
        if (UI->getOpcode() == ISD::CopyToReg) {
          MatchAddressBase(Addr, AM);
          Done = true;
          break;
        }
      }
    }
  }
  if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
    return false;

  DEBUG(errs() << "MatchAddress (final): "; AM.dump());

  EVT VT = Addr.getValueType();
  if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
    if (!AM.Base.Reg.getNode())
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");

  getAddressOperandsRI(AM, Base, Disp);

  return true;
}

/// Returns true if the address can be represented by a base register plus
/// index register plus an unsigned 12-bit displacement [base + idx + imm].
bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
                                SDValue &Base, SDValue &Disp, SDValue &Index) {
  SystemZRRIAddressMode AM20, AM12;
  bool Done = false;

  if (!Addr.hasOneUse()) {
    unsigned Opcode = Addr.getOpcode();
    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
      // If we are able to fold N into addressing mode, then we'll allow it even
      // if N has multiple uses. In general, addressing computation is used as
      // addresses by all of its uses. But watch out for CopyToReg uses, that
      // means the address computation is liveout. It will be computed by a LA
      // so we want to avoid computing the address twice.
      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
        if (UI->getOpcode() == ISD::CopyToReg) {
          MatchAddressBase(Addr, AM12);
          Done = true;
          break;
        }
      }
    }
  }
  if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
    return false;

  // Check, whether we can match stuff using 20-bit displacements
  if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
    if (AM12.Disp == 0 && AM20.Disp != 0)
      return false;

  DEBUG(errs() << "MatchAddress (final): "; AM12.dump());

  EVT VT = Addr.getValueType();
  if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
    if (!AM12.Base.Reg.getNode())
      AM12.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM12.IndexReg.getNode())
    AM12.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM12, Base, Disp, Index);

  return true;
}

/// Returns true if the address can be represented by a base register plus
/// index register plus a signed 20-bit displacement [base + idx + imm].
bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
                                SDValue &Base, SDValue &Disp, SDValue &Index) {
  SystemZRRIAddressMode AM;
  bool Done = false;

  if (!Addr.hasOneUse()) {
    unsigned Opcode = Addr.getOpcode();
    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
      // If we are able to fold N into addressing mode, then we'll allow it even
      // if N has multiple uses. In general, addressing computation is used as
      // addresses by all of its uses. But watch out for CopyToReg uses, that
      // means the address computation is liveout. It will be computed by a LA
      // so we want to avoid computing the address twice.
      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
        if (UI->getOpcode() == ISD::CopyToReg) {
          MatchAddressBase(Addr, AM);
          Done = true;
          break;
        }
      }
    }
  }
  if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
    return false;

  DEBUG(errs() << "MatchAddress (final): "; AM.dump());

  EVT VT = Addr.getValueType();
  if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
    if (!AM.Base.Reg.getNode())
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Disp, Index);

  return true;
}

/// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LA/LAY instruction.
bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
                                  SDValue &Base, SDValue &Disp, SDValue &Index) {
  SystemZRRIAddressMode AM;

  if (MatchAddress(Addr, AM, false))
    return false;

  EVT VT = Addr.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == SystemZRRIAddressMode::RegBase)
    if (AM.Base.Reg.getNode())
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity += 1;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
    Complexity += 1;

  if (Complexity > 2) {
    getAddressOperands(AM, Base, Disp, Index);
    return true;
  }

  return false;
}

bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
                                 SDValue &Base, SDValue &Disp, SDValue &Index) {
  if (ISD::isNON_EXTLoad(N.getNode()) &&
      IsLegalToFold(N, P, P, OptLevel))
    return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
  return false;
}

SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
  EVT NVT = Node->getValueType(0);
  DebugLoc dl = Node->getDebugLoc();
  unsigned Opcode = Node->getOpcode();

  // Dump information about the Node being selected
  DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");

  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    return NULL; // Already selected.
  }

  switch (Opcode) {
  default: break;
  case ISD::SDIVREM: {
    unsigned Opc, MOpc;
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    EVT ResVT;
    bool is32Bit = false;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i32:
      Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
      ResVT = MVT::v2i64;
      is32Bit = true;
      break;
    case MVT::i64:
      Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
      ResVT = MVT::v2i64;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);

    // Prepare the dividend
    SDNode *Dividend;
    if (is32Bit)
      Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
    else
      Dividend = N0.getNode();

    // Insert prepared dividend into suitable 'subreg'
    SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                         dl, ResVT);
    Dividend =
      CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
                             SDValue(Tmp, 0), SDValue(Dividend, 0),
                     CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));

    SDNode *Result;
    SDValue DivVal = SDValue(Dividend, 0);
    if (foldedLoad) {
      SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
      Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
                                      Ops, array_lengthof(Ops));
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(Result, 1));
    } else {
      Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
    }

    // Copy the division (odd subreg) result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      unsigned SubRegIdx = (is32Bit ?
                            SystemZ::subreg_odd32 : SystemZ::subreg_odd);
      SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                           dl, NVT,
                                           SDValue(Result, 0),
                                           CurDAG->getTargetConstant(SubRegIdx,
                                                                     MVT::i32));

      ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
      DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
    }

    // Copy the remainder (even subreg) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      unsigned SubRegIdx = (is32Bit ?
                            SystemZ::subreg_32bit : SystemZ::subreg_even);
      SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                           dl, NVT,
                                           SDValue(Result, 0),
                                           CurDAG->getTargetConstant(SubRegIdx,
                                                                     MVT::i32));

      ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
      DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
    }

    return NULL;
  }
  case ISD::UDIVREM: {
    unsigned Opc, MOpc, ClrOpc;
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);
    EVT ResVT;

    bool is32Bit = false;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i32:
      Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
      ClrOpc = SystemZ::MOV64Pr0_even;
      ResVT = MVT::v2i32;
      is32Bit = true;
      break;
    case MVT::i64:
      Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
      ClrOpc = SystemZ::MOV128r0_even;
      ResVT = MVT::v2i64;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);

    // Prepare the dividend
    SDNode *Dividend = N0.getNode();

    // Insert prepared dividend into suitable 'subreg'
    SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                         dl, ResVT);
    {
      unsigned SubRegIdx = (is32Bit ?
                            SystemZ::subreg_odd32 : SystemZ::subreg_odd);
      Dividend =
        CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
                               SDValue(Tmp, 0), SDValue(Dividend, 0),
                               CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
    }

    // Zero out even subreg
    Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));

    SDValue DivVal = SDValue(Dividend, 0);
    SDNode *Result;
    if (foldedLoad) {
      SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
      Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
                                      Ops, array_lengthof(Ops));
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(Result, 1));
    } else {
      Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
    }

    // Copy the division (odd subreg) result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      unsigned SubRegIdx = (is32Bit ?
                            SystemZ::subreg_odd32 : SystemZ::subreg_odd);
      SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                           dl, NVT,
                                           SDValue(Result, 0),
                                           CurDAG->getTargetConstant(SubRegIdx,
                                                                     MVT::i32));
      ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
      DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
    }

    // Copy the remainder (even subreg) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      unsigned SubRegIdx = (is32Bit ?
                            SystemZ::subreg_32bit : SystemZ::subreg_even);
      SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                           dl, NVT,
                                           SDValue(Result, 0),
                                           CurDAG->getTargetConstant(SubRegIdx,
                                                                     MVT::i32));
      ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
      DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
    }

    return NULL;
  }
  }

  // Select the default instruction
  SDNode *ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        errs() << "\n";
        );
  return ResNode;
}