//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The QPX vector registers overlay the scalar floating-point registers, and
// any scalar floating-point loads splat their value across all vector lanes.
// Thus, if we have a scalar load followed by a splat, we can remove the splat
// (i.e. replace the load with a load-and-splat pseudo instruction).
//
// This pass must run after anything that might do store-to-load forwarding.
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "ppc-qpx-load-splat"

STATISTIC(NumSimplified, "Number of QPX load splats simplified");

namespace llvm {
  void initializePPCQPXLoadSplatPass(PassRegistry&);
}

namespace {
  struct PPCQPXLoadSplat : public MachineFunctionPass {
    static char ID;
    PPCQPXLoadSplat() : MachineFunctionPass(ID) {
      initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &Fn) override;

    StringRef getPassName() const override {
      return "PowerPC QPX Load Splat Simplification";
    }
  };
  char PPCQPXLoadSplat::ID = 0;
}

INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
                "PowerPC QPX Load Splat Simplification",
                false, false)

FunctionPass *llvm::createPPCQPXLoadSplatPass() {
  return new PPCQPXLoadSplat();
}

bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool MadeChange = false;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
    MachineBasicBlock *MBB = &*MFI;
    SmallVector<MachineInstr *, 4> Splats;

    for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
      MachineInstr *MI = &*MBBI;

      if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
        Splats.clear();
        continue;
      }

      // We're looking for a sequence like this:
      // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
      // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm

      for (auto SI = Splats.begin(); SI != Splats.end();) {
        MachineInstr *SMI = *SI;
        unsigned SplatReg = SMI->getOperand(0).getReg();
        unsigned SrcReg = SMI->getOperand(1).getReg();

        if (MI->modifiesRegister(SrcReg, TRI)) {
          switch (MI->getOpcode()) {
          default:
            SI = Splats.erase(SI);
            continue;
          case PPC::LFS:
          case PPC::LFD:
          case PPC::LFSU:
          case PPC::LFDU:
          case PPC::LFSUX:
          case PPC::LFDUX:
          case PPC::LFSX:
          case PPC::LFDX:
          case PPC::LFIWAX:
          case PPC::LFIWZX:
            if (SplatReg != SrcReg) {
              // We need to change the load to define the scalar subregister of
              // the QPX splat source register.
              unsigned SubRegIndex =
                TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
              unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);

              // Substitute both the explicit defined register, and also the
              // implicit def of the containing QPX register.
              MI->getOperand(0).setReg(SplatSubReg);
              MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
            }

            SI = Splats.erase(SI);

            // If SMI is directly after MI, then MBBI's base iterator is
            // pointing at SMI.  Adjust MBBI around the call to erase SMI to
            // avoid invalidating MBBI.
            ++MBBI;
            SMI->eraseFromParent();
            --MBBI;

            ++NumSimplified;
            MadeChange = true;
            continue;
          }
        }

        // If this instruction defines the splat register, then we cannot move
        // the previous definition above it. If it reads from the splat
        // register, then it must already be alive from some previous
        // definition, and if the splat register is different from the source
        // register, then this definition must not be the load for which we're
        // searching.
        if (MI->modifiesRegister(SplatReg, TRI) ||
            (SrcReg != SplatReg &&
             MI->readsRegister(SplatReg, TRI))) {
          SI = Splats.erase(SI);
          continue;
        }

        ++SI;
      }

      if (MI->getOpcode() != PPC::QVESPLATI &&
          MI->getOpcode() != PPC::QVESPLATIs &&
          MI->getOpcode() != PPC::QVESPLATIb)
        continue;
      if (MI->getOperand(2).getImm() != 0)
        continue;

      // If there are other uses of the scalar value after this, replacing
      // those uses might be non-trivial.
      if (!MI->getOperand(1).isKill())
        continue;

      Splats.push_back(MI);
    }
  }

  return MadeChange;
}