//===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Declares the ScheduleDAGInstrs class, which implements scheduling
/// for a MachineInstr-based dependency graph.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
#define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseMultiSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/LaneBitmask.h"
#include <cassert>
#include <cstdint>
#include <list>
#include <utility>
#include <vector>

namespace llvm {

  class LiveIntervals;
  class MachineFrameInfo;
  class MachineFunction;
  class MachineInstr;
  class MachineLoopInfo;
  class MachineOperand;
  struct MCSchedClassDesc;
  class PressureDiffs;
  class PseudoSourceValue;
  class RegPressureTracker;
  class UndefValue;
  class Value;

  /// One (virtual register, lane mask) -> SUnit association.
  struct VReg2SUnit {
    unsigned VirtReg;
    LaneBitmask LaneMask;
    SUnit *SU;

    /// Stores the three arguments in the corresponding fields.
    VReg2SUnit(unsigned VReg, LaneBitmask Lanes, SUnit *Unit)
        : VirtReg(VReg), LaneMask(Lanes), SU(Unit) {}

    /// Returns the index form of VirtReg, as computed by
    /// TargetRegisterInfo::virtReg2Index().
    unsigned getSparseSetIndex() const {
      const unsigned Index = TargetRegisterInfo::virtReg2Index(VirtReg);
      return Index;
    }
  };

  /// A VReg2SUnit that additionally remembers an operand index.
  struct VReg2SUnitOperIdx : public VReg2SUnit {
    unsigned OperandIndex;

    /// Forwards \p VReg, \p LaneMask and \p SU to the VReg2SUnit base and
    /// records \p OperIdx as OperandIndex.
    VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, unsigned OperIdx,
                      SUnit *SU)
        : VReg2SUnit(VReg, LaneMask, SU),
          OperandIndex(OperIdx) {}
  };

  /// Describes one access to a physical register: the SUnit performing it,
  /// the operand index involved, and the register itself.
  /// For non-data-dependent uses, OpIdx == -1.
  struct PhysRegSUOper {
    SUnit *SU;
    int OpIdx;
    unsigned Reg;

    /// Stores the three arguments in the corresponding fields.
    PhysRegSUOper(SUnit *Unit, int OperandIdx, unsigned PhysReg)
        : SU(Unit), OpIdx(OperandIdx), Reg(PhysReg) {}

    /// Returns Reg unchanged.
    unsigned getSparseSetIndex() const {
      return Reg;
    }
  };

  /// Use a SparseMultiSet to track physical registers. Storage is only
  /// allocated once for the pass. It can be cleared in constant time and reused
  /// without any frees.
  ///
  /// Entries are PhysRegSUOper records, keyed by an unsigned value through
  /// identity<unsigned>.
  using Reg2SUnitsMap =
      SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;

  /// Use SparseSet as a SparseMap by relying on the fact that it never
  /// compares ValueT's, only unsigned keys. This allows the set to be cleared
  /// between scheduling regions in constant time as long as ValueT does not
  /// require a destructor.
  using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>;

  /// Track local uses of virtual registers. These uses are gathered by the DAG
  /// builder and may be consulted by the scheduler to avoid iterating an entire
  /// vreg use list.
  using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;

  /// Same container shape as VReg2SUnitMultiMap, but each entry is a
  /// VReg2SUnitOperIdx and therefore also records an operand index.
  using VReg2SUnitOperIdxMultiMap =
      SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;

  /// Either a pointer to an IR Value or a pointer to a PseudoSourceValue,
  /// held in a single PointerUnion.
  using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;

  /// A ValueType tagged with a single boolean "may alias" bit, both stored in
  /// the PointerIntPair base.
  struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
    /// Stores \p Obj as the pointer half and \p Aliases as the flag half.
    UnderlyingObject(ValueType Obj, bool Aliases)
        : PointerIntPair<ValueType, 1, bool>(Obj, Aliases) {}

    /// Returns the pointer half of the pair.
    ValueType getValue() const {
      return this->getPointer();
    }

    /// Returns the flag half of the pair.
    bool mayAlias() const {
      return this->getInt();
    }
  };

  /// A SmallVector of UnderlyingObject with inline storage for four elements.
  using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;

  /// A ScheduleDAG for scheduling lists of MachineInstr.
  ///
  /// The class is abstract: schedule() is pure virtual and must be supplied by
  /// a concrete scheduler. The region hooks (startBlock(), enterRegion(),
  /// exitRegion(), finishBlock()) are virtual so a scheduler may extend them.
  class ScheduleDAGInstrs : public ScheduleDAG {
  protected:
    /// Loop information; presumably the \c mli constructor argument (confirm
    /// in the implementation file).
    const MachineLoopInfo *MLI;
    /// Frame information; presumably obtained from the MachineFunction passed
    /// to the constructor (confirm in the implementation file).
    const MachineFrameInfo &MFI;

    /// TargetSchedModel provides an interface to the machine model.
    TargetSchedModel SchedModel;

    /// True if the DAG builder should remove kill flags (in preparation for
    /// rescheduling).
    bool RemoveKillFlags;

    /// The standard DAG builder does not normally include terminators as DAG
    /// nodes because it does not create the necessary dependencies to prevent
    /// reordering. A specialized scheduler can override
    /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
    /// it has taken responsibility for scheduling the terminator correctly.
    bool CanHandleTerminators = false;

    /// Whether lane masks should get tracked.
    bool TrackLaneMasks = false;

    // State specific to the current scheduling region.
    // ------------------------------------------------

    /// The block in which to insert instructions
    MachineBasicBlock *BB;

    /// The beginning of the range to be scheduled.
    MachineBasicBlock::iterator RegionBegin;

    /// The end of the range to be scheduled.
    MachineBasicBlock::iterator RegionEnd;

    /// Instructions in this region (distance(RegionBegin, RegionEnd)).
    unsigned NumRegionInstrs;

    /// After calling buildSchedGraph, each machine instruction in the current
    /// scheduling region is mapped to an SUnit.
    DenseMap<MachineInstr*, SUnit*> MISUnitMap;

    // State internal to DAG building.
    // -------------------------------

    /// Defs, Uses - Remember where defs and uses of each register are as we
    /// iterate upward through the instructions. This is allocated here instead
    /// of inside buildSchedGraph to avoid the need for it to be initialized and
    /// destructed for each block.
    Reg2SUnitsMap Defs;
    Reg2SUnitsMap Uses;

    /// Tracks the last instruction(s) in this region defining each virtual
    /// register. There may be multiple current definitions for a register with
    /// disjoint lane masks.
    VReg2SUnitMultiMap CurrentVRegDefs;
    /// Tracks the last instructions in this region using each virtual register.
    VReg2SUnitOperIdxMultiMap CurrentVRegUses;

    /// Alias analysis consulted for chain dependencies; null by default.
    /// NOTE(review): presumably set from the AA argument of buildSchedGraph()
    /// -- confirm in the implementation file.
    AliasAnalysis *AAForDep = nullptr;

    /// Remember a generic side-effecting instruction as we proceed.
    /// No other SU ever gets scheduled around it (except in the special
    /// case of a huge region that gets reduced).
    SUnit *BarrierChain = nullptr;

  public:
    /// A list of SUnits, used in Value2SUsMap, during DAG construction.
    /// Note: to gain speed it might be worth investigating an optimized
    /// implementation of this data structure, such as a singly linked list
    /// with a memory pool (SmallVector was tried but slow and SparseSet is not
    /// applicable).
    using SUList = std::list<SUnit *>;

  protected:
    /// A map from ValueType to SUList, used during DAG construction, as
    /// a means of remembering which SUs depend on which memory locations.
    /// Only forward-declared here; the definition is not in this header.
    class Value2SUsMap;

    /// Reduces maps in FIFO order, by N SUs. This is better than turning
    /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
    /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
    /// and those below it.
    void reduceHugeMemNodeMaps(Value2SUsMap &stores,
                               Value2SUsMap &loads, unsigned N);

    /// Adds a chain edge between SUa and SUb, but only if both
    /// AliasAnalysis and Target fail to deny the dependency.
    /// \p Latency defaults to 0.
    void addChainDependency(SUnit *SUa, SUnit *SUb,
                            unsigned Latency = 0);

    /// Adds dependencies as needed from all SUs in list to SU.
    /// Calls addChainDependency(SU, Entry, Latency) once for every entry of
    /// \p SUs.
    void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
      for (SUnit *Entry : SUs)
        addChainDependency(SU, Entry, Latency);
    }

    /// Adds dependencies as needed from all SUs in map, to SU.
    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);

    /// Adds dependencies as needed to SU, from all SUs mapped to V.
    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
                              ValueType V);

    /// Adds barrier chain edges from all SUs in map, and then clears the map.
    /// This is equivalent to insertBarrierChain(), but optimized for the common
    /// case where the new BarrierChain (a global memory object) has a higher
    /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
    /// before calling this.
    void addBarrierChain(Value2SUsMap &map);

    /// Inserts a barrier chain in a huge region, far below current SU.
    /// Adds barrier chain edges from all SUs in map with higher NodeNums than
    /// this new BarrierChain, and removes them from map. It is assumed
    /// BarrierChain has been set before calling this.
    void insertBarrierChain(Value2SUsMap &map);

    /// For an unanalyzable memory access, this Value is used in maps.
    UndefValue *UnknownValue;

    using DbgValueVector =
        std::vector<std::pair<MachineInstr *, MachineInstr *>>;
    /// Remember instruction that precedes DBG_VALUE.
    /// These are generated by buildSchedGraph but persist so they can be
    /// referenced when emitting the final schedule.
    DbgValueVector DbgValues;
    /// Null by default.
    /// NOTE(review): presumably a DBG_VALUE with no preceding instruction in
    /// the region -- confirm in the implementation file.
    MachineInstr *FirstDbgValue = nullptr;

    /// Set of live physical registers for updating kill flags.
    LivePhysRegs LiveRegs;

  public:
    /// Constructs the DAG for \p mf. \p RemoveKillFlags defaults to false;
    /// see the member of the same name for its meaning.
    explicit ScheduleDAGInstrs(MachineFunction &mf,
                               const MachineLoopInfo *mli,
                               bool RemoveKillFlags = false);

    ~ScheduleDAGInstrs() override = default;

    /// Gets the machine model for instruction scheduling.
    const TargetSchedModel *getSchedModel() const { return &SchedModel; }

    /// Resolves and caches the scheduling class for an SUnit.
    ///
    /// If \p SU has no cached class yet and the model has an instruction
    /// scheduling model, the class is resolved from the SUnit's instruction
    /// and stored in SU->SchedClass. Returns SU->SchedClass, which is still
    /// null when nothing was cached and the model has no instruction
    /// scheduling model.
    const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
      if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
        SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
      return SU->SchedClass;
    }

    /// Returns an iterator to the top of the current scheduling region.
    MachineBasicBlock::iterator begin() const { return RegionBegin; }

    /// Returns an iterator to the bottom of the current scheduling region.
    MachineBasicBlock::iterator end() const { return RegionEnd; }

    /// Creates a new SUnit and returns a pointer to it.
    SUnit *newSUnit(MachineInstr *MI);

    /// Returns an existing SUnit for this MI, or nullptr.
    SUnit *getSUnit(MachineInstr *MI) const;

    /// If this method returns true, handling of the scheduling regions
    /// themselves (in case of a scheduling boundary in MBB) will be done
    /// beginning with the topmost region of MBB.
    /// The default implementation returns false.
    virtual bool doMBBSchedRegionsTopDown() const { return false; }

    /// Prepares to perform scheduling in the given block.
    virtual void startBlock(MachineBasicBlock *BB);

    /// Cleans up after scheduling in the given block.
    virtual void finishBlock();

    /// Initializes the DAG and common scheduler state for a new
    /// scheduling region. This does not actually create the DAG, only clears
    /// it. The scheduling driver may call buildSchedGraph multiple times per
    /// scheduling region.
    virtual void enterRegion(MachineBasicBlock *bb,
                             MachineBasicBlock::iterator begin,
                             MachineBasicBlock::iterator end,
                             unsigned regioninstrs);

    /// Called when the scheduler has finished scheduling the current region.
    virtual void exitRegion();

    /// Builds SUnits for the current region.
    /// If \p RPTracker is non-null, compute register pressure as a side effect.
    /// The DAG builder is an efficient place to do it because it already visits
    /// operands.
    /// \p RPTracker, \p PDiffs and \p LIS default to null and
    /// \p TrackLaneMasks defaults to false.
    void buildSchedGraph(AliasAnalysis *AA,
                         RegPressureTracker *RPTracker = nullptr,
                         PressureDiffs *PDiffs = nullptr,
                         LiveIntervals *LIS = nullptr,
                         bool TrackLaneMasks = false);

    /// Adds dependencies from instructions in the current list of
    /// instructions being scheduled to scheduling barrier. We want to make sure
    /// instructions which define registers that are either used by the
    /// terminator or are live-out are properly scheduled. This is especially
    /// important when the definition latency of the return value(s) is too
    /// high to be hidden by the branch or when the live-out registers are used
    /// by instructions in the fallthrough block.
    void addSchedBarrierDeps();

    /// Orders nodes according to selected style.
    ///
    /// Typically, a scheduling algorithm will implement schedule() without
    /// overriding enterRegion() or exitRegion().
    virtual void schedule() = 0;

    /// Allow targets to perform final scheduling actions at the level of the
    /// whole MachineFunction. By default does nothing.
    virtual void finalizeSchedule() {}

    /// Debug-dump overrides of the ScheduleDAG interface.
    void dumpNode(const SUnit &SU) const override;
    void dump() const override;

    /// Returns a label for a DAG node that points to an instruction.
    std::string getGraphNodeLabel(const SUnit *SU) const override;

    /// Returns a label for the region of code covered by the DAG.
    std::string getDAGName() const override;

    /// Fixes register kill flags that scheduling has made invalid.
    void fixupKills(MachineBasicBlock &MBB);

  protected:
    // DAG-construction helpers, defined out of line.
    // NOTE(review): judging by the names, initSUnits() creates the SUnits for
    // the region and the add*Deps() helpers add register dependencies for
    // operand OperIdx of SU -- confirm in the implementation file.
    void initSUnits();
    void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
    void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
    void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
    void addVRegUseDeps(SUnit *SU, unsigned OperIdx);

    /// Initializes register live-range state for updating kills.
    /// PostRA helper for rewriting kill flags.
    void startBlockForKills(MachineBasicBlock *BB);

    /// Toggles a register operand kill flag.
    ///
    /// Other adjustments may be made to the instruction if necessary.
    /// Nothing is returned, so the caller is not told whether the operand
    /// was changed or removed.
    void toggleKillFlag(MachineInstr &MI, MachineOperand &MO);

    /// Returns a mask for which lanes get read/written by the given (register)
    /// machine operand.
    LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
  };

  /// Appends a new SUnit for \p MI to SUnits and returns a pointer to it.
  ///
  /// The new unit's node number is the size of SUnits before the append. In
  /// builds with assertions enabled, the address of the first element is
  /// compared before and after the append; a change means the vector
  /// reallocated and trips the assertion.
  inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
#ifndef NDEBUG
    // Address of the first element before the append; null when empty.
    const SUnit *OldFront = nullptr;
    if (!SUnits.empty())
      OldFront = &SUnits.front();
#endif
    const unsigned NodeNum = static_cast<unsigned>(SUnits.size());
    SUnits.emplace_back(MI, NodeNum);
    assert((!OldFront || OldFront == &SUnits.front()) &&
           "SUnits std::vector reallocated on the fly!");
    return &SUnits.back();
  }

  /// Looks up \p MI in MISUnitMap and returns the SUnit mapped to it, or
  /// nullptr when the map has no entry for \p MI.
  inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
    const auto Found = MISUnitMap.find(MI);
    return Found != MISUnitMap.end() ? Found->second : nullptr;
  }

} // end namespace llvm

#endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H