35#define DEBUG_TYPE "gcn-vopd-utils"
46 if (IsVOPD3 && !ST.hasVOPD3())
48 if (!IsVOPD3 && (
TII.isVOP3(MIX) ||
TII.isVOP3(MIY)))
50 if (
TII.isDPP(MIX) ||
TII.isDPP(MIY))
58 for (
auto &
Literal : UniqueLiterals) {
62 UniqueLiterals.push_back(&
Op);
66 auto getVRegIdx = [&](
unsigned OpcodeIdx,
unsigned OperandIdx) {
69 if (Operand.
isReg() &&
TRI->isVectorRegister(MRI, Operand.
getReg()))
76 for (
auto CompIdx : VOPD::COMPONENTS) {
81 if (!
TRI->isVectorRegister(MRI, Src0.
getReg())) {
84 }
else if (!
TII.isInlineConstant(Src0)) {
90 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
94 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
95 addLiteral(
MI.getOperand(CompOprIdx));
97 if (
MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
98 UniqueScalarRegs.
insert(AMDGPU::VCC_LO);
101 for (
auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
105 if (
OpName == AMDGPU::OpName::src2) {
108 if (
MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
109 UniqueScalarRegs.
insert(Src->getReg());
113 if (!Src->isReg() || !
TRI->isVGPR(MRI, Src->getReg()))
117 for (
auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
118 AMDGPU::OpName::op_sel}) {
126 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
127 AMDGPU::OpName::src2_modifiers}) {
135 if (UniqueLiterals.
size() > 1)
137 if ((UniqueLiterals.
size() + UniqueScalarRegs.
size()) > 2)
142 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
143 MIX.
getOpcode() == AMDGPU::V_MOV_B32_e32 &&
144 MIY.
getOpcode() == AMDGPU::V_MOV_B32_e32;
146 if (InstInfo.hasInvalidOperand(getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR,
155 *
TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
161 *
TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
168 <<
"\n\tY: " << MIY <<
"\n");
174static std::optional<VOPDMatchInfo>
185 if (!(FirstCanBeVOPD.
X && SecondCanBeVOPD.
Y) &&
186 !(FirstCanBeVOPD.
Y && SecondCanBeVOPD.
X))
190 if (
TII.hasRAWDependency(FirstMI, SecondMI))
194 bool AllowSameVGPR = ST.hasGFX12Insts();
196 if (FirstCanBeVOPD.
X && SecondCanBeVOPD.
Y) {
201 if (FirstCanBeVOPD.
Y && SecondCanBeVOPD.
X) {
204 bool IsAntiDep =
TII.hasRAWDependency(SecondMI, FirstMI);
205 AllowSameVGPR &= !IsAntiDep;
206 if (IsAntiDep && !
TII.isVOPDAntidependencyAllowed(SecondMI))
243 auto checkCanBeVOPD = [&](
bool VOPD3) {
246 return CanBeVOPD.
Y || CanBeVOPD.
X;
248 return checkCanBeVOPD(
false) || (ST.hasVOPD3() && checkCanBeVOPD(
true));
251#ifdef EXPENSIVE_CHECKS
254 MII != FirstMI->
getParent()->instr_end(); ++MII) {
255 if (&*MII == &SecondMI)
259 }() &&
"Expected FirstMI to precede SecondMI");
270 SUnit &Head,
bool Forward) {
275 Stack.push_back(&Head);
276 while (!Stack.empty()) {
277 SUnit *SU = Stack.pop_back_val();
279 for (
const SDep &Edge : Deps) {
282 SUnit *Dep = Edge.getSUnit();
288 Stack.push_back(Dep);
300 [[maybe_unused]]
SUnit &J,
303 if (JLoadPreds.
empty())
306 for (
SUnit *Succ : ILoadSuccs)
307 for (
SUnit *Pred : JLoadPreds)
310 dbgs() <<
"Will not pair SU(" <<
I.NodeNum <<
") with SU("
311 << J.NodeNum <<
")\n";
313 dbgs() <<
" Fusion would introduce a cyclic dependency "
315 << Pred->NodeNum <<
")\n";
317 dbgs() <<
" Fusion may force SU(" << Pred->NodeNum
318 <<
") to complete its load before dispatching SU("
319 << Succ->NodeNum <<
")\n";
331struct VOPDPairingMutation : ScheduleDAGMutation {
338 void apply(ScheduleDAGInstrs *DAG)
override {
339 const TargetInstrInfo &
TII = *DAG->
TII;
342 LLVM_DEBUG(
dbgs() <<
"Target does not support VOPDPairingMutation\n");
346 BitVector VOPDCapable(DAG->
SUnits.size());
349 for (
auto ISUI = DAG->
SUnits.begin(),
E = DAG->
SUnits.end(); ISUI !=
E;
351 const MachineInstr *IMI = ISUI->getInstr();
354 VOPDCapable[IIdx] =
true;
358 BitVector IVisited(DAG->
SUnits.size());
361 BitVector JVisited(DAG->
SUnits.size());
362 BitVector JLoadPredsComputed(DAG->
SUnits.size());
364 for (
auto ISUI = DAG->
SUnits.begin(),
E = DAG->
SUnits.end(); ISUI !=
E;
366 if (!VOPDCapable[IIdx])
368 const MachineInstr *IMI = ISUI->getInstr();
374 unsigned JIdx = IIdx + 1;
375 for (
auto JSUI = ISUI + 1; JSUI !=
E; ++JSUI, ++JIdx) {
376 if (!VOPDCapable[JIdx] || JSUI->isBoundaryNode())
378 const MachineInstr *JMI = JSUI->getInstr();
383 if (!ILoadSuccs.
empty()) {
385 if (!JLoadPredsComputed.test(JIdx)) {
388 JLoadPredsComputed.set(JIdx);
397 VOPDCapable[JIdx] =
false;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
static bool loadsMayOverlap(ScheduleDAGInstrs *DAG, SUnit &I, const BitVector &IVisited, const SmallVector< SUnit * > &ILoadSuccs, SUnit &J, const BitVector &JVisited, const SmallVector< SUnit * > &JLoadPreds)
Checks whether fusing SU I with SU J would force the loads preceding J to complete before loads depen...
static std::optional< VOPDMatchInfo > tryMatchVOPDPairVariant(const SIInstrInfo &TII, unsigned EncodingFamily, MachineInstr &FirstMI, MachineInstr &SecondMI, bool IsVOPD3)
Core pair-eligibility check for a single VOPD encoding variant (VOPD or VOPD3).
static void collectLoads(SmallVector< SUnit * > &Loads, BitVector &Visited, SUnit &Head, bool Forward)
Collect all load (dependents if Forward else dependencies) that connect to the Head SU.
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Interface definition for SIInstrInfo.
This file defines the SmallVector class.
bool test(unsigned Idx) const
Returns true if bit Idx is set.
BitVector & set()
Set all bits in the bitvector.
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
@ Data
Regular data dependence (aka true-dependence).
const GCNSubtarget & getSubtarget() const
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3, bool AllowSameVGPR)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
std::optional< VOPDMatchInfo > tryMatchVOPDPair(const SIInstrInfo &TII, MachineInstr &FirstMI, MachineInstr &SecondMI)
Check whether FirstMI and SecondMI can be combined into a VOPD instruction.
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Describes a matched VOPD pair: which instruction is the X component and which is the Y component,...