78#define DEBUG_TYPE "si-fix-sgpr-copies"
81 "amdgpu-enable-merge-m0",
82 cl::desc(
"Merge and hoist M0 initializations"),
95 unsigned NumSVCopies = 0;
100 unsigned NumReadfirstlanes = 0;
102 bool NeedToBeConvertedToVALU =
false;
108 unsigned SiblingPenalty = 0;
110 V2SCopyInfo() : Copy(nullptr),
ID(0){};
111 V2SCopyInfo(
unsigned Id, MachineInstr *
C,
unsigned Width)
112 : Copy(
C), NumReadfirstlanes(Width / 32), ID(
Id){};
113#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
115 dbgs() << ID <<
" : " << *Copy <<
"\n\tS:" << SChain.size()
116 <<
"\n\tSV:" << NumSVCopies <<
"\n\tSP: " << SiblingPenalty
117 <<
"\nScore: " << Score <<
"\n";
122class SIFixSGPRCopies {
123 MachineDominatorTree *MDT;
124 SmallVector<MachineInstr*, 4> SCCCopies;
125 SmallVector<MachineInstr*, 4> RegSequences;
126 SmallVector<MachineInstr*, 4> PHINodes;
127 SmallVector<MachineInstr*, 4> S2VCopies;
128 unsigned NextVGPRToSGPRCopyID = 0;
129 MapVector<unsigned, V2SCopyInfo> V2SCopies;
130 DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
131 DenseSet<MachineInstr *> PHISources;
134 MachineRegisterInfo *MRI;
135 const SIRegisterInfo *TRI;
136 const SIInstrInfo *TII;
138 SIFixSGPRCopies(MachineDominatorTree *MDT) : MDT(MDT) {}
140 bool run(MachineFunction &MF);
141 void fixSCCCopies(MachineFunction &MF);
142 void prepareRegSequenceAndPHIs(MachineFunction &MF);
143 unsigned getNextVGPRToSGPRCopyId() {
return ++NextVGPRToSGPRCopyID; }
144 bool needToBeConvertedToVALU(V2SCopyInfo *
I);
145 void analyzeVGPRToSGPRCopy(MachineInstr *
MI);
146 void lowerVGPR2SGPRCopies(MachineFunction &MF);
153 void processPHINode(MachineInstr &
MI);
158 bool tryMoveVGPRConstToSGPR(MachineOperand &MO,
Register NewDst,
159 MachineBasicBlock *BlockToInsertTo,
168 SIFixSGPRCopiesLegacy() : MachineFunctionPass(ID) {}
170 bool runOnMachineFunction(MachineFunction &MF)
override {
171 MachineDominatorTree *MDT =
172 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
173 SIFixSGPRCopies Impl(MDT);
177 StringRef getPassName()
const override {
return "SI Fix SGPR copies"; }
179 void getAnalysisUsage(AnalysisUsage &AU)
const override {
195char SIFixSGPRCopiesLegacy::
ID = 0;
200 return new SIFixSGPRCopiesLegacy();
203static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
207 Register DstReg = Copy.getOperand(0).getReg();
208 Register SrcReg = Copy.getOperand(1).getReg();
212 :
TRI.getPhysRegBaseClass(SrcReg);
219 :
TRI.getPhysRegBaseClass(DstReg);
221 return std::pair(SrcRC, DstRC);
227 return SrcRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(DstRC) &&
228 TRI.hasVectorRegisters(SrcRC);
234 return DstRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(SrcRC) &&
235 TRI.hasVectorRegisters(DstRC);
242 auto &Src =
MI.getOperand(1);
249 const auto *
UseMI = MO.getParent();
252 if (MO.isDef() ||
UseMI->getParent() !=
MI.getParent() ||
253 UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
256 unsigned OpIdx = MO.getOperandNo();
257 if (
OpIdx >=
UseMI->getDesc().getNumOperands() ||
311 if (SubReg != AMDGPU::NoSubRegister)
325 bool IsAGPR =
TRI->isAGPRClass(DstRC);
327 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
329 TRI->getRegClassForOperandReg(MRI,
MI.getOperand(
I));
331 "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
343 unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
344 AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
351 MI.getOperand(
I).setReg(TmpReg);
363 if (Copy->getOpcode() != AMDGPU::COPY)
366 if (!MoveImm->isMoveImmediate())
370 TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
375 if (Copy->getOperand(1).getSubReg())
378 switch (MoveImm->getOpcode()) {
381 case AMDGPU::V_MOV_B32_e32:
382 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
383 SMovOp = AMDGPU::S_MOV_B32;
385 case AMDGPU::V_MOV_B64_PSEUDO:
386 SMovOp = AMDGPU::S_MOV_B64_IMM_PSEUDO;
393template <
class UnaryPredicate>
403 while (!Worklist.
empty()) {
443 while (
I !=
MBB->end() &&
TII->isBasicBlockPrologue(*
I))
459 using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
470 for (
auto &MO :
MI.operands()) {
471 if ((MO.isReg() && ((MO.isDef() && MO.getReg() !=
Reg) || !MO.isDef())) ||
472 (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
480 Inits[Imm->getImm()].push_front(&
MI);
485 for (
auto &
Init : Inits) {
486 auto &Defs =
Init.second;
488 for (
auto I1 = Defs.begin(),
E = Defs.end(); I1 !=
E; ) {
491 for (
auto I2 = std::next(I1); I2 !=
E; ) {
500 auto interferes = [&MDT, From, To](
MachineInstr* &Clobber) ->
bool {
503 bool MayClobberFrom =
isReachable(Clobber, &*From, MBBTo, MDT);
504 bool MayClobberTo =
isReachable(Clobber, &*To, MBBTo, MDT);
505 if (!MayClobberFrom && !MayClobberTo)
507 if ((MayClobberFrom && !MayClobberTo) ||
508 (!MayClobberFrom && MayClobberTo))
514 return !((MBBFrom == MBBTo &&
522 return C.first !=
Init.first &&
528 if (!interferes(MI2, MI1)) {
538 if (!interferes(MI1, MI2)) {
556 if (!interferes(MI1,
I) && !interferes(MI2,
I)) {
560 <<
"and moving from "
577 for (
auto &
Init : Inits) {
578 auto &Defs =
Init.second;
579 auto I = Defs.begin();
580 while (
I != Defs.end()) {
581 if (MergedInstrs.
count(*
I)) {
582 (*I)->eraseFromParent();
590 for (
auto &
Init : Inits) {
591 auto &Defs =
Init.second;
592 for (
auto *
MI : Defs) {
593 auto *
MBB =
MI->getParent();
598 if (!
TII->isBasicBlockPrologue(*
B))
601 auto R = std::next(
MI->getReverseIterator());
602 const unsigned Threshold = 50;
604 for (
unsigned I = 0; R !=
B &&
I < Threshold; ++R, ++
I)
605 if (R->readsRegister(
Reg,
TRI) || R->definesRegister(
Reg,
TRI) ||
606 TII->isSchedulingBoundary(*R,
MBB, *
MBB->getParent()))
628 TRI =
ST.getRegisterInfo();
629 TII =
ST.getInstrInfo();
632 SmallVector<MachineInstr *, 8> Relegalize;
634 for (MachineBasicBlock &
MBB : MF) {
637 MachineInstr &
MI = *
I;
639 switch (
MI.getOpcode()) {
643 if (
TII->isWMMA(
MI) &&
648 const TargetRegisterClass *SrcRC, *DstRC;
664 if (lowerSpecialCase(
MI,
I))
667 analyzeVGPRToSGPRCopy(&
MI);
672 case AMDGPU::STRICT_WQM:
673 case AMDGPU::SOFT_WQM:
674 case AMDGPU::STRICT_WWM:
675 case AMDGPU::INSERT_SUBREG:
677 case AMDGPU::REG_SEQUENCE: {
678 if (
TRI->isSGPRClass(
TII->getOpRegClass(
MI, 0))) {
679 for (MachineOperand &MO :
MI.operands()) {
680 if (!MO.isReg() || !MO.getReg().isVirtual())
682 const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
683 if (SrcRC == &AMDGPU::VReg_1RegClass)
686 if (
TRI->hasVectorRegisters(SrcRC)) {
687 const TargetRegisterClass *DestRC =
688 TRI->getEquivalentSGPRClass(SrcRC);
689 Register NewDst = MRI->createVirtualRegister(DestRC);
690 MachineBasicBlock *BlockToInsertCopy =
691 MI.isPHI() ?
MI.getOperand(MO.getOperandNo() + 1).getMBB()
697 if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
698 PointToInsertCopy,
DL)) {
699 MachineInstr *NewCopy =
700 BuildMI(*BlockToInsertCopy, PointToInsertCopy,
DL,
701 TII->get(AMDGPU::COPY), NewDst)
704 analyzeVGPRToSGPRCopy(NewCopy);
705 PHISources.
insert(NewCopy);
713 else if (
MI.isRegSequence())
718 case AMDGPU::V_WRITELANE_B32: {
721 if (
ST.getConstantBusLimit(
MI.getOpcode()) != 1)
731 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
733 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
734 MachineOperand &Src0 =
MI.getOperand(Src0Idx);
735 MachineOperand &Src1 =
MI.getOperand(Src1Idx);
739 Src0.
getReg() != AMDGPU::M0) &&
741 Src1.
getReg() != AMDGPU::M0)) {
748 for (MachineOperand *MO : {&Src0, &Src1}) {
749 if (MO->getReg().isVirtual()) {
750 MachineInstr *
DefMI = MRI->getVRegDef(MO->getReg());
754 MO->getReg() ==
Def.getReg() &&
755 MO->getSubReg() ==
Def.getSubReg()) {
757 if (Copied.
isImm() &&
758 TII->isInlineConstant(APInt(64, Copied.
getImm(),
true))) {
759 MO->ChangeToImmediate(Copied.
getImm());
772 TII->get(AMDGPU::COPY), AMDGPU::M0)
783 lowerVGPR2SGPRCopies(MF);
786 for (
auto *
MI : S2VCopies) {
789 const TargetRegisterClass *SrcRC, *DstRC;
795 for (
auto *
MI : RegSequences) {
797 if (
MI->isRegSequence())
800 for (
auto *
MI : PHINodes) {
803 while (!Relegalize.
empty())
806 if (MF.getTarget().getOptLevel() > CodeGenOptLevel::None &&
EnableM0Merge)
809 SiblingPenalty.clear();
812 RegSequences.clear();
820void SIFixSGPRCopies::processPHINode(MachineInstr &
MI) {
821 bool AllAGPRUses =
true;
822 SetVector<const MachineInstr *> worklist;
823 SmallPtrSet<const MachineInstr *, 4> Visited;
824 SetVector<MachineInstr *> PHIOperands;
828 bool HasUses =
false;
829 while (!worklist.
empty()) {
832 for (
const auto &Use : MRI->use_operands(
Reg)) {
834 const MachineInstr *
UseMI =
Use.getParent();
837 TRI->isAGPR(*MRI,
Use.getReg());
848 const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
849 if (HasUses && AllAGPRUses && !
TRI->isAGPRClass(RC0)) {
851 MRI->setRegClass(PHIRes,
TRI->getEquivalentAGPRClass(RC0));
852 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
853 MachineInstr *
DefMI = MRI->getVRegDef(
MI.getOperand(
I).getReg());
859 if (
TRI->hasVectorRegisters(MRI->getRegClass(PHIRes)) ||
860 RC0 == &AMDGPU::VReg_1RegClass) {
862 TII->legalizeOperands(
MI, MDT);
866 while (!PHIOperands.
empty()) {
871bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
872 MachineOperand &MaybeVGPRConstMO,
Register DstReg,
873 MachineBasicBlock *BlockToInsertTo,
876 MachineInstr *
DefMI = MRI->getVRegDef(MaybeVGPRConstMO.
getReg());
880 MachineOperand *SrcConst =
TII->getNamedOperand(*
DefMI, AMDGPU::OpName::src0);
881 if (SrcConst->
isReg())
884 const TargetRegisterClass *SrcRC =
885 MRI->getRegClass(MaybeVGPRConstMO.
getReg());
886 unsigned MoveSize =
TRI->getRegSizeInBits(*SrcRC);
888 MoveSize == 64 ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::S_MOV_B32;
889 BuildMI(*BlockToInsertTo, PointToInsertTo,
DL,
TII->get(MoveOp), DstReg)
891 if (MRI->hasOneUse(MaybeVGPRConstMO.
getReg()))
893 MaybeVGPRConstMO.
setReg(DstReg);
897bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &
MI,
906 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
907 if (DstReg == AMDGPU::M0 &&
TRI->hasVectorRegisters(SrcRC)) {
909 MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
911 const MCInstrDesc &ReadFirstLaneDesc =
912 TII->get(AMDGPU::V_READFIRSTLANE_B32);
913 BuildMI(*
MI.getParent(),
MI,
MI.getDebugLoc(), ReadFirstLaneDesc, TmpReg)
914 .
add(
MI.getOperand(1));
916 unsigned SubReg =
MI.getOperand(1).getSubReg();
917 MI.getOperand(1).setReg(TmpReg);
918 MI.getOperand(1).setSubReg(AMDGPU::NoSubRegister);
920 const TargetRegisterClass *OpRC =
TII->getRegClass(ReadFirstLaneDesc, 1);
921 const TargetRegisterClass *ConstrainRC =
922 SubReg == AMDGPU::NoSubRegister
924 :
TRI->getMatchingSuperRegClass(SrcRC, OpRC, SubReg);
926 if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
931 if (tryMoveVGPRConstToSGPR(
MI.getOperand(1), DstReg,
MI.getParent(),
MI,
933 I =
MI.eraseFromParent();
941 SIInstrWorklist worklist;
943 TII->moveToVALU(worklist, MDT);
952 MI.getOperand(1).ChangeToImmediate(Imm);
953 MI.addImplicitDefUseOperands(*
MI.getMF());
954 MI.setDesc(
TII->get(SMovOp));
960void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr*
MI) {
964 const TargetRegisterClass *DstRC =
TRI->getRegClassForReg(*MRI, DstReg);
966 V2SCopyInfo
Info(getNextVGPRToSGPRCopyId(),
MI,
967 TRI->getRegSizeInBits(*DstRC));
968 SmallVector<MachineInstr *, 8> AnalysisWorklist;
971 DenseSet<MachineInstr *> Visited;
973 while (!AnalysisWorklist.
empty()) {
977 if (!Visited.
insert(Inst).second)
988 const TargetRegisterClass *SrcRC, *DstRC;
997 SiblingPenalty[Inst].insert(
Info.ID);
999 SmallVector<MachineInstr *, 4>
Users;
1005 !
I->findRegisterDefOperand(AMDGPU::SCC,
nullptr)) {
1006 if (
I->readsRegister(AMDGPU::SCC,
nullptr))
1012 !
TII->isVALU(*Inst,
true)) {
1013 for (
auto &U : MRI->use_instructions(
Reg))
1014 Users.push_back(&U);
1017 for (
auto *U :
Users) {
1018 if (
TII->isSALU(*U))
1019 Info.SChain.insert(U);
1023 V2SCopies[
Info.ID] = std::move(Info);
1028bool SIFixSGPRCopies::needToBeConvertedToVALU(V2SCopyInfo *Info) {
1029 if (
Info->SChain.empty()) {
1034 Info->SChain, [&](MachineInstr *
A, MachineInstr *
B) ->
bool {
1035 return SiblingPenalty[A].size() < SiblingPenalty[B].size();
1037 Info->Siblings.remove_if([&](
unsigned ID) {
return ID ==
Info->ID; });
1043 SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
1044 for (
auto J :
Info->Siblings) {
1045 auto *InfoIt = V2SCopies.find(J);
1046 if (InfoIt != V2SCopies.end()) {
1047 MachineInstr *SiblingCopy = InfoIt->second.Copy;
1056 Info->SiblingPenalty = SrcRegs.
size();
1059 Info->NumSVCopies +
Info->SiblingPenalty +
Info->NumReadfirstlanes;
1060 unsigned Profit =
Info->SChain.size();
1061 Info->Score = Penalty > Profit ? 0 : Profit - Penalty;
1062 Info->NeedToBeConvertedToVALU =
Info->Score < 3;
1063 return Info->NeedToBeConvertedToVALU;
1066void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
1068 SmallVector<unsigned, 8> LoweringWorklist;
1069 for (
auto &
C : V2SCopies) {
1070 if (needToBeConvertedToVALU(&
C.second))
1078 while (!LoweringWorklist.
empty()) {
1080 auto *CurInfoIt = V2SCopies.find(CurID);
1081 if (CurInfoIt != V2SCopies.end()) {
1082 const V2SCopyInfo &
C = CurInfoIt->second;
1084 for (
auto S :
C.Siblings) {
1085 auto *SibInfoIt = V2SCopies.find(S);
1086 if (SibInfoIt != V2SCopies.end()) {
1087 V2SCopyInfo &
SI = SibInfoIt->second;
1089 if (!
SI.NeedToBeConvertedToVALU) {
1090 SI.SChain.set_subtract(
C.SChain);
1091 if (needToBeConvertedToVALU(&SI))
1094 SI.Siblings.remove_if([&](
unsigned ID) {
return ID ==
C.ID; });
1098 <<
" is being turned to VALU\n");
1102 V2SCopies.erase(
C.ID);
1110 for (
auto C : V2SCopies) {
1111 MachineInstr *
MI =
C.second.Copy;
1112 MachineBasicBlock *
MBB =
MI->getParent();
1116 <<
" is being turned to v_readfirstlane_b32"
1117 <<
" Score: " <<
C.second.Score <<
"\n");
1118 Register DstReg =
MI->getOperand(0).getReg();
1119 MRI->constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
1121 Register SrcReg =
MI->getOperand(1).getReg();
1122 unsigned SubReg =
MI->getOperand(1).getSubReg();
1123 const TargetRegisterClass *SrcRC =
1124 TRI->getRegClassForOperandReg(*MRI,
MI->getOperand(1));
1125 size_t SrcSize =
TRI->getRegSizeInBits(*SrcRC);
1126 if (SrcSize == 16) {
1128 "We do not expect to see 16-bit copies from VGPR to SGPR unless "
1129 "we have 16-bit VGPRs");
1130 assert(MRI->getRegClass(DstReg) == &AMDGPU::SReg_32RegClass ||
1131 MRI->getRegClass(DstReg) == &AMDGPU::SReg_32_XM0RegClass);
1133 MRI->setRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
1134 Register VReg32 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1136 Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_16RegClass);
1139 .
addReg(SrcReg, {}, SubReg)
1140 .addImm(AMDGPU::lo16)
1145 }
else if (SrcSize == 32) {
1146 const MCInstrDesc &ReadFirstLaneDesc =
1147 TII->get(AMDGPU::V_READFIRSTLANE_B32);
1148 const TargetRegisterClass *OpRC =
TII->getRegClass(ReadFirstLaneDesc, 1);
1150 .
addReg(SrcReg, {}, SubReg);
1152 const TargetRegisterClass *ConstrainRC =
1153 SubReg == AMDGPU::NoSubRegister
1155 :
TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), OpRC,
1158 if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
1162 TII->get(AMDGPU::REG_SEQUENCE), DstReg);
1163 int N =
TRI->getRegSizeInBits(*SrcRC) / 32;
1164 for (
int i = 0; i <
N; i++) {
1166 Result, *MRI,
MI->getOperand(1), SrcRC,
1167 TRI->getSubRegFromChannel(i), &AMDGPU::VGPR_32RegClass);
1169 MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1171 TII->get(AMDGPU::V_READFIRSTLANE_B32), PartialDst)
1173 Result.addReg(PartialDst).addImm(
TRI->getSubRegFromChannel(i));
1176 MI->eraseFromParent();
1180void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
1181 const AMDGPU::LaneMaskConstants &LMC =
1183 for (MachineBasicBlock &
MBB : MF) {
1186 MachineInstr &
MI = *
I;
1192 if (SrcReg == AMDGPU::SCC) {
1194 MRI->createVirtualRegister(
TRI->getWaveMaskRegClass());
1199 I =
BuildMI(*
MI.getParent(), std::next(
I),
I->getDebugLoc(),
1200 TII->get(AMDGPU::COPY), DstReg)
1202 MI.eraseFromParent();
1205 if (DstReg == AMDGPU::SCC) {
1206 Register Tmp = MRI->createVirtualRegister(
TRI->getBoolRC());
1212 MI.eraseFromParent();
1222 SIFixSGPRCopies Impl(&MDT);
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
iv Induction Variable Users
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
static cl::opt< bool > EnableM0Merge("amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), cl::init(true))
static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII)
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII, MachineRegisterInfo &MRI)
bool searchPredecessors(const MachineBasicBlock *MBB, const MachineBasicBlock *CutOff, UnaryPredicate Predicate)
static bool isReachable(const MachineInstr *From, const MachineInstr *To, const MachineBasicBlock *CutOff, MachineDominatorTree &MDT)
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII)
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm)
static MachineBasicBlock::iterator getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Implements a dense probed hash-table based set.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
bool isRegSequence() const
LLVM_ABI unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
iterator_range< def_instr_iterator > def_instructions(Register Reg) const
use_instr_iterator use_instr_begin(Register RegNo) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< reg_nodbg_iterator > reg_nodbg_operands(Register Reg) const
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ Resolved
Queried, materialization begun.
NodeAddr< DefNode * > Def
NodeAddr< InstrNode * > Instr
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr RegState getDefRegState(bool B)
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
char & SIFixSGPRCopiesLegacyID
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createSIFixSGPRCopiesLegacyPass()
void insert(MachineInstr *MI)