19#include "llvm/IR/IntrinsicsAMDGPU.h"
23#define DEBUG_TYPE "amdgpu-lower-intrinsics"
29class AMDGPULowerIntrinsicsImpl {
43class AMDGPULowerIntrinsicsLegacy :
public ModulePass {
49 bool runOnModule(
Module &M)
override;
65bool AMDGPULowerIntrinsicsImpl::run() {
68 for (Function &
F : M) {
69 switch (
F.getIntrinsicID()) {
72 case Intrinsic::amdgcn_s_barrier:
73 case Intrinsic::amdgcn_s_barrier_signal:
74 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
75 case Intrinsic::amdgcn_s_barrier_wait:
76 case Intrinsic::amdgcn_s_cluster_barrier:
87bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &
I) {
88 assert(
I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier ||
89 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal ||
90 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst ||
91 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait ||
92 I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier);
94 const GCNSubtarget &
ST = TM.
getSubtarget<GCNSubtarget>(*
I.getFunction());
95 bool IsSingleWaveWG =
false;
98 IsSingleWaveWG =
ST.isSingleWavefrontWorkgroup(*
I.getFunction());
104 if (
I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier) {
107 if (IsSingleWaveWG) {
108 B.CreateIntrinsicWithoutFolding(
B.getVoidTy(),
109 Intrinsic::amdgcn_wave_barrier, {})
114 CallInst *IsFirst =
B.CreateIntrinsicWithoutFolding(
115 B.getInt1Ty(), Intrinsic::amdgcn_s_barrier_signal_isfirst,
118 B.CreateIntrinsicWithoutFolding(
119 B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait, {BarrierID_16})
124 B.SetInsertPoint(ThenTerm);
131 B.CreateIntrinsicWithoutFolding(
132 B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal, {BarrierID_32})
135 B.SetInsertPoint(&
I);
136 B.CreateIntrinsicWithoutFolding(
137 B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait, {BarrierID_16})
144 bool IsWorkgroupScope =
false;
146 if (
I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait ||
147 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal ||
148 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst) {
154 IsWorkgroupScope =
true;
156 assert(
I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier);
157 IsWorkgroupScope =
true;
160 if (IsWorkgroupScope && IsSingleWaveWG) {
162 if (
I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier ||
163 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait) {
164 B.CreateIntrinsicWithoutFolding(
B.getVoidTy(),
165 Intrinsic::amdgcn_wave_barrier, {})
167 }
else if (
I.getIntrinsicID() ==
168 Intrinsic::amdgcn_s_barrier_signal_isfirst) {
170 I.replaceAllUsesWith(
B.getInt1(
true));
176 if (
I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier &&
177 ST.hasSplitBarriers()) {
181 B.CreateIntrinsicWithoutFolding(
182 B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal, {BarrierID_32})
184 B.CreateIntrinsicWithoutFolding(
185 B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait, {BarrierID_16})
196 AMDGPULowerIntrinsicsImpl Impl(M, TM);
202bool AMDGPULowerIntrinsicsLegacy::runOnModule(
Module &M) {
203 auto &TPC = getAnalysis<TargetPassConfig>();
206 AMDGPULowerIntrinsicsImpl Impl(M, TM);
210#define PASS_DESC "AMDGPU lower intrinsics"
217char AMDGPULowerIntrinsicsLegacy::
ID = 0;
220 return new AMDGPULowerIntrinsicsLegacy;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
ModuleAnalysisManager MAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool forEachCall(Function &Intrin, T Callback)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
iterator_range< user_iterator > users()
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
ModulePass * createAMDGPULowerIntrinsicsLegacyPass()
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the block are moved to a new block.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)