33#define DEBUG_TYPE "amdgpu-lower-exec-sync"
57unsigned allocateExecSyncID(
T &NextAvailableIDTracker,
59 unsigned MaxNumGroup,
unsigned IDCnt) {
60 constexpr unsigned InitialVal = 1;
61 unsigned NextID = InitialVal;
64 NextAvailableIDTracker.lookup(
F);
65 unsigned ID = InitialVal;
66 if (!NextAvailableID.
empty())
67 ID = NextAvailableID[GroupID];
75 auto Inserted = NextAvailableIDTracker.try_emplace(
F);
78 Inserted.first->second.assign(MaxNumGroup, InitialVal);
80 Inserted.first->second[GroupID] = NextID + IDCnt;
90 constexpr unsigned NumBarScopes = 1;
95 for (
auto *GV : GVs) {
98 auto Iter = AllocationQ.
find(GV);
99 if (Iter == AllocationQ.
end())
102 Iter->second.push_back(
F);
107 for (
auto *GV : GVs) {
110 auto Iter = AllocationQ.
find(GV);
111 if (Iter == AllocationQ.
end())
114 Iter->second.push_back(
F);
121 if (
A.second.size() !=
B.second.size())
122 return A.second.size() >
B.second.size();
125 return A.first->getName() <
B.first->getName();
128 for (
auto &[GV, Kernels] : AllocationQ) {
131 unsigned BarrierScope = ExtTy->getIntParameter(0);
134 unsigned BarID = allocateExecSyncID(KernelBarrierIDs, Kernels,
135 BarrierScope, NumBarScopes, BarCnt);
138 dbgs() <<
" was assigned barrier id: " << BarID
139 <<
" id-count: " << BarCnt <<
"\n");
142 Offset = 0x802000u | BarrierScope << 9 | BarID << 4;
147 recordLDSAbsoluteAddress(&M, GV,
Offset);
158static bool hasBarrierToLower(
const GVUsesInfoTy &GVUsesInfo) {
160 for (
auto &[Fn, GVs] : Map) {
161 for (
auto &GV : GVs) {
173static bool handleNamedBarriersForObjectLinking(
Module &M) {
180 BarrierToFuncs[&GV].
insert(
I->getFunction());
183 if (BarrierToFuncs.
empty())
187 NamedMDNode *BarMD = M.getOrInsertNamedMetadata(
"amdgpu.named_barrier.uses");
189 std::string ModuleId;
191 assert(!ModuleId.empty() &&
192 "modules with named barriers should have a unique ID");
193 for (
auto &[V, Funcs] : BarrierToFuncs) {
194 if (V->hasLocalLinkage())
195 V->setName(
"__amdgpu_named_barrier." + V->getName() + ModuleId);
196 else if (!V->getName().starts_with(
"__amdgpu_named_barrier"))
197 V->setName(
"__amdgpu_named_barrier." + V->getName());
198 V->setInitializer(
nullptr);
210static bool runLowerExecSyncGlobals(
Module &M) {
212 return handleNamedBarriersForObjectLinking(M);
223 if (hasBarrierToLower(LDSUsesInfo)) {
225 Changed |= lowerExecSyncGlobalVariables(M, LDSUsesInfo);
231class AMDGPULowerExecSyncLegacy :
public ModulePass {
235 bool runOnModule(
Module &M)
override;
240char AMDGPULowerExecSyncLegacy::ID = 0;
244 "AMDGPU lowering of execution synchronization",
false,
248 "AMDGPU lowering of execution synchronization",
false,
251bool AMDGPULowerExecSyncLegacy::runOnModule(
Module &M) {
252 return runLowerExecSyncGlobals(M);
256 return new AMDGPULowerExecSyncLegacy();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
static bool EnableObjectLinking
Represent a constant reference to an array (0 or more elements consecutively in memory),...
The basic data container for the call graph of a Module of IR.
A parsed version of the target data layout string, and methods for querying it.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
@ ExternalLinkage
Externally visible function.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
This is an important class for using LLVM in a threaded context.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI void addOperand(MDNode *M)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent target extension types, which are generally unintrospectable from target-independ...
Target-Independent Code Generator Pass Configuration Options.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
GVUsesInfoTy getTransitiveUsesOfLDSForLowering(const CallGraph &CG, Module &M)
Collects all uses of LDS Global Variables in M using getUsesOfGVByFunction, with isLDSVariableToLower...
bool eliminateGVConstantExprUsesFromAllInstructions(Module &M, function_ref< bool(const GlobalVariable &)> Filter)
Iterates over all GlobalVariables in M, and whenever Filter returns true, replace all constant users ...
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool isLDSVariableToLower(const GlobalVariable &GV)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
char & AMDGPULowerExecSyncLegacyPassID
LLVM_ABI std::string getUniqueModuleId(Module *M)
Produce a unique identifier for this module by taking the MD5 sum of the names of the module's strong...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
ModulePass * createAMDGPULowerExecSyncLegacyPass()
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
FunctionVariableMap DirectAccess
FunctionVariableMap IndirectAccess