32#include "llvm/IR/IntrinsicsAArch64.h"
42#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
49 bool runOnModule(
Module &M)
override;
53 bool coalescePTrueIntrinsicCalls(
BasicBlock &BB,
59void SVEIntrinsicOpts::getAnalysisUsage(
AnalysisUsage &AU)
const {
64char SVEIntrinsicOpts::ID = 0;
65static const char *
name =
"SVE intrinsics optimizations";
71 return new SVEIntrinsicOpts();
97 if (ConvertToUses.
empty())
107 if (IntrUser && IntrUser->getIntrinsicID() ==
108 Intrinsic::aarch64_sve_convert_from_svbool) {
112 if (IntrUserVTy->getElementCount().getKnownMinValue() >
113 PTrueVTy->getElementCount().getKnownMinValue())
125bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
126 BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) {
127 if (PTrues.
size() <= 1)
131 auto *MostEncompassingPTrue =
135 return PTrue1VTy->getElementCount().getKnownMinValue() <
136 PTrue2VTy->getElementCount().getKnownMinValue();
141 PTrues.
remove(MostEncompassingPTrue);
151 Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());
153 auto *MostEncompassingPTrueVTy =
155 auto *ConvertToSVBool = Builder.CreateIntrinsic(
156 Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
157 {MostEncompassingPTrue});
159 bool ConvertFromCreated =
false;
160 for (
auto *PTrue : PTrues) {
165 if (MostEncompassingPTrueVTy != PTrueVTy) {
166 ConvertFromCreated =
true;
168 Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
169 auto *ConvertFromSVBool =
170 Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
171 {PTrueVTy}, {ConvertToSVBool});
172 PTrue->replaceAllUsesWith(ConvertFromSVBool);
174 PTrue->replaceAllUsesWith(MostEncompassingPTrue);
176 PTrue->eraseFromParent();
180 if (!ConvertFromCreated)
181 ConvertToSVBool->eraseFromParent();
234bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
235 SmallSetVector<Function *, 4> &Functions) {
238 for (
auto *
F : Functions) {
239 for (
auto &BB : *
F) {
240 SmallSetVector<IntrinsicInst *, 4> SVAllPTrues;
241 SmallSetVector<IntrinsicInst *, 4> SVPow2PTrues;
244 for (Instruction &
I : BB) {
249 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
252 const auto PTruePattern =
255 if (PTruePattern == AArch64SVEPredPattern::all)
256 SVAllPTrues.
insert(IntrI);
257 if (PTruePattern == AArch64SVEPredPattern::pow2)
258 SVPow2PTrues.
insert(IntrI);
261 Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);
262 Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);
269bool SVEIntrinsicOpts::runOnModule(
Module &M) {
271 SmallSetVector<Function *, 4> Functions;
276 for (
auto &
F :
M.getFunctionList()) {
277 if (!
F.isDeclaration())
280 switch (
F.getIntrinsicID()) {
281 case Intrinsic::aarch64_sve_ptrue:
282 for (User *U :
F.users())
290 if (!Functions.
empty())
291 Changed |= optimizePTrueIntrinsicCalls(Functions);
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool isPTruePromoted(IntrinsicInst *PTrue)
Checks if a ptrue intrinsic call is promoted.
This file implements a set that has insertion order iteration characteristics.
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function; (2) modify terminator instructions in any way.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Legacy analysis pass which computes a DominatorTree.
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
bool remove(const value_type &X)
Remove an item from the set vector.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
size_type size() const
Determine the number of elements in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
ModulePass * createSVEIntrinsicOptsPass()
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.