Vendor import of llvm-project branch release/12.x llvmorg-12.0.0-0-gd28af7c654d8, a.k.a. 12.0.0 release.

This commit is contained in:
Dimitry Andric 2021-04-19 21:18:04 +02:00
parent 9f93bc8bfd
commit b4125f7d51
73 changed files with 864 additions and 196 deletions

View file

@ -266,6 +266,9 @@ CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
/// Treat loops as finite: language, always, never.
ENUM_CODEGENOPT(FiniteLoops, FiniteLoopsKind, 2, FiniteLoopsKind::Language)
/// Attempt to use register sized accesses to bit-fields in structures, when
/// possible.
CODEGENOPT(UseRegisterSizedBitfieldAccess , 1, 0)

View file

@ -140,6 +140,12 @@ public:
All, // Keep all frame pointers.
};
/// Policy for whether loops may be assumed to terminate.
enum FiniteLoopsKind {
Language, // Not specified, use language standard.
Always, // All loops are assumed to be finite.
Never, // No loop is assumed to be finite.
};
/// The code model to use (-mcmodel).
std::string CodeModel;

View file

@ -2410,6 +2410,11 @@ def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
defm reroll_loops : BoolFOption<"reroll-loops",
CodeGenOpts<"RerollLoops">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Turn on loop reroller">, NegFlag<SetFalse>>;
def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>,
HelpText<"Assume all loops are finite.">, Flags<[CC1Option]>;
def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>,
HelpText<"Do not assume that any loop is finite.">, Flags<[CC1Option]>;
def ftrigraphs : Flag<["-"], "ftrigraphs">, Group<f_Group>,
HelpText<"Process trigraph sequences">, Flags<[CC1Option]>;
def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group<f_Group>,

View file

@ -1995,9 +1995,14 @@ void CodeGenModule::ConstructAttributeList(
if (TargetDecl->hasAttr<ConstAttr>()) {
FuncAttrs.addAttribute(llvm::Attribute::ReadNone);
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// gcc specifies that 'const' functions have greater restrictions than
// 'pure' functions, so they also cannot have infinite loops.
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
} else if (TargetDecl->hasAttr<PureAttr>()) {
FuncAttrs.addAttribute(llvm::Attribute::ReadOnly);
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// gcc specifies that 'pure' functions cannot have infinite loops.
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
} else if (TargetDecl->hasAttr<NoAliasAttr>()) {
FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly);
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);

View file

@ -9892,7 +9892,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
Args);
}
};

View file

@ -507,12 +507,23 @@ public:
/// Returns true when the active C++ dialect carries a forward-progress
/// requirement. A finite-loops setting of "Never" overrides the dialect and
/// always yields false.
bool CPlusPlusWithProgress() {
  const bool LoopsNeverFinite = CGM.getCodeGenOpts().getFiniteLoops() ==
                                CodeGenOptions::FiniteLoopsKind::Never;
  if (LoopsNeverFinite)
    return false;

  // Any of the C++11-and-later dialect flags is sufficient.
  const auto &LO = getLangOpts();
  if (LO.CPlusPlus11 || LO.CPlusPlus14)
    return true;
  return LO.CPlusPlus17 || LO.CPlusPlus20;
}
/// Returns true when loops in C code must be treated as making progress.
/// An explicit finite-loops setting ("Always" / "Never") takes precedence;
/// otherwise the answer is derived from the C dialect flags.
bool CWithProgress() {
  switch (CGM.getCodeGenOpts().getFiniteLoops()) {
  case CodeGenOptions::FiniteLoopsKind::Always:
    return true;
  case CodeGenOptions::FiniteLoopsKind::Never:
    return false;
  case CodeGenOptions::FiniteLoopsKind::Language:
    break; // No override: fall back to the language standard below.
  }

  const auto &LO = getLangOpts();
  return LO.C11 || LO.C17 || LO.C2x;
}

View file

@ -5620,6 +5620,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (A->getOption().matches(options::OPT_freroll_loops))
CmdArgs.push_back("-freroll-loops");
Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops,
options::OPT_fno_finite_loops);
Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
options::OPT_fno_unroll_loops);

View file

@ -11,6 +11,7 @@
#include "Darwin.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
@ -520,7 +521,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// translate 'lld' into 'lld-link', and in the case of the regular msvc
// linker, we need to use a special search algorithm.
llvm::SmallString<128> linkPath;
StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
StringRef Linker
= Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER);
if (Linker.empty())
Linker = "link";
if (Linker.equals_lower("lld"))
Linker = "lld-link";

View file

@ -296,6 +296,7 @@ void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args,
CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++");
CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi");
CmdArgs.push_back(Profiling ? "-lpthread_p" : "-lpthread");
}
std::string OpenBSD::getCompilerRT(const ArgList &Args,

View file

@ -1037,7 +1037,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
Opts.UnrollLoops =
Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops,
(Opts.OptimizationLevel > 1));
Opts.BinutilsVersion =
std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
@ -1324,6 +1323,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true);
if (Args.hasArg(options::OPT_ffinite_loops))
Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Always;
else if (Args.hasArg(options::OPT_fno_finite_loops))
Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never;
return Success;
}

View file

@ -565,7 +565,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
Builder.defineMacro("__cpp_aggregate_bases", "201603L");
Builder.defineMacro("__cpp_structured_bindings", "201606L");
Builder.defineMacro("__cpp_nontype_template_args",
LangOpts.CPlusPlus20 ? "201911L" : "201411L");
"201411L"); // (not latest)
Builder.defineMacro("__cpp_fold_expressions", "201603L");
Builder.defineMacro("__cpp_guaranteed_copy_elision", "201606L");
Builder.defineMacro("__cpp_nontype_template_parameter_auto", "201606L");

View file

@ -5158,6 +5158,20 @@ private:
llvm::DenseMap<const IdentifierInfo *, Member> Results;
};
// Strips a ParenListExpr wrapper from \p Base by selecting its final
// sub-expression, i.e. treating the list as a chain of comma operators.
// Other ParenListExprs are expected to have been resolved (e.g. into
// constructor calls) before reaching this point, so the list should not be
// empty; an empty list nevertheless yields nullptr. Any other expression,
// including a null one, is returned unchanged.
Expr *unwrapParenList(Expr *Base) {
  auto *List = llvm::dyn_cast_or_null<ParenListExpr>(Base);
  if (!List)
    return Base;

  const unsigned Count = List->getNumExprs();
  return Count == 0 ? nullptr : List->getExpr(Count - 1);
}
} // namespace
void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
@ -5165,6 +5179,8 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
SourceLocation OpLoc, bool IsArrow,
bool IsBaseExprStatement,
QualType PreferredType) {
Base = unwrapParenList(Base);
OtherOpBase = unwrapParenList(OtherOpBase);
if (!Base || !CodeCompleter)
return;
@ -5597,12 +5613,13 @@ ProduceSignatureHelp(Sema &SemaRef, Scope *S,
QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn,
ArrayRef<Expr *> Args,
SourceLocation OpenParLoc) {
if (!CodeCompleter)
Fn = unwrapParenList(Fn);
if (!CodeCompleter || !Fn)
return QualType();
// FIXME: Provide support for variadic template functions.
// Ignore type-dependent call expressions entirely.
if (!Fn || Fn->isTypeDependent() || anyNullArguments(Args))
if (Fn->isTypeDependent() || anyNullArguments(Args))
return QualType();
// In presence of dependent args we surface all possible signatures using the
// non-dependent args in the prefix. Afterwards we do a post filtering to make

View file

@ -24,13 +24,77 @@ Non-comprehensive list of changes in this release
ELF Improvements
----------------
* ``--error-handling-script`` is added to allow for user-defined handlers upon
* ``--dependency-file`` has been added. (Similar to ``cc -M -MF``.)
(`D82437 <https://reviews.llvm.org/D82437>`_)
* ``--error-handling-script`` has been added to allow for user-defined handlers upon
missing libraries. (`D87758 <https://reviews.llvm.org/D87758>`_)
* ``--exclude-libs`` can now localize defined version symbols and bitcode referenced libcall symbols.
(`D94280 <https://reviews.llvm.org/D94280>`_)
* ``--gdb-index`` now works with DWARF v5 and ``--icf={safe,all}``.
(`D85579 <https://reviews.llvm.org/D85579>`_)
(`D89751 <https://reviews.llvm.org/D89751>`_)
* ``--gdb-index --emit-relocs`` can now be used together.
(`D94354 <https://reviews.llvm.org/D94354>`_)
* ``--icf={safe,all}`` conservatively no longer folds text sections with LSDA.
Previously ICF on ``-fexceptions`` code could be unsafe.
(`D84610 <https://reviews.llvm.org/D84610>`_)
* ``--icf={safe,all}`` can now fold two sections with relocations referencing aliased symbols.
(`D88830 <https://reviews.llvm.org/D88830>`_)
* ``--lto-pseudo-probe-for-profiling`` has been added.
(`D95056 <https://reviews.llvm.org/D95056>`_)
* ``--no-lto-whole-program-visibility`` has been added.
(`D92060 <https://reviews.llvm.org/D92060>`_)
* ``--oformat-binary`` has been fixed to respect LMA.
(`D85086 <https://reviews.llvm.org/D85086>`_)
* ``--reproduce`` includes ``--lto-sample-profile``, ``--just-symbols``, ``--call-graph-ordering-file``, ``--retain-symbols-file`` files.
* ``-r --gc-sections`` is now supported.
(`D84131 <https://reviews.llvm.org/D84131>`_)
* A ``-u`` specified symbol will no longer change the binding to ``STB_WEAK``.
(`D88945 <https://reviews.llvm.org/D88945>`_)
* ``--wrap`` support has been improved.
+ If ``foo`` is not referenced, there is no longer an undefined symbol ``__wrap_foo``.
+ If ``__real_foo`` is not referenced, there is no longer an undefined symbol ``foo``.
* ``SHF_LINK_ORDER`` sections can now have zero ``sh_link`` values.
* ``SHF_LINK_ORDER`` and non-``SHF_LINK_ORDER`` sections can now be mixed within an input section description.
(`D84001 <https://reviews.llvm.org/D84001>`_)
* ``LOG2CEIL`` is now supported in linker scripts.
(`D84054 <https://reviews.llvm.org/D84054>`_)
* ``DEFINED`` has been fixed to check whether the symbol is defined.
(`D83758 <https://reviews.llvm.org/D83758>`_)
* An input section description may now have multiple ``SORT_*``.
The matched sections are ordered by radix sort with the keys being ``(SORT*, --sort-section, input order)``.
(`D91127 <https://reviews.llvm.org/D91127>`_)
* Users can now provide a GNU style linker script to convert ``.ctors`` into ``.init_array``.
(`D91187 <https://reviews.llvm.org/D91187>`_)
* An empty output section can now be discarded even if it is assigned to a program header.
(`D92301 <https://reviews.llvm.org/D92301>`_)
* Non-``SHF_ALLOC`` sections now have larger file offsets than ``SHF_ALLOC`` sections.
(`D85867 <https://reviews.llvm.org/D85867>`_)
* Some symbol versioning improvements.
+ Defined ``foo@@v1`` now resolves undefined ``foo@v1`` (`D92259 <https://reviews.llvm.org/D92259>`_)
+ Undefined ``foo@v1`` now gets an error (`D92260 <https://reviews.llvm.org/D92260>`_)
* The AArch64 port now has support for ``STO_AARCH64_VARIANT_PCS`` and ``DT_AARCH64_VARIANT_PCS``.
(`D93045 <https://reviews.llvm.org/D93045>`_)
* The AArch64 port now has support for ``R_AARCH64_LD64_GOTPAGE_LO15``.
* The PowerPC64 port now detects missing R_PPC64_TLSGD/R_PPC64_TLSLD and disables TLS relaxation.
This allows linking with object files produced by very old IBM XL compilers.
(`D92959 <https://reviews.llvm.org/D92959>`_)
* Many PowerPC PC-relative relocations are now supported.
* ``R_PPC_ADDR24`` and ``R_PPC64_ADDR16_HIGH`` are now supported.
* powerpcle is now supported. Tested with FreeBSD loader and freestanding.
(`D93917 <https://reviews.llvm.org/D93917>`_)
* RISC-V: the first ``SHT_RISCV_ATTRIBUTES`` section is now retained.
(`D86309 <https://reviews.llvm.org/D86309>`_)
* LTO pipeline now defaults to the new PM if the CMake variable ``ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER`` is on.
(`D92885 <https://reviews.llvm.org/D92885>`_)
Breaking changes
----------------
* ...
* A COMMON symbol can now cause the fetch of an archive providing a ``STB_GLOBAL`` definition.
This behavior follows GNU ld newer than December 1999.
If you see ``duplicate symbol`` errors with the new behavior, check out `PR49226 <https://bugs.llvm.org/show_bug.cgi?id=49226>`_.
(`D86142 <https://reviews.llvm.org/D86142>`_)
COFF Improvements
-----------------
@ -58,10 +122,26 @@ MinGW Improvements
(`D93950 <https://reviews.llvm.org/D93950>`_)
MachO Improvements
Mach-O Improvements
-------------------
* Item 1.
We've gotten the new implementation of LLD for Mach-O to the point where it is
able to link large x86_64 programs, and we'd love to get some alpha testing on
it. The new Darwin back-end can be invoked as follows:
.. code-block::
clang -fuse-ld=lld.darwinnew /path/to/file.c
To reach this point, we implemented numerous features, and it's easier to list
the major features we *haven't* yet completed:
* LTO support
* Stack unwinding for exceptions
* Support for arm64, arm, and i386 architectures
If you stumble upon an issue and it doesn't fall into one of these categories,
please file a bug report!
WebAssembly Improvements
------------------------

View file

@ -339,8 +339,7 @@ LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT);
* ownership has not been passed to a JITDylib (e.g. because some error
* prevented the client from calling LLVMOrcJITDylibAddGenerator).
*/
void LLVMOrcDisposeDefinitionGenerator(
LLVMOrcDefinitionGeneratorRef DG);
void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG);
/**
* Dispose of a MaterializationUnit.
@ -388,7 +387,9 @@ LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES,
* Returns the JITDylib with the given name, or NULL if no such JITDylib
* exists.
*/
LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name);
LLVMOrcJITDylibRef
LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES,
const char *Name);
/**
* Return a reference to a newly created resource tracker associated with JD.

View file

@ -490,7 +490,10 @@ protected:
/// - \c Add has a constant operand.
bool canFoldAddIntoGEP(const User *GEP, const Value *Add);
/// Test whether the given value has exactly one use.
/// Test whether the register associated with this value has exactly one use,
/// in which case that single use is killing. Note that multiple IR values
/// may map onto the same register, in which case this is not the same as
/// checking that an IR value has one use.
bool hasTrivialKill(const Value *V);
/// Create a machine mem operand from the given instruction.

View file

@ -1156,6 +1156,10 @@ public:
return getOpcode() == TargetOpcode::CFI_INSTRUCTION;
}
/// True if this instruction's opcode is TargetOpcode::PSEUDO_PROBE.
bool isPseudoProbe() const {
  const auto Opc = getOpcode();
  return Opc == TargetOpcode::PSEUDO_PROBE;
}
// True if the instruction represents a position in the function, i.e. it is
// either a label or a CFI instruction.
bool isPosition() const {
  if (isLabel())
    return true;
  return isCFIInstruction();
}
@ -1165,6 +1169,9 @@ public:
/// True if this is a debug value, a debug label, or a debug ref.
bool isDebugInstr() const {
  if (isDebugValue())
    return true;
  if (isDebugLabel())
    return true;
  return isDebugRef();
}
/// True if this is a debug instruction or a pseudo probe.
bool isDebugOrPseudoInstr() const {
  if (isDebugInstr())
    return true;
  return isPseudoProbe();
}
bool isDebugOffsetImm() const { return getDebugOffset().isImm(); }

View file

@ -375,7 +375,7 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
__OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32,
/* omp_allocator_handle_t */ VoidPtr)
__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64)
__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64)
__OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr,
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
__OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32,
@ -844,7 +844,7 @@ __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {})
__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {})

View file

@ -1757,9 +1757,6 @@ public:
return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
}
/// Returns true if this function is guaranteed to return.
bool willReturn() const { return hasFnAttr(Attribute::WillReturn); }
void setOnlyReadsMemory() {
addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
}

View file

@ -633,6 +633,10 @@ public:
/// generated program.
bool isSafeToRemove() const;
/// Return true if the instruction will return (unwinding is considered as
/// a form of returning control flow here).
bool willReturn() const;
/// Return true if the instruction is a variety of EH-block.
bool isEHPad() const {
switch (getOpcode()) {
@ -650,6 +654,9 @@ public:
/// llvm.lifetime.end marker.
bool isLifetimeStartOrEnd() const;
/// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
bool isDebugOrPseudoInst() const;
/// Return a pointer to the next non-debug instruction in the same basic
/// block as 'this', or nullptr if no such instruction exists. Skip any pseudo
/// operations if \c SkipPseudoOp is true.

View file

@ -667,6 +667,12 @@ struct AAMDNodes {
/// The tag specifying the noalias scope.
MDNode *NoAlias = nullptr;
// Shift tbaa Metadata node to start off bytes later
static MDNode *ShiftTBAA(MDNode *M, size_t off);
// Shift tbaa.struct Metadata node to start off bytes later
static MDNode *ShiftTBAAStruct(MDNode *M, size_t off);
/// Given two sets of AAMDNodes that apply to the same pointer,
/// give the best AAMDNodes that are compatible with both (i.e. a set of
/// nodes whose allowable aliasing conclusions are a subset of those
@ -680,6 +686,18 @@ struct AAMDNodes {
Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
return Result;
}
/// Build the AAMDNodes that describe this set after the start of the pointer
/// has been moved forward by the constant \p Offset. The TBAA and TBAAStruct
/// nodes are rewritten through ShiftTBAA / ShiftTBAAStruct (a missing node
/// stays null); Scope and NoAlias are carried over unchanged.
AAMDNodes shift(size_t Offset) {
  AAMDNodes Shifted;

  if (TBAA)
    Shifted.TBAA = ShiftTBAA(TBAA, Offset);
  else
    Shifted.TBAA = nullptr;

  if (TBAAStruct)
    Shifted.TBAAStruct = ShiftTBAAStruct(TBAAStruct, Offset);
  else
    Shifted.TBAAStruct = nullptr;

  Shifted.Scope = Scope;
  Shifted.NoAlias = NoAlias;
  return Shifted;
}
};
// Specialize DenseMapInfo for AAMDNodes.

View file

@ -568,6 +568,11 @@ public:
bool accumulateConstantOffset(
const DataLayout &DL, APInt &Offset,
function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr) const;
static bool accumulateConstantOffset(
Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
APInt &Offset,
function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr);
};
class PtrToIntOperator

View file

@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
@ -89,6 +90,8 @@ public:
void addRecord(const sampleprof::FunctionSamples &FS,
bool isCallsiteSample = false);
std::unique_ptr<ProfileSummary> computeSummaryForProfiles(
const StringMap<sampleprof::FunctionSamples> &Profiles);
std::unique_ptr<ProfileSummary> getSummary();
};

View file

@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/ProfileData/SampleProf.h"
@ -90,6 +91,8 @@ private:
// calling context and the context is identified by path from root to the node.
class SampleContextTracker {
public:
using ContextSamplesTy = SmallSet<FunctionSamples *, 16>;
SampleContextTracker(StringMap<FunctionSamples> &Profiles);
// Query context profile for a specific callee with given name at a given
// call-site. The full context is identified by location of call instruction.
@ -103,6 +106,9 @@ public:
FunctionSamples *getContextSamplesFor(const DILocation *DIL);
// Query context profile for a given sample context of a function.
FunctionSamples *getContextSamplesFor(const SampleContext &Context);
// Get all context profile for given function.
ContextSamplesTy &getAllContextSamplesFor(const Function &Func);
ContextSamplesTy &getAllContextSamplesFor(StringRef Name);
// Query base profile for a given function. A base profile is a merged view
// of all context profiles for contexts that are not inlined.
FunctionSamples *getBaseSamplesFor(const Function &Func,
@ -113,6 +119,9 @@ public:
// This makes sure that inlined context profile will be excluded in
// function's base profile.
void markContextSamplesInlined(const FunctionSamples *InlinedSamples);
void promoteMergeContextSamplesTree(const Instruction &Inst,
StringRef CalleeName);
void addCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
// Dump the internal context profile trie.
void dump();
@ -126,8 +135,6 @@ private:
ContextTrieNode *getTopLevelContextNode(StringRef FName);
ContextTrieNode &addTopLevelContextNode(StringRef FName);
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
void promoteMergeContextSamplesTree(const Instruction &Inst,
StringRef CalleeName);
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
StringRef ContextStrToRemove);
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
@ -135,7 +142,7 @@ private:
StringRef ContextStrToRemove);
// Map from function name to context profiles (excluding base profile)
StringMap<SmallSet<FunctionSamples *, 16>> FuncToCtxtProfileSet;
StringMap<ContextSamplesTy> FuncToCtxtProfileSet;
// Root node for context trie tree
ContextTrieNode RootContext;

View file

@ -62,7 +62,7 @@ public:
private:
// Allow a little bias due the rounding to integral factors.
constexpr static float DistributionFactorVariance = 0.02;
constexpr static float DistributionFactorVariance = 0.02f;
// Distribution factors from last pass.
FuncProbeFactorMap FunctionProbeFactors;

View file

@ -274,6 +274,13 @@ void updateProfileCallee(
void identifyNoAliasScopesToClone(
ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
/// instruction range and extract their scope. These are candidates for
/// duplication when cloning.
void identifyNoAliasScopesToClone(
BasicBlock::iterator Start, BasicBlock::iterator End,
SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
/// Duplicate the specified list of noalias decl scopes.
/// The 'Ext' string is added as an extension to the name.
/// Afterwards, the ClonedScopes contains the mapping of the original scope

View file

@ -80,7 +80,7 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
static bool isAlwaysLive(Instruction *I) {
return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
I->mayHaveSideEffects();
I->mayHaveSideEffects() || !I->willReturn();
}
void DemandedBits::determineLiveOperandBits(

View file

@ -243,11 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
if (RecurrenceType->isFloatingPointTy()) {
if (!isFloatingPointRecurrenceKind(Kind))
return false;
} else {
} else if (RecurrenceType->isIntegerTy()) {
if (!isIntegerRecurrenceKind(Kind))
return false;
if (isArithmeticRecurrenceKind(Kind))
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
} else {
// Pointer min/max may exist, but it is not supported as a reduction op.
return false;
}
Worklist.push_back(Start);

View file

@ -737,3 +737,84 @@ bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
// Rebase the TBAA tag \p MD so that it describes an access starting \p Offset
// bytes later.
//
// Returns \p MD itself when \p Offset is zero or when \p MD is not a
// struct-path tag. Otherwise a new node is built: operands 0 and 1 are copied
// verbatim, operand 2 (the offset) is reduced by \p Offset, and for new-format
// tags operand 3 (the size) is trimmed when the shift lands inside the access.
// Returns nullptr when the shifted access can no longer be described: for new
// format tags when it ends at or before \p Offset, for old-format tags when it
// starts before \p Offset.
MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) {
// Fast path if there's no offset
if (Offset == 0)
return MD;
// Fast path if there's no path tbaa node (and thus scalar)
if (!isStructPathTBAA(MD))
return MD;
TBAAStructTagNode Tag(MD);
SmallVector<Metadata *, 5> Sub;
// The first two operands are carried over unchanged.
Sub.push_back(MD->getOperand(0));
Sub.push_back(MD->getOperand(1));
ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2));
if (Tag.isNewFormat()) {
ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3));
// The whole access ends at or before the new start: nothing left to tag.
if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) {
return nullptr;
}
uint64_t NewSize = InnerSize->getZExtValue();
// This unsigned subtraction wraps when InnerOffset < Offset; that case is
// corrected immediately below.
uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
if (InnerOffset->getZExtValue() < Offset) {
// The new start falls inside the access: clamp the offset to zero and drop
// the bytes that now lie before the start from the size.
NewOffset = 0;
NewSize -= Offset - InnerOffset->getZExtValue();
}
Sub.push_back(ConstantAsMetadata::get(
ConstantInt::get(InnerOffset->getType(), NewOffset)));
Sub.push_back(ConstantAsMetadata::get(
ConstantInt::get(InnerSize->getType(), NewSize)));
// immutable type
if (MD->getNumOperands() >= 5)
Sub.push_back(MD->getOperand(4));
} else {
// This branch reads no size operand, so a partially overlapping access
// cannot be trimmed; give up if the access starts before the new base.
if (InnerOffset->getZExtValue() < Offset)
return nullptr;
Sub.push_back(ConstantAsMetadata::get(ConstantInt::get(
InnerOffset->getType(), InnerOffset->getZExtValue() - Offset)));
// immutable type
if (MD->getNumOperands() >= 4)
Sub.push_back(MD->getOperand(3));
}
return MDNode::get(MD->getContext(), Sub);
}
// Rebase the tbaa.struct node \p MD so that it describes memory starting
// \p Offset bytes later.
//
// Returns \p MD itself when \p Offset is zero. Otherwise the operands are
// walked three at a time as (offset, size, third operand) triples: triples
// that end at or before \p Offset are dropped, triples that straddle it are
// clamped to offset 0 with a reduced size, and the rest have their offset
// reduced by \p Offset. The third operand of each kept triple is copied
// unchanged. The result is a new node built from the kept triples (which may
// be none).
MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) {
// Fast path if there's no offset
if (Offset == 0)
return MD;
SmallVector<Metadata *, 3> Sub;
// NOTE(review): assumes the operand count is a multiple of three — confirm.
for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) {
ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i));
ConstantInt *InnerSize =
mdconst::extract<ConstantInt>(MD->getOperand(i + 1));
// Don't include any triples that aren't in bounds
if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset)
continue;
uint64_t NewSize = InnerSize->getZExtValue();
// This unsigned subtraction wraps when InnerOffset < Offset; that case is
// corrected immediately below.
uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
if (InnerOffset->getZExtValue() < Offset) {
// The new start falls inside this field: clamp to zero and shrink the size
// by the number of bytes that now precede the start.
NewOffset = 0;
NewSize -= Offset - InnerOffset->getZExtValue();
}
// Shift the offset of the triple
Sub.push_back(ConstantAsMetadata::get(
ConstantInt::get(InnerOffset->getType(), NewOffset)));
Sub.push_back(ConstantAsMetadata::get(
ConstantInt::get(InnerSize->getType(), NewSize)));
Sub.push_back(MD->getOperand(i + 2));
}
return MDNode::get(MD->getContext(), Sub);
}

View file

@ -5018,36 +5018,14 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
// arbitrary length of time, but programs aren't allowed to rely on that.
// If there is no successor, then execution can't transfer to it.
if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
return !CRI->unwindsToCaller();
if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
return !CatchSwitch->unwindsToCaller();
if (isa<ResumeInst>(I))
return false;
if (isa<ReturnInst>(I))
return false;
if (isa<UnreachableInst>(I))
return false;
// Calls can throw, or contain an infinite loop, or kill the process.
if (const auto *CB = dyn_cast<CallBase>(I)) {
// Call sites that throw have implicit non-local control flow.
if (!CB->doesNotThrow())
return false;
// A function which doens't throw and has "willreturn" attribute will
// always return.
if (CB->hasFnAttr(Attribute::WillReturn))
return true;
// FIXME: Temporarily assume that all side-effect free intrinsics will
// return. Remove this workaround once all intrinsics are appropriately
// annotated.
return isa<IntrinsicInst>(CB) && CB->onlyReadsMemory();
}
// Other instructions return normally.
return true;
// An instruction that returns without throwing must transfer control flow
// to a successor.
return !I->mayThrow() && I->willReturn();
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {

View file

@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
unsigned NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs(NumParts);
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);

View file

@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
// If MI has side effects, it should become a barrier for code motion.
// IOM is rebuild from the next instruction to prevent later
// instructions from being moved before this MI.
if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
Next != MBB.end()) {
BuildInstOrderMap(Next, IOM);
SawStore = false;
}

View file

@ -1462,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
}
bool MachineInstr::isLoadFoldBarrier() const {
return mayStore() || isCall() || hasUnmodeledSideEffects();
return mayStore() || isCall() ||
(hasUnmodeledSideEffects() && !isPseudoProbe());
}
/// allDefsAreDead - Return true if all the defs of this instruction are dead.

View file

@ -6517,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
SelectionDAG &DAG) {
SelectionDAG &DAG, bool IsRotate) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@ -6550,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
//
// NOTE: We can only do this when matching an AND and not a general
// funnel shift.
unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
@ -6641,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
/*IsRotate*/ true)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
@ -6670,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// fold (or (shl x0, (*ext (sub 32, y))),
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);

View file

@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) {
if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
return false;
// Casts and extractvalues may be trivially coalesced by fast-isel.
if (I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::PtrToInt ||
I->getOpcode() == Instruction::IntToPtr ||
I->getOpcode() == Instruction::ExtractValue)
return false;
// Only instructions with a single use in the same basic block are considered
// to have trivial kills.
return I->hasOneUse() &&
!(I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::PtrToInt ||
I->getOpcode() == Instruction::IntToPtr) &&
cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
}

View file

@ -9660,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// We will look through cast uses, so ignore them completely.
if (I.isCast())
continue;
// Ignore debug info intrinsics, they don't escape or store to allocas.
if (isa<DbgInfoIntrinsic>(I))
// Ignore debug info and pseudo op intrinsics, they don't escape or store
// to allocas.
if (I.isDebugOrPseudoInst())
continue;
// This is an unknown instruction. Assume it escapes or writes to all
// static alloca operands.

View file

@ -2012,7 +2012,7 @@ bool TargetLowering::SimplifyDemandedBits(
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
if (!ShAmtC)
if (!ShAmtC || ShAmtC->uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
@ -5935,6 +5935,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
// Because getNegatedExpression can delete nodes we need a handle to keep
// temporary nodes alive in case the recursion manages to create an identical
// node.
std::list<HandleSDNode> Handles;
switch (Opcode) {
case ISD::ConstantFP: {
// Don't invert constant FP values after legalization unless the target says
@ -6003,11 +6008,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate X if its cost is less than or equal to that of Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
@ -6052,11 +6064,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate X if its cost is less than or equal to that of Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
@ -6094,15 +6113,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
if (!NegZ)
break;
// Prevent this node from being deleted by the next two calls.
Handles.emplace_back(NegZ);
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate X if its cost is less than or equal to that of Y.
if (NegX && (CostX <= CostY)) {
Cost = std::min(CostX, CostZ);

View file

@ -192,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// Ignore intrinsics that do not become real instructions.
// TODO: Narrow this to intrinsics that have store-like effects.
const auto *CI = cast<CallInst>(I);
if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
return true;
break;
}

View file

@ -801,8 +801,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
for (MachineInstr &OtherMI : make_range(End, KillPos)) {
// Debug instructions cannot be counted against the limit.
if (OtherMI.isDebugInstr())
// Debug or pseudo instructions cannot be counted against the limit.
if (OtherMI.isDebugOrPseudoInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@ -974,8 +974,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
unsigned NumVisited = 0;
for (MachineInstr &OtherMI :
make_range(mi, MachineBasicBlock::iterator(KillMI))) {
// Debug instructions cannot be counted against the limit.
if (OtherMI.isDebugInstr())
// Debug or pseudo instructions cannot be counted against the limit.
if (OtherMI.isDebugOrPseudoInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;

View file

@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder(
delete unwrap(JTMB);
}
void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
delete unwrap(ObjLayer);
}

View file

@ -937,6 +937,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
return true;
}
} else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ptr_annotation,
F->arg_begin()->getType());
return true;
}
break;
@ -947,6 +953,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
case 'v': {
if (Name == "var.annotation" && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::var_annotation);
return true;
}
break;
}
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
@ -3730,6 +3746,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
case Intrinsic::ptr_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
assert(CI->getNumArgOperands() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
return;
case Intrinsic::var_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
assert(CI->getNumArgOperands() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
CI->eraseFromParent();
return;
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});

View file

@ -633,6 +633,16 @@ bool Instruction::isSafeToRemove() const {
!this->isTerminator();
}
/// Report whether this instruction is assumed to return.
///
/// Any instruction that is not a CallBase yields true. A CallBase yields true
/// when it carries the WillReturn function attribute, or when it is an
/// intrinsic call that only reads memory.
bool Instruction::willReturn() const {
  const auto *Call = dyn_cast<CallBase>(this);
  if (!Call)
    return true;

  if (Call->hasFnAttr(Attribute::WillReturn))
    return true;

  // FIXME: Temporarily assume that all side-effect free intrinsics will
  // return. Remove this workaround once all intrinsics are appropriately
  // annotated.
  return isa<IntrinsicInst>(Call) && Call->onlyReadsMemory();
}
bool Instruction::isLifetimeStartOrEnd() const {
auto II = dyn_cast<IntrinsicInst>(this);
if (!II)
@ -641,6 +651,10 @@ bool Instruction::isLifetimeStartOrEnd() const {
return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
}
/// True when this instruction is either a debug-info intrinsic or a pseudo
/// probe instruction.
bool Instruction::isDebugOrPseudoInst() const {
  if (isa<DbgInfoIntrinsic>(this))
    return true;
  return isa<PseudoProbeInst>(this);
}
const Instruction *
Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
for (const Instruction *I = getNextNode(); I; I = I->getNextNode())

View file

@ -61,10 +61,17 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
bool GEPOperator::accumulateConstantOffset(
const DataLayout &DL, APInt &Offset,
function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
assert(Offset.getBitWidth() ==
DL.getIndexSizeInBits(getPointerAddressSpace()) &&
"The offset bit width does not match DL specification.");
assert(Offset.getBitWidth() ==
DL.getIndexSizeInBits(getPointerAddressSpace()) &&
"The offset bit width does not match DL specification.");
SmallVector<const Value *> Index(value_op_begin() + 1, value_op_end());
return GEPOperator::accumulateConstantOffset(getSourceElementType(), Index,
DL, Offset, ExternalAnalysis);
}
bool GEPOperator::accumulateConstantOffset(
Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis) {
bool UsedExternalAnalysis = false;
auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool {
Index = Index.sextOrTrunc(Offset.getBitWidth());
@ -85,9 +92,10 @@ bool GEPOperator::accumulateConstantOffset(
}
return true;
};
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
GTI != GTE; ++GTI) {
auto begin = generic_gep_type_iterator<decltype(Index.begin())>::begin(
SourceType, Index.begin());
auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
// Scalable vectors are multiplied by a runtime constant.
bool ScalableType = false;
if (isa<ScalableVectorType>(GTI.getIndexedType()))

View file

@ -794,7 +794,6 @@ LineCoverageStats::LineCoverageStats(
ExecutionCount = WrappedSegment->Count;
if (!MinRegionCount)
return;
ExecutionCount = 0;
for (const auto *LS : LineSegments)
if (isStartOfRegion(LS))
ExecutionCount = std::max(ExecutionCount, LS->Count);

View file

@ -18,9 +18,14 @@
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
// Hidden command-line flag, off by default. When set, per-context sample
// profiles are merged by function name before the summary is computed. When
// the flag is not given at all, computeSummaryForProfiles still merges if the
// profile is context-sensitive (it checks getNumOccurrences()).
cl::opt<bool> UseContextLessSummary(
"profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::desc("Merge context profiles before calculating thresholds."));
// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
// (which is 1000000) is a desired percentile of total counts.
static const uint32_t DefaultCutoffsData[] = {
@ -111,6 +116,35 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
MaxFunctionCount, NumCounts, NumFunctions);
}
std::unique_ptr<ProfileSummary>
SampleProfileSummaryBuilder::computeSummaryForProfiles(
const StringMap<sampleprof::FunctionSamples> &Profiles) {
assert(NumFunctions == 0 &&
"This can only be called on an empty summary builder");
StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
// For CSSPGO, context-sensitive profile effectively split a function profile
// into many copies each representing the CFG profile of a particular calling
// context. That makes the count distribution looks more flat as we now have
// more function profiles each with lower counts, which in turn leads to lower
// hot thresholds. To compensate for that, by defauly we merge context
// profiles before coumputing profile summary.
if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
!UseContextLessSummary.getNumOccurrences())) {
for (const auto &I : Profiles) {
ContextLessProfiles[I.second.getName()].merge(I.second);
}
ProfilesToUse = &ContextLessProfiles;
}
for (const auto &I : *ProfilesToUse) {
const sampleprof::FunctionSamples &Profile = I.second;
addRecord(Profile);
}
return getSummary();
}
std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
computeDetailedSummary();
return std::make_unique<ProfileSummary>(

View file

@ -1610,9 +1610,5 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
// profile. Binary format has the profile summary in its header.
void SampleProfileReader::computeSummary() {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
for (const auto &I : Profiles) {
const FunctionSamples &Profile = I.second;
Builder.addRecord(Profile);
}
Summary = Builder.getSummary();
Summary = Builder.computeSummaryForProfiles(Profiles);
}

View file

@ -360,10 +360,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
/// it needs to be parsed by the SampleProfileReaderText class.
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
auto &OS = *OutputStream;
if (FunctionSamples::ProfileIsCS)
OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples();
else
OS << S.getName() << ":" << S.getTotalSamples();
OS << S.getNameWithContext(true) << ":" << S.getTotalSamples();
if (Indent == 0)
OS << ":" << S.getHeadSamples();
OS << "\n";
@ -752,9 +749,5 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
void SampleProfileWriter::computeSummary(
const StringMap<FunctionSamples> &ProfileMap) {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
for (const auto &I : ProfileMap) {
const FunctionSamples &Profile = I.second;
Builder.addRecord(Profile);
}
Summary = Builder.getSummary();
Summary = Builder.computeSummaryForProfiles(ProfileMap);
}

View file

@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) {
}
static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
// First, check if the file is on a network (non-local) drive. If so, don't
// set DeleteFile to true, since it prevents opening the file for writes.
// Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a
// network file. On Windows 7 the function realPathFromHandle() below fails
// if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by
// a prior call.
FILE_DISPOSITION_INFO Disposition;
Disposition.DeleteFile = false;
if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
sizeof(Disposition)))
return mapWindowsError(::GetLastError());
if (!Delete)
return std::error_code();
// Check if the file is on a network (non-local) drive. If so, don't
// continue when DeleteFile is true, since it prevents opening the file for
// writes. Note -- this will leak temporary files on disk, but only when the
// target file is on a network drive.
SmallVector<wchar_t, 128> FinalPath;
if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
return EC;
@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
if (!IsLocal)
return std::error_code();
// The file is on a local drive, set the DeleteFile to true.
FILE_DISPOSITION_INFO Disposition;
Disposition.DeleteFile = Delete;
// The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
// flag.
Disposition.DeleteFile = true;
if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
sizeof(Disposition)))
return mapWindowsError(::GetLastError());

View file

@ -1017,11 +1017,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {

View file

@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
User->getMachineOpcode() != PPC::SELECT_I8)
return false;
SDNode *Op1 = User->getOperand(1).getNode();
SDNode *Op2 = User->getOperand(2).getNode();
// If we have a degenerate select with two equal operands, swapping will
// not do anything, and we may run into an infinite loop.
if (Op1 == Op2)
return false;
if (!Op2->isMachineOpcode())
return false;

View file

@ -504,19 +504,19 @@ def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
defm VL1R : VWholeLoad<1, "vl1r">;
defm VL2R : VWholeLoad<2, "vl2r">;
defm VL4R : VWholeLoad<4, "vl4r">;
defm VL8R : VWholeLoad<8, "vl8r">;
defm VL1R : VWholeLoad<0, "vl1r">;
defm VL2R : VWholeLoad<1, "vl2r">;
defm VL4R : VWholeLoad<3, "vl4r">;
defm VL8R : VWholeLoad<7, "vl8r">;
def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>;
def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>;
def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>;
def VS1R_V : VWholeStore<1, "vs1r.v">;
def VS2R_V : VWholeStore<2, "vs2r.v">;
def VS4R_V : VWholeStore<4, "vs4r.v">;
def VS8R_V : VWholeStore<8, "vs8r.v">;
def VS1R_V : VWholeStore<0, "vs1r.v">;
def VS2R_V : VWholeStore<1, "vs2r.v">;
def VS4R_V : VWholeStore<3, "vs4r.v">;
def VS8R_V : VWholeStore<7, "vs8r.v">;
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;

View file

@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
return false;
}
// Make sure no potentially eflags clobbering phi moves can be inserted in
// between.
auto HasPhis = [](const BasicBlock *Succ) {
return !llvm::empty(Succ->phis());
};
if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
return false;
CC = TmpCC;
return true;
}

View file

@ -3778,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
VEX_4V;
VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
@ -3794,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
VEX_4V, VEX_L;
VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@ -4756,7 +4756,7 @@ let isCommutable = 0 in {
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
load, i128mem,
SchedWritePHAdd.XMM, 0>, VEX_4V;
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
@ -4802,7 +4802,7 @@ let isCommutable = 0 in {
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
load, i256mem,
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@ -6503,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ;
}
@ -6521,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">;
}
@ -6539,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> {
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
}
@ -6557,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> {
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}

View file

@ -149,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (isNoModRef(MRI))
continue;
// A pseudo probe call shouldn't change any function attribute since it
// doesn't translate to a real instruction. It comes with a memory access
// tag to prevent itself being removed by optimizations and not block
// other instructions being optimized.
if (isa<PseudoProbeInst>(I))
continue;
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
// The call could access any memory. If that includes writes, note it.
if (isModSet(MRI))
@ -1445,8 +1452,7 @@ static bool functionWillReturn(const Function &F) {
// If there are no loops, then the function is willreturn if all calls in
// it are willreturn.
return all_of(instructions(F), [](const Instruction &I) {
const auto *CB = dyn_cast<CallBase>(&I);
return !CB || CB->hasFnAttr(Attribute::WillReturn);
return I.willReturn();
});
}

View file

@ -263,6 +263,17 @@ SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
return Node->getFunctionSamples();
}
/// Return the set of context profiles recorded for \p Func, keyed by the
/// function's canonical name.
SampleContextTracker::ContextSamplesTy &
SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
  return FuncToCtxtProfileSet[FunctionSamples::getCanonicalFnName(Func)];
}
/// Return the set of context profiles recorded under \p Name. The name is
/// used as the lookup key as-is; no canonicalization is applied here.
SampleContextTracker::ContextSamplesTy &
SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
  ContextSamplesTy &Contexts = FuncToCtxtProfileSet[Name];
  return Contexts;
}
FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
bool MergeContext) {
StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
@ -550,4 +561,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
return *ToNode;
}
// Add the caller/callee edges implied by the context trie to the call graph.
// The trie is walked breadth-first starting at RootContext. For each
// parent/child pair an edge is added only when both names resolve through
// SymbolMap to functions that are not mere declarations. Existing edges in CG
// are left untouched.
void SampleContextTracker::addCallGraphEdges(CallGraph &CG,
                                             StringMap<Function *> &SymbolMap) {
  std::queue<ContextTrieNode *> Pending;
  Pending.push(&RootContext);

  while (!Pending.empty()) {
    ContextTrieNode *Parent = Pending.front();
    Pending.pop();

    Function *Caller = SymbolMap.lookup(Parent->getFuncName());
    const bool CallerDefined = Caller && !Caller->isDeclaration();

    for (auto &ChildEntry : Parent->getAllChildContext()) {
      ContextTrieNode *Child = &ChildEntry.second;
      // Always descend, even if the parent cannot contribute an edge.
      Pending.push(Child);

      if (!CallerDefined)
        continue;

      Function *Callee = SymbolMap.lookup(Child->getFuncName());
      if (!Callee || Callee->isDeclaration())
        continue;

      CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
    }
  }
}
} // namespace llvm

View file

@ -177,6 +177,16 @@ static cl::opt<bool> ProfileTopDownLoad(
"order of call graph during sample profile loading. It only "
"works for new pass manager. "));
// Hidden option, on by default. It is only acted upon when the loaded profile
// is context-sensitive: the function-ordering code tests it together with
// ProfileIsCS before adding profiled indirect-call targets as call graph edges.
static cl::opt<bool> UseProfileIndirectCallEdges(
"use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
cl::desc("Considering indirect call samples from profile when top-down "
"processing functions. Only CSSPGO is supported."));
// Hidden option, off by default. If it is not given on the command line at
// all, the profile-based ordering is still applied for context-sensitive
// profiles (the user of this flag checks getNumOccurrences()).
static cl::opt<bool> UseProfileTopDownOrder(
"use-profile-top-down-order", cl::init(false), cl::Hidden,
cl::desc("Process functions in one SCC in a top-down order "
"based on the input profile."));
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
cl::desc("Inline cold call sites in profile loader if it's beneficial "
@ -458,6 +468,8 @@ protected:
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
@ -2278,6 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
// Add call graph edges for the inlined callsites recorded in a function
// profile. The function named by Samples is the caller for every inlinee at
// every callsite directly under it; the routine then recurses into each
// inlinee's own samples. Nothing is added (and no recursion happens) when the
// caller is unknown to SymbolMap or is only a declaration.
void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
                                            const FunctionSamples &Samples) {
  Function *Caller = SymbolMap.lookup(Samples.getFuncName());
  const bool CallerDefined = Caller && !Caller->isDeclaration();
  if (!CallerDefined)
    return;

  // Non-inlined call edges are deliberately skipped: for a non-CS profile,
  // top-down inlining exists to get more precise profile matching, not to
  // enable more inlining, so those edges are not important here.
  for (const auto &Callsite : Samples.getCallsiteSamples()) {
    for (const auto &Inlinee : Callsite.second) {
      Function *Callee = SymbolMap.lookup(Inlinee.first);
      const bool CalleeDefined = Callee && !Callee->isDeclaration();
      if (CalleeDefined)
        CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
      // Recurse regardless of whether an edge was added for this inlinee.
      addCallGraphEdges(CG, Inlinee.second);
    }
  }
}
// Swap the static call edges in CG for the dynamic ones found in the profile.
// All outgoing edges are dropped from every node except the external calling
// node, whose edges keep the graph connected. Profile edges are then added,
// from the context tracker for a context-sensitive profile and from the
// reader's function profiles otherwise.
void SampleProfileLoader::replaceCallGraphEdges(
    CallGraph &CG, StringMap<Function *> &SymbolMap) {
  auto *ExternalCaller = CG.getExternalCallingNode();
  for (const auto &Entry : CG) {
    auto *GraphNode = Entry.second.get();
    if (GraphNode == ExternalCaller)
      continue;
    GraphNode->removeAllCalledFunctions();
  }

  if (!ProfileIsCS) {
    for (const auto &Samples : Reader->getProfiles())
      addCallGraphEdges(CG, Samples.second);
    return;
  }

  ContextTracker->addCallGraphEdges(CG, SymbolMap);
}
std::vector<Function *>
SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
std::vector<Function *> FunctionOrderList;
@ -2300,16 +2351,97 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
}
assert(&CG->getModule() == &M);
// Add indirect call edges from profile to augment the static call graph.
// Functions will be processed in a top-down order defined by the static call
// graph. Adjusting the order by considering indirect call edges from the
// profile (which don't exist in the static call graph) can enable the
// inlining of indirect call targets by processing the caller before them.
// TODO: enable this for non-CS profile and fix the counts returning logic to
// have a full support for indirect calls.
if (UseProfileIndirectCallEdges && ProfileIsCS) {
for (auto &Entry : *CG) {
const auto *F = Entry.first;
if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
continue;
auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
if (AllContexts.empty())
continue;
for (const auto &BB : *F) {
for (const auto &I : BB.getInstList()) {
const auto *CB = dyn_cast<CallBase>(&I);
if (!CB || !CB->isIndirectCall())
continue;
const DebugLoc &DLoc = I.getDebugLoc();
if (!DLoc)
continue;
auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
for (FunctionSamples *Samples : AllContexts) {
if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
for (const auto &Target : CallTargets.get()) {
Function *Callee = SymbolMap.lookup(Target.first());
if (Callee && !Callee->isDeclaration())
Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
}
}
}
}
}
}
}
// Compute a top-down order from the profile, which is used to sort functions in
// one SCC later. The static processing order computed for an SCC may not
// reflect the call contexts in the context-sensitive profile, thus may cause
// potential inlining to be overlooked. The function order in one SCC is being
// adjusted to a top-down order based on the profile to favor more inlining.
DenseMap<Function *, uint64_t> ProfileOrderMap;
if (UseProfileTopDownOrder ||
(ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
// Create a static call graph. The call edges are not important since they
// will be replaced by dynamic edges from the profile.
CallGraph ProfileCG(M);
replaceCallGraphEdges(ProfileCG, SymbolMap);
scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
uint64_t I = 0;
while (!CGI.isAtEnd()) {
for (CallGraphNode *Node : *CGI) {
if (auto *F = Node->getFunction())
ProfileOrderMap[F] = ++I;
}
++CGI;
}
}
scc_iterator<CallGraph *> CGI = scc_begin(CG);
while (!CGI.isAtEnd()) {
for (CallGraphNode *node : *CGI) {
auto F = node->getFunction();
uint64_t Start = FunctionOrderList.size();
for (CallGraphNode *Node : *CGI) {
auto *F = Node->getFunction();
if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(F);
}
// Sort nodes in SCC based on the profile top-down order.
if (!ProfileOrderMap.empty()) {
std::stable_sort(FunctionOrderList.begin() + Start,
FunctionOrderList.end(),
[&ProfileOrderMap](Function *Left, Function *Right) {
return ProfileOrderMap[Left] < ProfileOrderMap[Right];
});
}
++CGI;
}
LLVM_DEBUG({
dbgs() << "Function processing order:\n";
for (auto F : reverse(FunctionOrderList)) {
dbgs() << F->getName() << "\n";
}
});
std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
return FunctionOrderList;
}
@ -2461,6 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
DILocation2SampleMap.clear();
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is

View file

@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
(transformZExtICmp(LHS, CI, false) ||
transformZExtICmp(RHS, CI, false))) {
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp)

View file

@ -592,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
for (++BBI; BBI != E; ++BBI)
if (BBI->mayWriteToMemory())
if (BBI->mayWriteToMemory()) {
// Calls that only access inaccessible memory do not block sinking the
// load.
if (auto *CB = dyn_cast<CallBase>(BBI))
if (CB->onlyAccessesInaccessibleMemory())
continue;
return false;
}
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.

View file

@ -345,10 +345,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return false;
// Get the constant out of the ICmp, if there is one.
// Only try this when exactly 1 operand is a constant (if both operands
// are constant, the icmp should eventually simplify). Otherwise, we may
// invert the transform that reduces set bits and infinite-loop.
Value *X;
const APInt *CmpC;
ICmpInst::Predicate Pred;
if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) ||
CmpC->getBitWidth() != SelC->getBitWidth())
if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
return ShrinkDemandedConstant(I, OpNo, DemandedMask);
// If the constant is already the same as the ICmp, leave it as-is.

View file

@ -3878,9 +3878,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
}
}
// Skip processing debug intrinsics in InstCombine. Processing these call instructions
// consumes non-trivial amount of time and provides no value for the optimization.
if (!isa<DbgInfoIntrinsic>(Inst)) {
// Skip processing debug and pseudo intrinsics in InstCombine. Processing
// these call instructions consumes a non-trivial amount of time and
// provides no value for the optimization.
if (!Inst->isDebugOrPseudoInst()) {
InstrsForInstCombineWorklist.push_back(Inst);
SeenAliasScopes.analyse(Inst);
}

View file

@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() {
bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
// TODO -- use llvm::isInstructionTriviallyDead
if (I.isEHPad() || I.mayHaveSideEffects()) {
if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) {
// Skip any value profile instrumentation calls if they are
// instrumenting constants.
if (isInstrumentsConstant(I))

View file

@ -2076,6 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
ValueMapping[PN] = NewPN;
}
// Clone noalias scope declarations in the threaded block. When threading a
// loop exit, we would otherwise end up with two identical scope declarations
// visible at the same time.
SmallVector<MDNode *> NoAliasScopes;
DenseMap<MDNode *, MDNode *> ClonedScopes;
LLVMContext &Context = PredBB->getContext();
identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
// Clone the non-phi instructions of the source basic block into NewBB,
// keeping track of the mapping and using it to remap operands in the cloned
// instructions.
@ -2084,6 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[&*BI] = New;
adaptNoAliasScopes(New, ClonedScopes, Context);
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)

View file

@ -2524,7 +2524,7 @@ private:
NewAI.getAlign(), LI.isVolatile(),
LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags);
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
if (NewLI->isAtomic())
@ -2563,7 +2563,7 @@ private:
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags);
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
@ -2626,7 +2626,7 @@ private:
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
if (AATags)
Store->setAAMetadata(AATags);
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
@ -2650,7 +2650,7 @@ private:
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags);
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
@ -2720,7 +2720,7 @@ private:
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
NewSI->setAAMetadata(AATags);
NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
if (NewSI->isAtomic())
@ -2816,7 +2816,7 @@ private:
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
MaybeAlign(getSliceAlign()), II.isVolatile());
if (AATags)
New->setAAMetadata(AATags);
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
@ -2885,7 +2885,7 @@ private:
StoreInst *New =
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
if (AATags)
New->setAAMetadata(AATags);
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
}
@ -3006,7 +3006,7 @@ private:
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
Size, II.isVolatile());
if (AATags)
New->setAAMetadata(AATags);
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
@ -3060,7 +3060,7 @@ private:
LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
II.isVolatile(), "copyload");
if (AATags)
Load->setAAMetadata(AATags);
Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Src = Load;
}
@ -3080,7 +3080,7 @@ private:
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
if (AATags)
Store->setAAMetadata(AATags);
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
@ -3381,8 +3381,13 @@ private:
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
LoadInst *Load =
IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}
@ -3428,8 +3433,13 @@ private:
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
if (AATags)
Store->setAAMetadata(AATags);
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
}
};

View file

@ -989,3 +989,11 @@ void llvm::identifyNoAliasScopesToClone(
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
NoAliasDeclScopes.push_back(Decl->getScopeList());
}
/// Walk the instructions in the half-open range [Start, End) and, for every
/// noalias scope declaration encountered, append its scope list to
/// \p NoAliasDeclScopes. Instructions of any other kind are ignored.
void llvm::identifyNoAliasScopesToClone(
    BasicBlock::iterator Start, BasicBlock::iterator End,
    SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
  for (BasicBlock::iterator It = Start; It != End; ++It) {
    auto *ScopeDecl = dyn_cast<NoAliasScopeDeclInst>(&*It);
    // Only scope declarations contribute a scope list.
    if (!ScopeDecl)
      continue;
    NoAliasDeclScopes.push_back(ScopeDecl->getScopeList());
  }
}

View file

@ -420,13 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
}
if (auto *CB = dyn_cast<CallBase>(I)) {
// Treat calls that may not return as alive.
// TODO: Remove the intrinsic escape hatch once all intrinsics set
// willreturn properly.
if (!CB->willReturn() && !isa<IntrinsicInst>(I))
return false;
}
if (!I->willReturn())
return false;
if (!I->mayHaveSideEffects())
return true;
@ -923,6 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN,
/// \param IncomingValues A map from block to value.
static void replaceUndefValuesInPhi(PHINode *PN,
const IncomingValueMap &IncomingValues) {
SmallVector<unsigned> TrueUndefOps;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
@ -930,10 +926,31 @@ static void replaceUndefValuesInPhi(PHINode *PN,
BasicBlock *BB = PN->getIncomingBlock(i);
IncomingValueMap::const_iterator It = IncomingValues.find(BB);
if (It == IncomingValues.end()) continue;
// Keep track of undef/poison incoming values. Those must match, so we fix
// them up below if needed.
// Note: this is conservatively correct, but we could try harder and group
// the undef values per incoming basic block.
if (It == IncomingValues.end()) {
TrueUndefOps.push_back(i);
continue;
}
// There is a defined value for this incoming block, so map this undef
// incoming value to the defined value.
PN->setIncomingValue(i, It->second);
}
// If there are both undef and poison values incoming, then convert those
// values to undef. It is invalid to have different values for the same
// incoming block.
unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) {
return isa<PoisonValue>(PN->getIncomingValue(i));
});
if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) {
for (unsigned i : TrueUndefOps)
PN->setIncomingValue(i, UndefValue::get(PN->getType()));
}
}
/// Replace a value flowing from a block to a phi with

View file

@ -1628,6 +1628,11 @@ static bool canSinkInstructions(
I->getType()->isTokenTy())
return false;
// Do not try to sink an instruction in an infinite loop - it can cause
// this algorithm to loop forever.
if (I->getParent()->getSingleSuccessor() == I->getParent())
return false;
// Conservatively return false if I is an inline-asm instruction. Sinking
// and merging inline-asm instructions can potentially create arguments
// that cannot satisfy the inline-asm constraints.
@ -1714,13 +1719,13 @@ static bool canSinkInstructions(
return true;
}
// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
// canSinkLastInstruction returning true guarantees that every block has at
// canSinkInstructions returning true guarantees that every block has at
// least one non-terminator instruction.
SmallVector<Instruction*,4> Insts;
for (auto *BB : Blocks) {
@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}
// The only checking we need to do now is that all users of all instructions
// are the same PHI node. canSinkLastInstruction should have checked this but
// it is slightly over-aggressive - it gets confused by commutative instructions
// so double-check it here.
// are the same PHI node. canSinkInstructions should have checked this but
// it is slightly over-aggressive - it gets confused by commutative
// instructions so double-check it here.
Instruction *I0 = Insts.front();
if (!I0->user_empty()) {
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
return false;
}
// We don't need to do any more checking here; canSinkLastInstruction should
// We don't need to do any more checking here; canSinkInstructions should
// have done it all for us.
SmallVector<Value*, 4> NewOperands;
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
// This check is different to that in canSinkLastInstruction. There, we
// This check is different to that in canSinkInstructions. There, we
// cared about the global view once simplifycfg (and instcombine) have
// completed - it takes into account PHIs that become trivially
// simplifiable. However here we need a more local view; if an operand

View file

@ -142,6 +142,10 @@ public:
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
}
/// Build a select operation by forwarding to createNaryOp with the
/// Instruction::Select opcode and the operands \p Cond, \p TrueVal and
/// \p FalseVal, in that order. Returns whatever createNaryOp returns.
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
}
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//

View file

@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) {
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
/// element of the corresponding vector type at the given vectorization factor.
static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
// Determine if an array of VF elements of type Ty is "bitcast compatible"
// with a <VF x Ty> vector.
if (VF.isVector()) {
auto *VectorTy = VectorType::get(Ty, VF);
return TypeSize::get(VF.getKnownMinValue() *
DL.getTypeAllocSize(Ty).getFixedValue(),
VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
}
// If the vectorization factor is one, we just check if an array of type Ty
// requires padding between elements.
/// element of the corresponding vector type.
static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
// Determine if an array of N elements of type Ty is "bitcast compatible"
// with a <N x Ty> vector.
// This is only true if there is no padding between the array elements.
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}
@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = getMemInstValueType(I);
if (hasIrregularType(ScalarTy, DL, VF))
if (hasIrregularType(ScalarTy, DL))
return false;
// Check if masking is required.
@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
if (hasIrregularType(ScalarTy, DL, VF))
if (hasIrregularType(ScalarTy, DL))
return false;
return true;
@ -8195,8 +8187,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
if (BI->getSuccessor(0) != Dst)
EdgeMask = Builder.createNot(EdgeMask);
if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
// The condition is 'SrcMask && EdgeMask', which is equivalent to
// 'select i1 SrcMask, i1 EdgeMask, i1 false'.
// The select version does not introduce new UB if SrcMask is false and
// EdgeMask is poison. Using 'and' here introduces undefined behavior.
VPValue *False = Plan->getOrAddVPValue(
ConstantInt::getFalse(BI->getCondition()->getType()));
EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
}
return EdgeMaskCache[Edge] = EdgeMask;
}

View file

@ -526,8 +526,8 @@ getDWOFilenames(StringRef ExecFilename) {
std::string DWOCompDir =
dwarf::toString(Die.find(dwarf::DW_AT_comp_dir), "");
if (!DWOCompDir.empty()) {
SmallString<16> DWOPath;
sys::path::append(DWOPath, DWOCompDir, DWOName);
SmallString<16> DWOPath(std::move(DWOName));
sys::fs::make_absolute(DWOCompDir, DWOPath);
DWOPaths.emplace_back(DWOPath.data(), DWOPath.size());
} else {
DWOPaths.push_back(std::move(DWOName));

View file

@ -947,8 +947,8 @@ protected:
std::unordered_map<std::string, std::vector<StringRef>> LineCache;
// Keep track of missing sources.
StringSet<> MissingSources;
// Only emit 'no debug info' warning once.
bool WarnedNoDebugInfo;
// Only emit 'invalid debug info' warning once.
bool WarnedInvalidDebugInfo = false;
private:
bool cacheSource(const DILineInfo& LineInfoFile);
@ -962,8 +962,7 @@ private:
public:
SourcePrinter() = default;
SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch)
: Obj(Obj), WarnedNoDebugInfo(false) {
SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
symbolize::LLVMSymbolizer::Options SymbolizerOpts;
SymbolizerOpts.PrintFunctions =
DILineInfoSpecifier::FunctionNameKind::LinkageName;
@ -1018,22 +1017,17 @@ void SourcePrinter::printSourceLine(formatted_raw_ostream &OS,
return;
DILineInfo LineInfo = DILineInfo();
auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address);
Expected<DILineInfo> ExpectedLineInfo =
Symbolizer->symbolizeCode(*Obj, Address);
std::string ErrorMessage;
if (!ExpectedLineInfo)
ErrorMessage = toString(ExpectedLineInfo.takeError());
else
if (ExpectedLineInfo) {
LineInfo = *ExpectedLineInfo;
if (LineInfo.FileName == DILineInfo::BadString) {
if (!WarnedNoDebugInfo) {
std::string Warning =
"failed to parse debug information for " + ObjectFilename.str();
if (!ErrorMessage.empty())
Warning += ": " + ErrorMessage;
reportWarning(Warning, ObjectFilename);
WarnedNoDebugInfo = true;
}
} else if (!WarnedInvalidDebugInfo) {
WarnedInvalidDebugInfo = true;
// TODO Untested.
reportWarning("failed to parse debug information: " +
toString(ExpectedLineInfo.takeError()),
ObjectFilename);
}
if (!Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) {

View file

@ -181,7 +181,12 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
// the topmost function, which suits our needs better.
auto ResOrErr = Symbolizer.symbolizeInlinedCode(
ModuleName, {Offset, object::SectionedAddress::UndefSection});
Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
if (!ResOrErr || ResOrErr->getNumberOfFrames() == 0) {
error(ResOrErr);
Printer << DILineInfo();
} else {
Printer << ResOrErr->getFrame(0);
}
} else {
auto ResOrErr = Symbolizer.symbolizeCode(
ModuleName, {Offset, object::SectionedAddress::UndefSection});

View file

@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
if (TCR_PTR(__kmp_threads[0]) == NULL) {
--capacity;
}
// If it is not for initializing the hidden helper team, we need to take
// __kmp_hidden_helper_threads_num out of the capacity because it is included
// in __kmp_threads_capacity.
if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
capacity -= __kmp_hidden_helper_threads_num;
}
if (__kmp_nth + new_nthreads -
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
capacity) {
@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) {
--capacity;
}
// If it is not for initializing the hidden helper team, we need to take
// __kmp_hidden_helper_threads_num out of the capacity because it is included
// in __kmp_threads_capacity.
if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
capacity -= __kmp_hidden_helper_threads_num;
}
/* see if there are too many threads */
if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
if (__kmp_tp_cached) {
@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) {
/* find an available thread slot */
// Don't reassign the zero slot since we need that to only be used by
// initial thread. Slots for hidden helper threads should also be skipped.
if (initial_thread && __kmp_threads[0] == NULL) {
if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
gtid = 0;
} else {
for (gtid = __kmp_hidden_helper_threads_num + 1;

View file

@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) {
nth = (4 * __kmp_xproc);
// If hidden helper task is enabled, we initialize the thread capacity with
// extra
// __kmp_hidden_helper_threads_num.
nth += __kmp_hidden_helper_threads_num;
// extra __kmp_hidden_helper_threads_num.
if (__kmp_enable_hidden_helper) {
nth += __kmp_hidden_helper_threads_num;
}
if (nth > __kmp_max_nth)
nth = __kmp_max_nth;

View file

@ -326,7 +326,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
if (taskdata->td_flags.hidden_helper) {
// We don't need to map to shadow gtid if it is already a hidden helper thread
if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) {
gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
thread = __kmp_threads[gtid];
}