mirror of
https://github.com/opnsense/src.git
synced 2026-06-09 08:43:19 -04:00
Vendor import of llvm-project branch release/12.x llvmorg-12.0.0-0-gd28af7c654d8, a.k.a. 12.0.0 release.
This commit is contained in:
parent
9f93bc8bfd
commit
b4125f7d51
73 changed files with 864 additions and 196 deletions
|
|
@ -266,6 +266,9 @@ CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
|
|||
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
|
||||
CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
|
||||
|
||||
/// Treat loops as finite: language, always, never.
|
||||
ENUM_CODEGENOPT(FiniteLoops, FiniteLoopsKind, 2, FiniteLoopsKind::Language)
|
||||
|
||||
/// Attempt to use register sized accesses to bit-fields in structures, when
|
||||
/// possible.
|
||||
CODEGENOPT(UseRegisterSizedBitfieldAccess , 1, 0)
|
||||
|
|
|
|||
|
|
@ -140,6 +140,12 @@ public:
|
|||
All, // Keep all frame pointers.
|
||||
};
|
||||
|
||||
enum FiniteLoopsKind {
|
||||
Language, // Not specified, use language standard.
|
||||
Always, // All loops are assumed to be finite.
|
||||
Never, // No loop is assumed to be finite.
|
||||
};
|
||||
|
||||
/// The code model to use (-mcmodel).
|
||||
std::string CodeModel;
|
||||
|
||||
|
|
|
|||
|
|
@ -2410,6 +2410,11 @@ def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
|
|||
defm reroll_loops : BoolFOption<"reroll-loops",
|
||||
CodeGenOpts<"RerollLoops">, DefaultFalse,
|
||||
PosFlag<SetTrue, [CC1Option], "Turn on loop reroller">, NegFlag<SetFalse>>;
|
||||
def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>,
|
||||
HelpText<"Assume all loops are finite.">, Flags<[CC1Option]>;
|
||||
def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>,
|
||||
HelpText<"Do not assume that any loop is finite.">, Flags<[CC1Option]>;
|
||||
|
||||
def ftrigraphs : Flag<["-"], "ftrigraphs">, Group<f_Group>,
|
||||
HelpText<"Process trigraph sequences">, Flags<[CC1Option]>;
|
||||
def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group<f_Group>,
|
||||
|
|
|
|||
|
|
@ -1995,9 +1995,14 @@ void CodeGenModule::ConstructAttributeList(
|
|||
if (TargetDecl->hasAttr<ConstAttr>()) {
|
||||
FuncAttrs.addAttribute(llvm::Attribute::ReadNone);
|
||||
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
|
||||
// gcc specifies that 'const' functions have greater restrictions than
|
||||
// 'pure' functions, so they also cannot have infinite loops.
|
||||
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
|
||||
} else if (TargetDecl->hasAttr<PureAttr>()) {
|
||||
FuncAttrs.addAttribute(llvm::Attribute::ReadOnly);
|
||||
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
|
||||
// gcc specifies that 'pure' functions cannot have infinite loops.
|
||||
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
|
||||
} else if (TargetDecl->hasAttr<NoAliasAttr>()) {
|
||||
FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly);
|
||||
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
|
||||
|
|
|
|||
|
|
@ -9892,7 +9892,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
|
|||
llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
|
||||
CGF.EmitRuntimeCall(
|
||||
OMPBuilder.getOrCreateRuntimeFunction(
|
||||
CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
|
||||
CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
|
||||
Args);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -507,12 +507,23 @@ public:
|
|||
|
||||
/// True if the C++ Standard Requires Progress.
|
||||
bool CPlusPlusWithProgress() {
|
||||
if (CGM.getCodeGenOpts().getFiniteLoops() ==
|
||||
CodeGenOptions::FiniteLoopsKind::Never)
|
||||
return false;
|
||||
|
||||
return getLangOpts().CPlusPlus11 || getLangOpts().CPlusPlus14 ||
|
||||
getLangOpts().CPlusPlus17 || getLangOpts().CPlusPlus20;
|
||||
}
|
||||
|
||||
/// True if the C Standard Requires Progress.
|
||||
bool CWithProgress() {
|
||||
if (CGM.getCodeGenOpts().getFiniteLoops() ==
|
||||
CodeGenOptions::FiniteLoopsKind::Always)
|
||||
return true;
|
||||
if (CGM.getCodeGenOpts().getFiniteLoops() ==
|
||||
CodeGenOptions::FiniteLoopsKind::Never)
|
||||
return false;
|
||||
|
||||
return getLangOpts().C11 || getLangOpts().C17 || getLangOpts().C2x;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5620,6 +5620,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
if (A->getOption().matches(options::OPT_freroll_loops))
|
||||
CmdArgs.push_back("-freroll-loops");
|
||||
|
||||
Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops,
|
||||
options::OPT_fno_finite_loops);
|
||||
|
||||
Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
|
||||
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
|
||||
options::OPT_fno_unroll_loops);
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include "Darwin.h"
|
||||
#include "clang/Basic/CharInfo.h"
|
||||
#include "clang/Basic/Version.h"
|
||||
#include "clang/Config/config.h"
|
||||
#include "clang/Driver/Compilation.h"
|
||||
#include "clang/Driver/Driver.h"
|
||||
#include "clang/Driver/DriverDiagnostic.h"
|
||||
|
|
@ -520,7 +521,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
// translate 'lld' into 'lld-link', and in the case of the regular msvc
|
||||
// linker, we need to use a special search algorithm.
|
||||
llvm::SmallString<128> linkPath;
|
||||
StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
|
||||
StringRef Linker
|
||||
= Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER);
|
||||
if (Linker.empty())
|
||||
Linker = "link";
|
||||
if (Linker.equals_lower("lld"))
|
||||
Linker = "lld-link";
|
||||
|
||||
|
|
|
|||
|
|
@ -296,6 +296,7 @@ void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args,
|
|||
|
||||
CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++");
|
||||
CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi");
|
||||
CmdArgs.push_back(Profiling ? "-lpthread_p" : "-lpthread");
|
||||
}
|
||||
|
||||
std::string OpenBSD::getCompilerRT(const ArgList &Args,
|
||||
|
|
|
|||
|
|
@ -1037,7 +1037,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
|
|||
Opts.UnrollLoops =
|
||||
Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops,
|
||||
(Opts.OptimizationLevel > 1));
|
||||
|
||||
Opts.BinutilsVersion =
|
||||
std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
|
||||
|
||||
|
|
@ -1324,6 +1323,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
|
|||
|
||||
Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true);
|
||||
|
||||
if (Args.hasArg(options::OPT_ffinite_loops))
|
||||
Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Always;
|
||||
else if (Args.hasArg(options::OPT_fno_finite_loops))
|
||||
Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never;
|
||||
return Success;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -565,7 +565,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
|
|||
Builder.defineMacro("__cpp_aggregate_bases", "201603L");
|
||||
Builder.defineMacro("__cpp_structured_bindings", "201606L");
|
||||
Builder.defineMacro("__cpp_nontype_template_args",
|
||||
LangOpts.CPlusPlus20 ? "201911L" : "201411L");
|
||||
"201411L"); // (not latest)
|
||||
Builder.defineMacro("__cpp_fold_expressions", "201603L");
|
||||
Builder.defineMacro("__cpp_guaranteed_copy_elision", "201606L");
|
||||
Builder.defineMacro("__cpp_nontype_template_parameter_auto", "201606L");
|
||||
|
|
|
|||
|
|
@ -5158,6 +5158,20 @@ private:
|
|||
|
||||
llvm::DenseMap<const IdentifierInfo *, Member> Results;
|
||||
};
|
||||
|
||||
// If \p Base is ParenListExpr, assume a chain of comma operators and pick the
|
||||
// last expr. We expect other ParenListExprs to be resolved to e.g. constructor
|
||||
// calls before here. (So the ParenListExpr should be nonempty, but check just
|
||||
// in case)
|
||||
Expr *unwrapParenList(Expr *Base) {
|
||||
if (auto *PLE = llvm::dyn_cast_or_null<ParenListExpr>(Base)) {
|
||||
if (PLE->getNumExprs() == 0)
|
||||
return nullptr;
|
||||
Base = PLE->getExpr(PLE->getNumExprs() - 1);
|
||||
}
|
||||
return Base;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
|
||||
|
|
@ -5165,6 +5179,8 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
|
|||
SourceLocation OpLoc, bool IsArrow,
|
||||
bool IsBaseExprStatement,
|
||||
QualType PreferredType) {
|
||||
Base = unwrapParenList(Base);
|
||||
OtherOpBase = unwrapParenList(OtherOpBase);
|
||||
if (!Base || !CodeCompleter)
|
||||
return;
|
||||
|
||||
|
|
@ -5597,12 +5613,13 @@ ProduceSignatureHelp(Sema &SemaRef, Scope *S,
|
|||
QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn,
|
||||
ArrayRef<Expr *> Args,
|
||||
SourceLocation OpenParLoc) {
|
||||
if (!CodeCompleter)
|
||||
Fn = unwrapParenList(Fn);
|
||||
if (!CodeCompleter || !Fn)
|
||||
return QualType();
|
||||
|
||||
// FIXME: Provide support for variadic template functions.
|
||||
// Ignore type-dependent call expressions entirely.
|
||||
if (!Fn || Fn->isTypeDependent() || anyNullArguments(Args))
|
||||
if (Fn->isTypeDependent() || anyNullArguments(Args))
|
||||
return QualType();
|
||||
// In presence of dependent args we surface all possible signatures using the
|
||||
// non-dependent args in the prefix. Afterwards we do a post filtering to make
|
||||
|
|
|
|||
|
|
@ -24,13 +24,77 @@ Non-comprehensive list of changes in this release
|
|||
ELF Improvements
|
||||
----------------
|
||||
|
||||
* ``--error-handling-script`` is added to allow for user-defined handlers upon
|
||||
* ``--dependency-file`` has been added. (Similar to ``cc -M -MF``.)
|
||||
(`D82437 <https://reviews.llvm.org/D82437>`_)
|
||||
* ``--error-handling-script`` has been added to allow for user-defined handlers upon
|
||||
missing libraries. (`D87758 <https://reviews.llvm.org/D87758>`_)
|
||||
* ``--exclude-libs`` can now localize defined version symbols and bitcode referenced libcall symbols.
|
||||
(`D94280 <https://reviews.llvm.org/D94280>`_)
|
||||
* ``--gdb-index`` now works with DWARF v5 and ``--icf={safe,all}``.
|
||||
(`D85579 <https://reviews.llvm.org/D85579>`_)
|
||||
(`D89751 <https://reviews.llvm.org/D89751>`_)
|
||||
* ``--gdb-index --emit-relocs`` can now be used together.
|
||||
(`D94354 <https://reviews.llvm.org/D94354>`_)
|
||||
* ``--icf={safe,all}`` conservatively no longer folds text sections with LSDA.
|
||||
Previously ICF on ``-fexceptions`` code could be unsafe.
|
||||
(`D84610 <https://reviews.llvm.org/D84610>`_)
|
||||
* ``--icf={safe,all}`` can now fold two sections with relocations referencing aliased symbols.
|
||||
(`D88830 <https://reviews.llvm.org/D88830>`_)
|
||||
* ``--lto-pseudo-probe-for-profiling`` has been added.
|
||||
(`D95056 <https://reviews.llvm.org/D95056>`_)
|
||||
* ``--no-lto-whole-program-visibility`` has been added.
|
||||
(`D92060 <https://reviews.llvm.org/D92060>`_)
|
||||
* ``--oformat-binary`` has been fixed to respect LMA.
|
||||
(`D85086 <https://reviews.llvm.org/D85086>`_)
|
||||
* ``--reproduce`` includes ``--lto-sample-profile``, ``--just-symbols``, ``--call-graph-ordering-file``, ``--retain-symbols-file`` files.
|
||||
* ``-r --gc-sections`` is now supported.
|
||||
(`D84131 <https://reviews.llvm.org/D84131>`_)
|
||||
* A ``-u`` specified symbol will no longer change the binding to ``STB_WEAK``.
|
||||
(`D88945 <https://reviews.llvm.org/D88945>`_)
|
||||
* ``--wrap`` support has been improved.
|
||||
+ If ``foo`` is not referenced, there is no longer an undefined symbol ``__wrap_foo``.
|
||||
+ If ``__real_foo`` is not referenced, there is no longer an undefined symbol ``foo``.
|
||||
* ``SHF_LINK_ORDER`` sections can now have zero ``sh_link`` values.
|
||||
* ``SHF_LINK_ORDER`` and non-``SHF_LINK_ORDER`` sections can now be mixed within an input section description.
|
||||
(`D84001 <https://reviews.llvm.org/D84001>`_)
|
||||
* ``LOG2CEIL`` is now supported in linker scripts.
|
||||
(`D84054 <https://reviews.llvm.org/D84054>`_)
|
||||
* ``DEFINED`` has been fixed to check whether the symbol is defined.
|
||||
(`D83758 <https://reviews.llvm.org/D83758>`_)
|
||||
* An input section description may now have multiple ``SORT_*``.
|
||||
The matched sections are ordered by radix sort with the keys being ``(SORT*, --sort-section, input order)``.
|
||||
(`D91127 <https://reviews.llvm.org/D91127>`_)
|
||||
* Users can now provide a GNU style linker script to convert ``.ctors`` into ``.init_array``.
|
||||
(`D91187 <https://reviews.llvm.org/D91187>`_)
|
||||
* An empty output section can now be discarded even if it is assigned to a program header.
|
||||
(`D92301 <https://reviews.llvm.org/D92301>`_)
|
||||
* Non-``SHF_ALLOC`` sections now have larger file offsets than ``SHF_ALLOC`` sections.
|
||||
(`D85867 <https://reviews.llvm.org/D85867>`_)
|
||||
* Some symbol versioning improvements.
|
||||
+ Defined ``foo@@v1`` now resolves undefined ``foo@v1`` (`D92259 <https://reviews.llvm.org/D92259>`_)
|
||||
+ Undefined ``foo@v1`` now gets an error (`D92260 <https://reviews.llvm.org/D92260>`_)
|
||||
* The AArch64 port now has support for ``STO_AARCH64_VARIANT_PCS`` and ``DT_AARCH64_VARIANT_PCS``.
|
||||
(`D93045 <https://reviews.llvm.org/D93045>`_)
|
||||
* The AArch64 port now has support for ``R_AARCH64_LD64_GOTPAGE_LO15``.
|
||||
* The PowerPC64 port now detects missing R_PPC64_TLSGD/R_PPC64_TLSLD and disables TLS relaxation.
|
||||
This allows linking with object files produced by very old IBM XL compilers.
|
||||
(`D92959 <https://reviews.llvm.org/D92959>`_)
|
||||
* Many PowerPC PC-relative relocations are now supported.
|
||||
* ``R_PPC_ADDR24`` and ``R_PPC64_ADDR16_HIGH`` are now supported.
|
||||
* powerpcle is now supported. Tested with FreeBSD loader and freestanding.
|
||||
(`D93917 <https://reviews.llvm.org/D93917>`_)
|
||||
* RISC-V: the first ``SHT_RISCV_ATTRIBUTES`` section is now retained.
|
||||
(`D86309 <https://reviews.llvm.org/D86309>`_)
|
||||
* LTO pipeline now defaults to the new PM if the CMake variable ``ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER`` is on.
|
||||
(`D92885 <https://reviews.llvm.org/D92885>`_)
|
||||
|
||||
Breaking changes
|
||||
----------------
|
||||
|
||||
* ...
|
||||
* A COMMON symbol can now cause the fetch of an archive providing a ``STB_GLOBAL`` definition.
|
||||
This behavior follows GNU ld newer than December 1999.
|
||||
If you see ``duplicate symbol`` errors with the new behavior, check out `PR49226 <https://bugs.llvm.org/show_bug.cgi?id=49226>`_.
|
||||
(`D86142 <https://reviews.llvm.org/D86142>`_)
|
||||
|
||||
COFF Improvements
|
||||
-----------------
|
||||
|
|
@ -58,10 +122,26 @@ MinGW Improvements
|
|||
(`D93950 <https://reviews.llvm.org/D93950>`_)
|
||||
|
||||
|
||||
MachO Improvements
|
||||
Mach-O Improvements
|
||||
-------------------
|
||||
|
||||
* Item 1.
|
||||
We've gotten the new implementation of LLD for Mach-O to the point where it is
|
||||
able to link large x86_64 programs, and we'd love to get some alpha testing on
|
||||
it. The new Darwin back-end can be invoked as follows:
|
||||
|
||||
.. code-block::
|
||||
clang -fuse-ld=lld.darwinnew /path/to/file.c
|
||||
|
||||
To reach this point, we implemented numerous features, and it's easier to list
|
||||
the major features we *haven't* yet completed:
|
||||
|
||||
* LTO support
|
||||
* Stack unwinding for exceptions
|
||||
* Support for arm64, arm, and i386 architectures
|
||||
|
||||
If you stumble upon an issue and it doesn't fall into one of these categories,
|
||||
please file a bug report!
|
||||
|
||||
|
||||
WebAssembly Improvements
|
||||
------------------------
|
||||
|
|
|
|||
|
|
@ -339,8 +339,7 @@ LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT);
|
|||
* ownership has not been passed to a JITDylib (e.g. because some error
|
||||
* prevented the client from calling LLVMOrcJITDylibAddGenerator).
|
||||
*/
|
||||
void LLVMOrcDisposeDefinitionGenerator(
|
||||
LLVMOrcDefinitionGeneratorRef DG);
|
||||
void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG);
|
||||
|
||||
/**
|
||||
* Dispose of a MaterializationUnit.
|
||||
|
|
@ -388,7 +387,9 @@ LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES,
|
|||
* Returns the JITDylib with the given name, or NULL if no such JITDylib
|
||||
* exists.
|
||||
*/
|
||||
LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name);
|
||||
LLVMOrcJITDylibRef
|
||||
LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES,
|
||||
const char *Name);
|
||||
|
||||
/**
|
||||
* Return a reference to a newly created resource tracker associated with JD.
|
||||
|
|
|
|||
|
|
@ -490,7 +490,10 @@ protected:
|
|||
/// - \c Add has a constant operand.
|
||||
bool canFoldAddIntoGEP(const User *GEP, const Value *Add);
|
||||
|
||||
/// Test whether the given value has exactly one use.
|
||||
/// Test whether the register associated with this value has exactly one use,
|
||||
/// in which case that single use is killing. Note that multiple IR values
|
||||
/// may map onto the same register, in which case this is not the same as
|
||||
/// checking that an IR value has one use.
|
||||
bool hasTrivialKill(const Value *V);
|
||||
|
||||
/// Create a machine mem operand from the given instruction.
|
||||
|
|
|
|||
|
|
@ -1156,6 +1156,10 @@ public:
|
|||
return getOpcode() == TargetOpcode::CFI_INSTRUCTION;
|
||||
}
|
||||
|
||||
bool isPseudoProbe() const {
|
||||
return getOpcode() == TargetOpcode::PSEUDO_PROBE;
|
||||
}
|
||||
|
||||
// True if the instruction represents a position in the function.
|
||||
bool isPosition() const { return isLabel() || isCFIInstruction(); }
|
||||
|
||||
|
|
@ -1165,6 +1169,9 @@ public:
|
|||
bool isDebugInstr() const {
|
||||
return isDebugValue() || isDebugLabel() || isDebugRef();
|
||||
}
|
||||
bool isDebugOrPseudoInstr() const {
|
||||
return isDebugInstr() || isPseudoProbe();
|
||||
}
|
||||
|
||||
bool isDebugOffsetImm() const { return getDebugOffset().isImm(); }
|
||||
|
||||
|
|
|
|||
|
|
@ -375,7 +375,7 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
|
|||
__OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32,
|
||||
/* omp_allocator_handle_t */ VoidPtr)
|
||||
|
||||
__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64)
|
||||
__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64)
|
||||
__OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr,
|
||||
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
|
||||
__OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32,
|
||||
|
|
@ -844,7 +844,7 @@ __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {})
|
|||
__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {})
|
||||
__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {})
|
||||
|
||||
__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {})
|
||||
__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {})
|
||||
__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {})
|
||||
__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {})
|
||||
__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {})
|
||||
|
|
|
|||
|
|
@ -1757,9 +1757,6 @@ public:
|
|||
return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
|
||||
}
|
||||
|
||||
/// Returns true if this function is guaranteed to return.
|
||||
bool willReturn() const { return hasFnAttr(Attribute::WillReturn); }
|
||||
|
||||
void setOnlyReadsMemory() {
|
||||
addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -633,6 +633,10 @@ public:
|
|||
/// generated program.
|
||||
bool isSafeToRemove() const;
|
||||
|
||||
/// Return true if the instruction will return (unwinding is considered as
|
||||
/// a form of returning control flow here).
|
||||
bool willReturn() const;
|
||||
|
||||
/// Return true if the instruction is a variety of EH-block.
|
||||
bool isEHPad() const {
|
||||
switch (getOpcode()) {
|
||||
|
|
@ -650,6 +654,9 @@ public:
|
|||
/// llvm.lifetime.end marker.
|
||||
bool isLifetimeStartOrEnd() const;
|
||||
|
||||
/// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
|
||||
bool isDebugOrPseudoInst() const;
|
||||
|
||||
/// Return a pointer to the next non-debug instruction in the same basic
|
||||
/// block as 'this', or nullptr if no such instruction exists. Skip any pseudo
|
||||
/// operations if \c SkipPseudoOp is true.
|
||||
|
|
|
|||
|
|
@ -667,6 +667,12 @@ struct AAMDNodes {
|
|||
/// The tag specifying the noalias scope.
|
||||
MDNode *NoAlias = nullptr;
|
||||
|
||||
// Shift the tbaa metadata node \p M so that it starts \p off bytes later.
|
||||
static MDNode *ShiftTBAA(MDNode *M, size_t off);
|
||||
|
||||
// Shift the tbaa.struct metadata node \p M so that it starts \p off bytes later.
|
||||
static MDNode *ShiftTBAAStruct(MDNode *M, size_t off);
|
||||
|
||||
/// Given two sets of AAMDNodes that apply to the same pointer,
|
||||
/// give the best AAMDNodes that are compatible with both (i.e. a set of
|
||||
/// nodes whose allowable aliasing conclusions are a subset of those
|
||||
|
|
@ -680,6 +686,18 @@ struct AAMDNodes {
|
|||
Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// Create a new AAMDNode that describes this AAMDNode after applying a
|
||||
/// constant offset to the start of the pointer
|
||||
AAMDNodes shift(size_t Offset) {
|
||||
AAMDNodes Result;
|
||||
Result.TBAA = TBAA ? ShiftTBAA(TBAA, Offset) : nullptr;
|
||||
Result.TBAAStruct =
|
||||
TBAAStruct ? ShiftTBAAStruct(TBAAStruct, Offset) : nullptr;
|
||||
Result.Scope = Scope;
|
||||
Result.NoAlias = NoAlias;
|
||||
return Result;
|
||||
}
|
||||
};
|
||||
|
||||
// Specialize DenseMapInfo for AAMDNodes.
|
||||
|
|
|
|||
|
|
@ -568,6 +568,11 @@ public:
|
|||
bool accumulateConstantOffset(
|
||||
const DataLayout &DL, APInt &Offset,
|
||||
function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr) const;
|
||||
|
||||
static bool accumulateConstantOffset(
|
||||
Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
|
||||
APInt &Offset,
|
||||
function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr);
|
||||
};
|
||||
|
||||
class PtrToIntOperator
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/IR/ProfileSummary.h"
|
||||
#include "llvm/ProfileData/InstrProf.h"
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
|
|
@ -89,6 +90,8 @@ public:
|
|||
|
||||
void addRecord(const sampleprof::FunctionSamples &FS,
|
||||
bool isCallsiteSample = false);
|
||||
std::unique_ptr<ProfileSummary> computeSummaryForProfiles(
|
||||
const StringMap<sampleprof::FunctionSamples> &Profiles);
|
||||
std::unique_ptr<ProfileSummary> getSummary();
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
|
|
@ -90,6 +91,8 @@ private:
|
|||
// calling context and the context is identified by path from root to the node.
|
||||
class SampleContextTracker {
|
||||
public:
|
||||
using ContextSamplesTy = SmallSet<FunctionSamples *, 16>;
|
||||
|
||||
SampleContextTracker(StringMap<FunctionSamples> &Profiles);
|
||||
// Query context profile for a specific callee with given name at a given
|
||||
// call-site. The full context is identified by location of call instruction.
|
||||
|
|
@ -103,6 +106,9 @@ public:
|
|||
FunctionSamples *getContextSamplesFor(const DILocation *DIL);
|
||||
// Query context profile for a given sample context of a function.
|
||||
FunctionSamples *getContextSamplesFor(const SampleContext &Context);
|
||||
// Get all context profile for given function.
|
||||
ContextSamplesTy &getAllContextSamplesFor(const Function &Func);
|
||||
ContextSamplesTy &getAllContextSamplesFor(StringRef Name);
|
||||
// Query base profile for a given function. A base profile is a merged view
|
||||
// of all context profiles for contexts that are not inlined.
|
||||
FunctionSamples *getBaseSamplesFor(const Function &Func,
|
||||
|
|
@ -113,6 +119,9 @@ public:
|
|||
// This makes sure that inlined context profile will be excluded in
|
||||
// function's base profile.
|
||||
void markContextSamplesInlined(const FunctionSamples *InlinedSamples);
|
||||
void promoteMergeContextSamplesTree(const Instruction &Inst,
|
||||
StringRef CalleeName);
|
||||
void addCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
|
||||
// Dump the internal context profile trie.
|
||||
void dump();
|
||||
|
||||
|
|
@ -126,8 +135,6 @@ private:
|
|||
ContextTrieNode *getTopLevelContextNode(StringRef FName);
|
||||
ContextTrieNode &addTopLevelContextNode(StringRef FName);
|
||||
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
|
||||
void promoteMergeContextSamplesTree(const Instruction &Inst,
|
||||
StringRef CalleeName);
|
||||
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
|
||||
StringRef ContextStrToRemove);
|
||||
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
|
||||
|
|
@ -135,7 +142,7 @@ private:
|
|||
StringRef ContextStrToRemove);
|
||||
|
||||
// Map from function name to context profiles (excluding base profile)
|
||||
StringMap<SmallSet<FunctionSamples *, 16>> FuncToCtxtProfileSet;
|
||||
StringMap<ContextSamplesTy> FuncToCtxtProfileSet;
|
||||
|
||||
// Root node for context trie tree
|
||||
ContextTrieNode RootContext;
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ public:
|
|||
|
||||
private:
|
||||
// Allow a little bias due to the rounding to integral factors.
|
||||
constexpr static float DistributionFactorVariance = 0.02;
|
||||
constexpr static float DistributionFactorVariance = 0.02f;
|
||||
// Distribution factors from last pass.
|
||||
FuncProbeFactorMap FunctionProbeFactors;
|
||||
|
||||
|
|
|
|||
|
|
@ -274,6 +274,13 @@ void updateProfileCallee(
|
|||
void identifyNoAliasScopesToClone(
|
||||
ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
|
||||
|
||||
/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
|
||||
/// instruction range and extract their scope. These are candidates for
|
||||
/// duplication when cloning.
|
||||
void identifyNoAliasScopesToClone(
|
||||
BasicBlock::iterator Start, BasicBlock::iterator End,
|
||||
SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
|
||||
|
||||
/// Duplicate the specified list of noalias decl scopes.
|
||||
/// The 'Ext' string is added as an extension to the name.
|
||||
/// Afterwards, the ClonedScopes contains the mapping of the original scope
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
|
|||
|
||||
static bool isAlwaysLive(Instruction *I) {
|
||||
return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
|
||||
I->mayHaveSideEffects();
|
||||
I->mayHaveSideEffects() || !I->willReturn();
|
||||
}
|
||||
|
||||
void DemandedBits::determineLiveOperandBits(
|
||||
|
|
|
|||
|
|
@ -243,11 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
|
|||
if (RecurrenceType->isFloatingPointTy()) {
|
||||
if (!isFloatingPointRecurrenceKind(Kind))
|
||||
return false;
|
||||
} else {
|
||||
} else if (RecurrenceType->isIntegerTy()) {
|
||||
if (!isIntegerRecurrenceKind(Kind))
|
||||
return false;
|
||||
if (isArithmeticRecurrenceKind(Kind))
|
||||
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
|
||||
} else {
|
||||
// Pointer min/max may exist, but it is not supported as a reduction op.
|
||||
return false;
|
||||
}
|
||||
|
||||
Worklist.push_back(Start);
|
||||
|
|
|
|||
|
|
@ -737,3 +737,84 @@ bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
|
|||
void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
/// Rebuild the struct-path TBAA tag \p MD for an access whose start has been
/// moved by \p Offset bytes.
///
/// \returns \p MD itself when \p Offset is zero or \p MD is not a struct-path
/// tag; nullptr when the shifted access no longer overlaps the range the tag
/// describes (new format) or when the tag's offset is smaller than \p Offset
/// (old format); otherwise a new tag node with the adjusted offset (and, for
/// the new format, size).
///
/// NOTE(review): the offset stored in the tag is *decreased* by \p Offset.
/// Confirm against the callers that this is the intended direction.
MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) {
  // Fast paths: nothing to shift, or a scalar (non struct-path) tag.
  if (Offset == 0 || !isStructPathTBAA(MD))
    return MD;

  TBAAStructTagNode Tag(MD);
  ConstantInt *OffsetCI = mdconst::extract<ConstantInt>(MD->getOperand(2));
  const uint64_t OldOffset = OffsetCI->getZExtValue();

  // Operands 0 and 1 are carried over untouched.
  SmallVector<Metadata *, 5> Ops;
  Ops.push_back(MD->getOperand(0));
  Ops.push_back(MD->getOperand(1));

  if (!Tag.isNewFormat()) {
    // Old format: only an offset is stored, so it must not become negative.
    if (OldOffset < Offset)
      return nullptr;

    Ops.push_back(ConstantAsMetadata::get(
        ConstantInt::get(OffsetCI->getType(), OldOffset - Offset)));

    // immutable type
    if (MD->getNumOperands() >= 4)
      Ops.push_back(MD->getOperand(3));

    return MDNode::get(MD->getContext(), Ops);
  }

  // New format: operand 3 holds the access size.
  ConstantInt *SizeCI = mdconst::extract<ConstantInt>(MD->getOperand(3));
  const uint64_t OldSize = SizeCI->getZExtValue();

  // The described range ends at or before the new start: nothing is left.
  if (OldOffset + OldSize <= Offset)
    return nullptr;

  uint64_t NewOffset;
  uint64_t NewSize;
  if (OldOffset < Offset) {
    // The range straddles the new start: clamp to 0 and trim the size by the
    // part that was cut off.
    NewOffset = 0;
    NewSize = OldSize - (Offset - OldOffset);
  } else {
    NewOffset = OldOffset - Offset;
    NewSize = OldSize;
  }

  Ops.push_back(ConstantAsMetadata::get(
      ConstantInt::get(OffsetCI->getType(), NewOffset)));
  Ops.push_back(
      ConstantAsMetadata::get(ConstantInt::get(SizeCI->getType(), NewSize)));

  // immutable type
  if (MD->getNumOperands() >= 5)
    Ops.push_back(MD->getOperand(4));

  return MDNode::get(MD->getContext(), Ops);
}
|
||||
|
||||
/// Rebuild the tbaa.struct node \p MD for a pointer whose start has been moved
/// by \p Offset bytes.
///
/// The operands are read as consecutive (offset, size, tag) triples. Triples
/// that end at or before \p Offset are dropped; triples that straddle it are
/// clamped to offset 0 with a correspondingly reduced size; all others just
/// have \p Offset subtracted from their offset. The third operand of each
/// kept triple is copied unchanged.
///
/// \returns \p MD itself when \p Offset is zero, otherwise a new node built
/// from the surviving triples.
MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) {
  // Fast path if there's no offset
  if (Offset == 0)
    return MD;

  SmallVector<Metadata *, 3> Shifted;
  const size_t NumOps = MD->getNumOperands();
  for (size_t Idx = 0; Idx < NumOps; Idx += 3) {
    ConstantInt *FieldOffsetCI =
        mdconst::extract<ConstantInt>(MD->getOperand(Idx));
    ConstantInt *FieldSizeCI =
        mdconst::extract<ConstantInt>(MD->getOperand(Idx + 1));
    const uint64_t FieldOffset = FieldOffsetCI->getZExtValue();
    const uint64_t FieldSize = FieldSizeCI->getZExtValue();

    // Skip triples that lie entirely before the new start.
    if (FieldOffset + FieldSize <= Offset)
      continue;

    uint64_t NewOffset;
    uint64_t NewSize;
    if (FieldOffset < Offset) {
      // Partially cut off: starts at 0 now, minus the bytes that were lost.
      NewOffset = 0;
      NewSize = FieldSize - (Offset - FieldOffset);
    } else {
      NewOffset = FieldOffset - Offset;
      NewSize = FieldSize;
    }

    // Emit the shifted triple.
    Shifted.push_back(ConstantAsMetadata::get(
        ConstantInt::get(FieldOffsetCI->getType(), NewOffset)));
    Shifted.push_back(ConstantAsMetadata::get(
        ConstantInt::get(FieldSizeCI->getType(), NewSize)));
    Shifted.push_back(MD->getOperand(Idx + 2));
  }
  return MDNode::get(MD->getContext(), Shifted);
}
|
||||
|
|
@ -5018,36 +5018,14 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
|
|||
// arbitrary length of time, but programs aren't allowed to rely on that.
|
||||
|
||||
// If there is no successor, then execution can't transfer to it.
|
||||
if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
|
||||
return !CRI->unwindsToCaller();
|
||||
if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
|
||||
return !CatchSwitch->unwindsToCaller();
|
||||
if (isa<ResumeInst>(I))
|
||||
return false;
|
||||
if (isa<ReturnInst>(I))
|
||||
return false;
|
||||
if (isa<UnreachableInst>(I))
|
||||
return false;
|
||||
|
||||
// Calls can throw, or contain an infinite loop, or kill the process.
|
||||
if (const auto *CB = dyn_cast<CallBase>(I)) {
|
||||
// Call sites that throw have implicit non-local control flow.
|
||||
if (!CB->doesNotThrow())
|
||||
return false;
|
||||
|
||||
// A function which doesn't throw and has "willreturn" attribute will
|
||||
// always return.
|
||||
if (CB->hasFnAttr(Attribute::WillReturn))
|
||||
return true;
|
||||
|
||||
// FIXME: Temporarily assume that all side-effect free intrinsics will
|
||||
// return. Remove this workaround once all intrinsics are appropriately
|
||||
// annotated.
|
||||
return isa<IntrinsicInst>(CB) && CB->onlyReadsMemory();
|
||||
}
|
||||
|
||||
// Other instructions return normally.
|
||||
return true;
|
||||
// An instruction that returns without throwing must transfer control flow
|
||||
// to a successor.
|
||||
return !I->mayThrow() && I->willReturn();
|
||||
}
|
||||
|
||||
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
|
||||
|
|
|
|||
|
|
@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
|
|||
Observer.changedInstr(MI);
|
||||
return Legalized;
|
||||
case TargetOpcode::G_PHI: {
|
||||
// FIXME: add support for when SizeOp0 isn't an exact multiple of
|
||||
// NarrowSize.
|
||||
if (SizeOp0 % NarrowSize != 0)
|
||||
return UnableToLegalize;
|
||||
|
||||
unsigned NumParts = SizeOp0 / NarrowSize;
|
||||
SmallVector<Register, 2> DstRegs(NumParts);
|
||||
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
|
||||
|
|
|
|||
|
|
@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
|
|||
// If MI has side effects, it should become a barrier for code motion.
|
||||
// IOM is rebuilt from the next instruction to prevent later
|
||||
// instructions from being moved before this MI.
|
||||
if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
|
||||
if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
|
||||
Next != MBB.end()) {
|
||||
BuildInstOrderMap(Next, IOM);
|
||||
SawStore = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1462,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
|
|||
}
|
||||
|
||||
bool MachineInstr::isLoadFoldBarrier() const {
|
||||
return mayStore() || isCall() || hasUnmodeledSideEffects();
|
||||
return mayStore() || isCall() ||
|
||||
(hasUnmodeledSideEffects() && !isPseudoProbe());
|
||||
}
|
||||
|
||||
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
|
||||
|
|
|
|||
|
|
@ -6517,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
|
|||
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
|
||||
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
|
||||
// to consider shift amounts with defined behavior.
|
||||
//
|
||||
// The IsRotate flag should be set when the LHS of both shifts is the same.
|
||||
// Otherwise if matching a general funnel shift, it should be clear.
|
||||
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
|
||||
SelectionDAG &DAG) {
|
||||
SelectionDAG &DAG, bool IsRotate) {
|
||||
// If EltSize is a power of 2 then:
|
||||
//
|
||||
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
|
||||
|
|
@ -6550,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
|
|||
// always invokes undefined behavior for 32-bit X.
|
||||
//
|
||||
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
|
||||
//
|
||||
// NOTE: We can only do this when matching an AND and not a general
|
||||
// funnel shift.
|
||||
unsigned MaskLoBits = 0;
|
||||
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
|
||||
if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
|
||||
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
|
||||
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
|
||||
unsigned Bits = Log2_64(EltSize);
|
||||
|
|
@ -6641,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
|
|||
// (srl x, (*ext y))) ->
|
||||
// (rotr x, y) or (rotl x, (sub 32, y))
|
||||
EVT VT = Shifted.getValueType();
|
||||
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
|
||||
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
|
||||
/*IsRotate*/ true)) {
|
||||
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
|
||||
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
|
||||
HasPos ? Pos : Neg);
|
||||
|
|
@ -6670,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
|
|||
// fold (or (shl x0, (*ext (sub 32, y))),
|
||||
// (srl x1, (*ext y))) ->
|
||||
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
|
||||
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
|
||||
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
|
||||
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
|
||||
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
|
||||
HasPos ? Pos : Neg);
|
||||
|
|
|
|||
|
|
@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) {
|
|||
if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
|
||||
return false;
|
||||
|
||||
// Casts and extractvalues may be trivially coalesced by fast-isel.
|
||||
if (I->getOpcode() == Instruction::BitCast ||
|
||||
I->getOpcode() == Instruction::PtrToInt ||
|
||||
I->getOpcode() == Instruction::IntToPtr ||
|
||||
I->getOpcode() == Instruction::ExtractValue)
|
||||
return false;
|
||||
|
||||
// Only instructions with a single use in the same basic block are considered
|
||||
// to have trivial kills.
|
||||
return I->hasOneUse() &&
|
||||
!(I->getOpcode() == Instruction::BitCast ||
|
||||
I->getOpcode() == Instruction::PtrToInt ||
|
||||
I->getOpcode() == Instruction::IntToPtr) &&
|
||||
cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9660,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
|
|||
// We will look through cast uses, so ignore them completely.
|
||||
if (I.isCast())
|
||||
continue;
|
||||
// Ignore debug info intrinsics, they don't escape or store to allocas.
|
||||
if (isa<DbgInfoIntrinsic>(I))
|
||||
// Ignore debug info and pseudo op intrinsics, they don't escape or store
|
||||
// to allocas.
|
||||
if (I.isDebugOrPseudoInst())
|
||||
continue;
|
||||
// This is an unknown instruction. Assume it escapes or writes to all
|
||||
// static alloca operands.
|
||||
|
|
|
|||
|
|
@ -2012,7 +2012,7 @@ bool TargetLowering::SimplifyDemandedBits(
|
|||
|
||||
const APInt *ShAmtC =
|
||||
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
|
||||
if (!ShAmtC)
|
||||
if (!ShAmtC || ShAmtC->uge(BitWidth))
|
||||
break;
|
||||
uint64_t ShVal = ShAmtC->getZExtValue();
|
||||
|
||||
|
|
@ -5935,6 +5935,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
SDLoc DL(Op);
|
||||
|
||||
// Because getNegatedExpression can delete nodes we need a handle to keep
|
||||
// temporary nodes alive in case the recursion manages to create an identical
|
||||
// node.
|
||||
std::list<HandleSDNode> Handles;
|
||||
|
||||
switch (Opcode) {
|
||||
case ISD::ConstantFP: {
|
||||
// Don't invert constant FP values after legalization unless the target says
|
||||
|
|
@ -6003,11 +6008,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
NegatibleCost CostX = NegatibleCost::Expensive;
|
||||
SDValue NegX =
|
||||
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
|
||||
// Prevent this node from being deleted by the next call.
|
||||
if (NegX)
|
||||
Handles.emplace_back(NegX);
|
||||
|
||||
// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
|
||||
NegatibleCost CostY = NegatibleCost::Expensive;
|
||||
SDValue NegY =
|
||||
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
|
||||
|
||||
// We're done with the handles.
|
||||
Handles.clear();
|
||||
|
||||
// Negate X if its cost is less than or equal to that of Y.
|
||||
if (NegX && (CostX <= CostY)) {
|
||||
Cost = CostX;
|
||||
|
|
@ -6052,11 +6064,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
NegatibleCost CostX = NegatibleCost::Expensive;
|
||||
SDValue NegX =
|
||||
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
|
||||
// Prevent this node from being deleted by the next call.
|
||||
if (NegX)
|
||||
Handles.emplace_back(NegX);
|
||||
|
||||
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
|
||||
NegatibleCost CostY = NegatibleCost::Expensive;
|
||||
SDValue NegY =
|
||||
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
|
||||
|
||||
// We're done with the handles.
|
||||
Handles.clear();
|
||||
|
||||
// Negate X if its cost is less than or equal to that of Y.
|
||||
if (NegX && (CostX <= CostY)) {
|
||||
Cost = CostX;
|
||||
|
|
@ -6094,15 +6113,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
if (!NegZ)
|
||||
break;
|
||||
|
||||
// Prevent this node from being deleted by the next two calls.
|
||||
Handles.emplace_back(NegZ);
|
||||
|
||||
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
|
||||
NegatibleCost CostX = NegatibleCost::Expensive;
|
||||
SDValue NegX =
|
||||
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
|
||||
// Prevent this node from being deleted by the next call.
|
||||
if (NegX)
|
||||
Handles.emplace_back(NegX);
|
||||
|
||||
// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
|
||||
NegatibleCost CostY = NegatibleCost::Expensive;
|
||||
SDValue NegY =
|
||||
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
|
||||
|
||||
// We're done with the handles.
|
||||
Handles.clear();
|
||||
|
||||
// Negate X if its cost is less than or equal to that of Y.
|
||||
if (NegX && (CostX <= CostY)) {
|
||||
Cost = std::min(CostX, CostZ);
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
|
|||
// Ignore intrinsics that do not become real instructions.
|
||||
// TODO: Narrow this to intrinsics that have store-like effects.
|
||||
const auto *CI = cast<CallInst>(I);
|
||||
if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
|
||||
if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -801,8 +801,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
|
|||
MachineBasicBlock::iterator KillPos = KillMI;
|
||||
++KillPos;
|
||||
for (MachineInstr &OtherMI : make_range(End, KillPos)) {
|
||||
// Debug instructions cannot be counted against the limit.
|
||||
if (OtherMI.isDebugInstr())
|
||||
// Debug or pseudo instructions cannot be counted against the limit.
|
||||
if (OtherMI.isDebugOrPseudoInstr())
|
||||
continue;
|
||||
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
|
||||
return false;
|
||||
|
|
@ -974,8 +974,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
|
|||
unsigned NumVisited = 0;
|
||||
for (MachineInstr &OtherMI :
|
||||
make_range(mi, MachineBasicBlock::iterator(KillMI))) {
|
||||
// Debug instructions cannot be counted against the limit.
|
||||
if (OtherMI.isDebugInstr())
|
||||
// Debug or pseudo instructions cannot be counted against the limit.
|
||||
if (OtherMI.isDebugOrPseudoInstr())
|
||||
continue;
|
||||
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder(
|
|||
delete unwrap(JTMB);
|
||||
}
|
||||
|
||||
void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
|
||||
void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
|
||||
delete unwrap(ObjLayer);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -937,6 +937,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
|
||||
return true;
|
||||
}
|
||||
} else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
|
||||
rename(F);
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(),
|
||||
Intrinsic::ptr_annotation,
|
||||
F->arg_begin()->getType());
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
@ -947,6 +953,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||
}
|
||||
break;
|
||||
|
||||
case 'v': {
|
||||
if (Name == "var.annotation" && F->arg_size() == 4) {
|
||||
rename(F);
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(),
|
||||
Intrinsic::var_annotation);
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'x':
|
||||
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
|
||||
return true;
|
||||
|
|
@ -3730,6 +3746,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||
CI->eraseFromParent();
|
||||
return;
|
||||
|
||||
case Intrinsic::ptr_annotation:
|
||||
// Upgrade from versions that lacked the annotation attribute argument.
|
||||
assert(CI->getNumArgOperands() == 4 &&
|
||||
"Before LLVM 12.0 this intrinsic took four arguments");
|
||||
// Create a new call with an added null annotation attribute argument.
|
||||
NewCall = Builder.CreateCall(
|
||||
NewFn,
|
||||
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
|
||||
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
|
||||
NewCall->takeName(CI);
|
||||
CI->replaceAllUsesWith(NewCall);
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
|
||||
case Intrinsic::var_annotation:
|
||||
// Upgrade from versions that lacked the annotation attribute argument.
|
||||
assert(CI->getNumArgOperands() == 4 &&
|
||||
"Before LLVM 12.0 this intrinsic took four arguments");
|
||||
// Create a new call with an added null annotation attribute argument.
|
||||
NewCall = Builder.CreateCall(
|
||||
NewFn,
|
||||
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
|
||||
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
|
||||
case Intrinsic::x86_xop_vfrcz_ss:
|
||||
case Intrinsic::x86_xop_vfrcz_sd:
|
||||
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
|
||||
|
|
|
|||
|
|
@ -633,6 +633,16 @@ bool Instruction::isSafeToRemove() const {
|
|||
!this->isTerminator();
|
||||
}
|
||||
|
||||
bool Instruction::willReturn() const {
|
||||
if (const auto *CB = dyn_cast<CallBase>(this))
|
||||
// FIXME: Temporarily assume that all side-effect free intrinsics will
|
||||
// return. Remove this workaround once all intrinsics are appropriately
|
||||
// annotated.
|
||||
return CB->hasFnAttr(Attribute::WillReturn) ||
|
||||
(isa<IntrinsicInst>(CB) && CB->onlyReadsMemory());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Instruction::isLifetimeStartOrEnd() const {
|
||||
auto II = dyn_cast<IntrinsicInst>(this);
|
||||
if (!II)
|
||||
|
|
@ -641,6 +651,10 @@ bool Instruction::isLifetimeStartOrEnd() const {
|
|||
return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
|
||||
}
|
||||
|
||||
bool Instruction::isDebugOrPseudoInst() const {
|
||||
return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this);
|
||||
}
|
||||
|
||||
const Instruction *
|
||||
Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
|
||||
for (const Instruction *I = getNextNode(); I; I = I->getNextNode())
|
||||
|
|
|
|||
|
|
@ -61,10 +61,17 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
|
|||
bool GEPOperator::accumulateConstantOffset(
|
||||
const DataLayout &DL, APInt &Offset,
|
||||
function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
|
||||
assert(Offset.getBitWidth() ==
|
||||
DL.getIndexSizeInBits(getPointerAddressSpace()) &&
|
||||
"The offset bit width does not match DL specification.");
|
||||
assert(Offset.getBitWidth() ==
|
||||
DL.getIndexSizeInBits(getPointerAddressSpace()) &&
|
||||
"The offset bit width does not match DL specification.");
|
||||
SmallVector<const Value *> Index(value_op_begin() + 1, value_op_end());
|
||||
return GEPOperator::accumulateConstantOffset(getSourceElementType(), Index,
|
||||
DL, Offset, ExternalAnalysis);
|
||||
}
|
||||
|
||||
bool GEPOperator::accumulateConstantOffset(
|
||||
Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
|
||||
APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis) {
|
||||
bool UsedExternalAnalysis = false;
|
||||
auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool {
|
||||
Index = Index.sextOrTrunc(Offset.getBitWidth());
|
||||
|
|
@ -85,9 +92,10 @@ bool GEPOperator::accumulateConstantOffset(
|
|||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
|
||||
GTI != GTE; ++GTI) {
|
||||
auto begin = generic_gep_type_iterator<decltype(Index.begin())>::begin(
|
||||
SourceType, Index.begin());
|
||||
auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
|
||||
for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
|
||||
// Scalable vectors are multiplied by a runtime constant.
|
||||
bool ScalableType = false;
|
||||
if (isa<ScalableVectorType>(GTI.getIndexedType()))
|
||||
|
|
|
|||
|
|
@ -794,7 +794,6 @@ LineCoverageStats::LineCoverageStats(
|
|||
ExecutionCount = WrappedSegment->Count;
|
||||
if (!MinRegionCount)
|
||||
return;
|
||||
ExecutionCount = 0;
|
||||
for (const auto *LS : LineSegments)
|
||||
if (isStartOfRegion(LS))
|
||||
ExecutionCount = std::max(ExecutionCount, LS->Count);
|
||||
|
|
|
|||
|
|
@ -18,9 +18,14 @@
|
|||
#include "llvm/ProfileData/ProfileCommon.h"
|
||||
#include "llvm/ProfileData/SampleProf.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
cl::opt<bool> UseContextLessSummary(
|
||||
"profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
|
||||
cl::desc("Merge context profiles before calculating thresholds."));
|
||||
|
||||
// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
|
||||
// (which is 1000000) is a desired percentile of total counts.
|
||||
static const uint32_t DefaultCutoffsData[] = {
|
||||
|
|
@ -111,6 +116,35 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
|
|||
MaxFunctionCount, NumCounts, NumFunctions);
|
||||
}
|
||||
|
||||
std::unique_ptr<ProfileSummary>
|
||||
SampleProfileSummaryBuilder::computeSummaryForProfiles(
|
||||
const StringMap<sampleprof::FunctionSamples> &Profiles) {
|
||||
assert(NumFunctions == 0 &&
|
||||
"This can only be called on an empty summary builder");
|
||||
StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
|
||||
const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
|
||||
// For CSSPGO, a context-sensitive profile effectively splits a function profile
|
||||
// into many copies each representing the CFG profile of a particular calling
|
||||
// context. That makes the count distribution look more flat as we now have
|
||||
// more function profiles each with lower counts, which in turn leads to lower
|
||||
// hot thresholds. To compensate for that, by default we merge context
|
||||
// profiles before computing profile summary.
|
||||
if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
|
||||
!UseContextLessSummary.getNumOccurrences())) {
|
||||
for (const auto &I : Profiles) {
|
||||
ContextLessProfiles[I.second.getName()].merge(I.second);
|
||||
}
|
||||
ProfilesToUse = &ContextLessProfiles;
|
||||
}
|
||||
|
||||
for (const auto &I : *ProfilesToUse) {
|
||||
const sampleprof::FunctionSamples &Profile = I.second;
|
||||
addRecord(Profile);
|
||||
}
|
||||
|
||||
return getSummary();
|
||||
}
|
||||
|
||||
std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
|
||||
computeDetailedSummary();
|
||||
return std::make_unique<ProfileSummary>(
|
||||
|
|
|
|||
|
|
@ -1610,9 +1610,5 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
|
|||
// profile. Binary format has the profile summary in its header.
|
||||
void SampleProfileReader::computeSummary() {
|
||||
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
|
||||
for (const auto &I : Profiles) {
|
||||
const FunctionSamples &Profile = I.second;
|
||||
Builder.addRecord(Profile);
|
||||
}
|
||||
Summary = Builder.getSummary();
|
||||
Summary = Builder.computeSummaryForProfiles(Profiles);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -360,10 +360,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
|
|||
/// it needs to be parsed by the SampleProfileReaderText class.
|
||||
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
|
||||
auto &OS = *OutputStream;
|
||||
if (FunctionSamples::ProfileIsCS)
|
||||
OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples();
|
||||
else
|
||||
OS << S.getName() << ":" << S.getTotalSamples();
|
||||
OS << S.getNameWithContext(true) << ":" << S.getTotalSamples();
|
||||
if (Indent == 0)
|
||||
OS << ":" << S.getHeadSamples();
|
||||
OS << "\n";
|
||||
|
|
@ -752,9 +749,5 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
|
|||
void SampleProfileWriter::computeSummary(
|
||||
const StringMap<FunctionSamples> &ProfileMap) {
|
||||
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
|
||||
for (const auto &I : ProfileMap) {
|
||||
const FunctionSamples &Profile = I.second;
|
||||
Builder.addRecord(Profile);
|
||||
}
|
||||
Summary = Builder.getSummary();
|
||||
Summary = Builder.computeSummaryForProfiles(ProfileMap);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) {
|
|||
}
|
||||
|
||||
static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
|
||||
// First, check if the file is on a network (non-local) drive. If so, don't
|
||||
// set DeleteFile to true, since it prevents opening the file for writes.
|
||||
// Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a
|
||||
// network file. On Windows 7 the function realPathFromHandle() below fails
|
||||
// if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by
|
||||
// a prior call.
|
||||
FILE_DISPOSITION_INFO Disposition;
|
||||
Disposition.DeleteFile = false;
|
||||
if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
|
||||
sizeof(Disposition)))
|
||||
return mapWindowsError(::GetLastError());
|
||||
if (!Delete)
|
||||
return std::error_code();
|
||||
|
||||
// Check if the file is on a network (non-local) drive. If so, don't
|
||||
// continue when DeleteFile is true, since it prevents opening the file for
|
||||
// writes. Note -- this will leak temporary files on disk, but only when the
|
||||
// target file is on a network drive.
|
||||
SmallVector<wchar_t, 128> FinalPath;
|
||||
if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
|
||||
return EC;
|
||||
|
|
@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
|
|||
if (!IsLocal)
|
||||
return std::error_code();
|
||||
|
||||
// The file is on a local drive, set the DeleteFile to true.
|
||||
FILE_DISPOSITION_INFO Disposition;
|
||||
Disposition.DeleteFile = Delete;
|
||||
// The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
|
||||
// flag.
|
||||
Disposition.DeleteFile = true;
|
||||
if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
|
||||
sizeof(Disposition)))
|
||||
return mapWindowsError(::GetLastError());
|
||||
|
|
|
|||
|
|
@ -1017,11 +1017,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
// Vector reductions
|
||||
for (MVT VT : { MVT::v4f16, MVT::v2f32,
|
||||
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
|
||||
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
|
||||
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
|
||||
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
|
||||
|
||||
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())
|
||||
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
|
||||
}
|
||||
}
|
||||
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
|
||||
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
|
||||
|
|
|
|||
|
|
@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
|
|||
User->getMachineOpcode() != PPC::SELECT_I8)
|
||||
return false;
|
||||
|
||||
SDNode *Op1 = User->getOperand(1).getNode();
|
||||
SDNode *Op2 = User->getOperand(2).getNode();
|
||||
// If we have a degenerate select with two equal operands, swapping will
|
||||
// not do anything, and we may run into an infinite loop.
|
||||
if (Op1 == Op2)
|
||||
return false;
|
||||
|
||||
if (!Op2->isMachineOpcode())
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -504,19 +504,19 @@ def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
|
|||
def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
|
||||
def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
|
||||
|
||||
defm VL1R : VWholeLoad<1, "vl1r">;
|
||||
defm VL2R : VWholeLoad<2, "vl2r">;
|
||||
defm VL4R : VWholeLoad<4, "vl4r">;
|
||||
defm VL8R : VWholeLoad<8, "vl8r">;
|
||||
defm VL1R : VWholeLoad<0, "vl1r">;
|
||||
defm VL2R : VWholeLoad<1, "vl2r">;
|
||||
defm VL4R : VWholeLoad<3, "vl4r">;
|
||||
defm VL8R : VWholeLoad<7, "vl8r">;
|
||||
def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
|
||||
def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>;
|
||||
def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>;
|
||||
def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>;
|
||||
|
||||
def VS1R_V : VWholeStore<1, "vs1r.v">;
|
||||
def VS2R_V : VWholeStore<2, "vs2r.v">;
|
||||
def VS4R_V : VWholeStore<4, "vs4r.v">;
|
||||
def VS8R_V : VWholeStore<8, "vs8r.v">;
|
||||
def VS1R_V : VWholeStore<0, "vs1r.v">;
|
||||
def VS2R_V : VWholeStore<1, "vs2r.v">;
|
||||
def VS4R_V : VWholeStore<3, "vs4r.v">;
|
||||
def VS8R_V : VWholeStore<7, "vs8r.v">;
|
||||
|
||||
// Vector Single-Width Integer Add and Subtract
|
||||
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
|
||||
|
|
|
|||
|
|
@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Make sure no potentially eflags clobbering phi moves can be inserted in
|
||||
// between.
|
||||
auto HasPhis = [](const BasicBlock *Succ) {
|
||||
return !llvm::empty(Succ->phis());
|
||||
};
|
||||
if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
|
||||
return false;
|
||||
|
||||
CC = TmpCC;
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3778,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
|
|||
VEX_4V, VEX_WIG;
|
||||
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
|
||||
i128mem, SchedWriteShuffle.XMM, load, 0>,
|
||||
VEX_4V;
|
||||
VEX_4V, VEX_WIG;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
|
||||
|
|
@ -3794,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
|
|||
VEX_4V, VEX_L, VEX_WIG;
|
||||
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
|
||||
i256mem, SchedWriteShuffle.YMM, load, 0>,
|
||||
VEX_4V, VEX_L;
|
||||
VEX_4V, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
|
|
@ -4756,7 +4756,7 @@ let isCommutable = 0 in {
|
|||
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
|
||||
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
|
||||
load, i128mem,
|
||||
SchedWritePHAdd.XMM, 0>, VEX_4V;
|
||||
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
|
||||
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
|
||||
int_x86_ssse3_psign_b_128,
|
||||
SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
|
||||
|
|
@ -4802,7 +4802,7 @@ let isCommutable = 0 in {
|
|||
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
|
||||
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
|
||||
load, i256mem,
|
||||
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
|
||||
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
|
||||
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
|
||||
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
|
||||
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
|
||||
|
|
@ -6503,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
|
|||
|
||||
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
|
||||
let Predicates = [HasAVX] in
|
||||
defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
|
||||
defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
|
||||
defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ;
|
||||
}
|
||||
|
||||
|
|
@ -6521,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
|
|||
|
||||
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
|
||||
let Predicates = [HasAVX] in
|
||||
defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
|
||||
defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
|
||||
defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">;
|
||||
}
|
||||
|
||||
|
|
@ -6539,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> {
|
|||
|
||||
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
|
||||
let Predicates = [HasAVX] in
|
||||
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
|
||||
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
|
||||
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
|
||||
}
|
||||
|
||||
|
|
@ -6557,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> {
|
|||
|
||||
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
|
||||
let Predicates = [HasAVX] in
|
||||
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
|
||||
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
|
||||
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -149,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
|
|||
if (isNoModRef(MRI))
|
||||
continue;
|
||||
|
||||
// A pseudo probe call shouldn't change any function attribute since it
|
||||
// doesn't translate to a real instruction. It comes with a memory access
|
||||
// tag to prevent itself being removed by optimizations and not block
|
||||
// other instructions being optimized.
|
||||
if (isa<PseudoProbeInst>(I))
|
||||
continue;
|
||||
|
||||
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
|
||||
// The call could access any memory. If that includes writes, note it.
|
||||
if (isModSet(MRI))
|
||||
|
|
@ -1445,8 +1452,7 @@ static bool functionWillReturn(const Function &F) {
|
|||
// If there are no loops, then the function is willreturn if all calls in
|
||||
// it are willreturn.
|
||||
return all_of(instructions(F), [](const Instruction &I) {
|
||||
const auto *CB = dyn_cast<CallBase>(&I);
|
||||
return !CB || CB->hasFnAttr(Attribute::WillReturn);
|
||||
return I.willReturn();
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -263,6 +263,17 @@ SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
|
|||
return Node->getFunctionSamples();
|
||||
}
|
||||
|
||||
SampleContextTracker::ContextSamplesTy &
|
||||
SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
|
||||
StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
|
||||
return FuncToCtxtProfileSet[CanonName];
|
||||
}
|
||||
|
||||
SampleContextTracker::ContextSamplesTy &
|
||||
SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
|
||||
return FuncToCtxtProfileSet[Name];
|
||||
}
|
||||
|
||||
FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
|
||||
bool MergeContext) {
|
||||
StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
|
||||
|
|
@ -550,4 +561,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
|
|||
return *ToNode;
|
||||
}
|
||||
|
||||
// Replace call graph edges with dynamic call edges from the profile.
|
||||
void SampleContextTracker::addCallGraphEdges(CallGraph &CG,
|
||||
StringMap<Function *> &SymbolMap) {
|
||||
// Add profile call edges to the call graph.
|
||||
std::queue<ContextTrieNode *> NodeQueue;
|
||||
NodeQueue.push(&RootContext);
|
||||
while (!NodeQueue.empty()) {
|
||||
ContextTrieNode *Node = NodeQueue.front();
|
||||
NodeQueue.pop();
|
||||
Function *F = SymbolMap.lookup(Node->getFuncName());
|
||||
for (auto &I : Node->getAllChildContext()) {
|
||||
ContextTrieNode *ChildNode = &I.second;
|
||||
NodeQueue.push(ChildNode);
|
||||
if (F && !F->isDeclaration()) {
|
||||
Function *Callee = SymbolMap.lookup(ChildNode->getFuncName());
|
||||
if (Callee && !Callee->isDeclaration())
|
||||
CG[F]->addCalledFunction(nullptr, CG[Callee]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace llvm
|
||||
|
|
|
|||
|
|
@ -177,6 +177,16 @@ static cl::opt<bool> ProfileTopDownLoad(
|
|||
"order of call graph during sample profile loading. It only "
|
||||
"works for new pass manager. "));
|
||||
|
||||
// Hidden boolean option "use-profile-indirect-call-edges"; enabled by default.
// buildFunctionOrder checks it together with ProfileIsCS before augmenting the
// call graph with indirect call edges taken from the profile.
static cl::opt<bool> UseProfileIndirectCallEdges(
|
||||
"use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
|
||||
cl::desc("Considering indirect call samples from profile when top-down "
|
||||
"processing functions. Only CSSPGO is supported."));
|
||||
|
||||
// Hidden boolean option "use-profile-top-down-order"; disabled by default.
// buildFunctionOrder also applies the profile-based ordering when the profile
// is context-sensitive and this option was not given on the command line
// (it tests getNumOccurrences() for that case).
static cl::opt<bool> UseProfileTopDownOrder(
|
||||
"use-profile-top-down-order", cl::init(false), cl::Hidden,
|
||||
cl::desc("Process functions in one SCC in a top-down order "
|
||||
"based on the input profile."));
|
||||
|
||||
static cl::opt<bool> ProfileSizeInline(
|
||||
"sample-profile-inline-size", cl::Hidden, cl::init(false),
|
||||
cl::desc("Inline cold call sites in profile loader if it's beneficial "
|
||||
|
|
@ -458,6 +468,8 @@ protected:
|
|||
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
|
||||
void buildEdges(Function &F);
|
||||
std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
|
||||
void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
|
||||
void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
|
||||
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
|
||||
void computeDominanceAndLoopInfo(Function &F);
|
||||
void clearFunctionData();
|
||||
|
|
@ -2278,6 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
|||
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
|
||||
"Sample Profile loader", false, false)
|
||||
|
||||
// Add inlined profile call edges to the call graph.
|
||||
void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
|
||||
const FunctionSamples &Samples) {
|
||||
Function *Caller = SymbolMap.lookup(Samples.getFuncName());
|
||||
if (!Caller || Caller->isDeclaration())
|
||||
return;
|
||||
|
||||
// Skip non-inlined call edges which are not important since top down inlining
|
||||
// for non-CS profile is to get more precise profile matching, not to enable
|
||||
// more inlining.
|
||||
|
||||
for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
|
||||
for (const auto &InlinedSamples : CallsiteSamples.second) {
|
||||
Function *Callee = SymbolMap.lookup(InlinedSamples.first);
|
||||
if (Callee && !Callee->isDeclaration())
|
||||
CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
|
||||
addCallGraphEdges(CG, InlinedSamples.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Replace call graph edges with dynamic call edges from the profile.
|
||||
void SampleProfileLoader::replaceCallGraphEdges(
|
||||
CallGraph &CG, StringMap<Function *> &SymbolMap) {
|
||||
// Remove static call edges from the call graph except for the ones from the
|
||||
// root which make the call graph connected.
|
||||
for (const auto &Node : CG)
|
||||
if (Node.second.get() != CG.getExternalCallingNode())
|
||||
Node.second->removeAllCalledFunctions();
|
||||
|
||||
// Add profile call edges to the call graph.
|
||||
if (ProfileIsCS) {
|
||||
ContextTracker->addCallGraphEdges(CG, SymbolMap);
|
||||
} else {
|
||||
for (const auto &Samples : Reader->getProfiles())
|
||||
addCallGraphEdges(CG, Samples.second);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Function *>
|
||||
SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
|
||||
std::vector<Function *> FunctionOrderList;
|
||||
|
|
@ -2300,16 +2351,97 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
|
|||
}
|
||||
|
||||
assert(&CG->getModule() == &M);
|
||||
|
||||
// Add indirect call edges from profile to augment the static call graph.
|
||||
// Functions will be processed in a top-down order defined by the static call
|
||||
// graph. Adjusting the order by considering indirect call edges from the
|
||||
// profile (which don't exist in the static call graph) can enable the
|
||||
// inlining of indirect call targets by processing the caller before them.
|
||||
// TODO: enable this for non-CS profile and fix the counts returning logic to
|
||||
// have a full support for indirect calls.
|
||||
if (UseProfileIndirectCallEdges && ProfileIsCS) {
|
||||
for (auto &Entry : *CG) {
|
||||
const auto *F = Entry.first;
|
||||
if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
|
||||
continue;
|
||||
auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
|
||||
if (AllContexts.empty())
|
||||
continue;
|
||||
|
||||
for (const auto &BB : *F) {
|
||||
for (const auto &I : BB.getInstList()) {
|
||||
const auto *CB = dyn_cast<CallBase>(&I);
|
||||
if (!CB || !CB->isIndirectCall())
|
||||
continue;
|
||||
const DebugLoc &DLoc = I.getDebugLoc();
|
||||
if (!DLoc)
|
||||
continue;
|
||||
auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
|
||||
for (FunctionSamples *Samples : AllContexts) {
|
||||
if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
|
||||
for (const auto &Target : CallTargets.get()) {
|
||||
Function *Callee = SymbolMap.lookup(Target.first());
|
||||
if (Callee && !Callee->isDeclaration())
|
||||
Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute a top-down order from the profile which is used to sort functions in
|
||||
// one SCC later. The static processing order computed for an SCC may not
|
||||
// reflect the call contexts in the context-sensitive profile, thus may cause
|
||||
// potential inlining to be overlooked. The function order in one SCC is being
|
||||
// adjusted to a top-down order based on the profile to favor more inlining.
|
||||
DenseMap<Function *, uint64_t> ProfileOrderMap;
|
||||
if (UseProfileTopDownOrder ||
|
||||
(ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
|
||||
// Create a static call graph. The call edges are not important since they
|
||||
// will be replaced by dynamic edges from the profile.
|
||||
CallGraph ProfileCG(M);
|
||||
replaceCallGraphEdges(ProfileCG, SymbolMap);
|
||||
scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
|
||||
uint64_t I = 0;
|
||||
while (!CGI.isAtEnd()) {
|
||||
for (CallGraphNode *Node : *CGI) {
|
||||
if (auto *F = Node->getFunction())
|
||||
ProfileOrderMap[F] = ++I;
|
||||
}
|
||||
++CGI;
|
||||
}
|
||||
}
|
||||
|
||||
scc_iterator<CallGraph *> CGI = scc_begin(CG);
|
||||
while (!CGI.isAtEnd()) {
|
||||
for (CallGraphNode *node : *CGI) {
|
||||
auto F = node->getFunction();
|
||||
uint64_t Start = FunctionOrderList.size();
|
||||
for (CallGraphNode *Node : *CGI) {
|
||||
auto *F = Node->getFunction();
|
||||
if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
|
||||
FunctionOrderList.push_back(F);
|
||||
}
|
||||
|
||||
// Sort nodes in SCC based on the profile top-down order.
|
||||
if (!ProfileOrderMap.empty()) {
|
||||
std::stable_sort(FunctionOrderList.begin() + Start,
|
||||
FunctionOrderList.end(),
|
||||
[&ProfileOrderMap](Function *Left, Function *Right) {
|
||||
return ProfileOrderMap[Left] < ProfileOrderMap[Right];
|
||||
});
|
||||
}
|
||||
|
||||
++CGI;
|
||||
}
|
||||
|
||||
LLVM_DEBUG({
|
||||
dbgs() << "Function processing order:\n";
|
||||
for (auto F : reverse(FunctionOrderList)) {
|
||||
dbgs() << F->getName() << "\n";
|
||||
}
|
||||
});
|
||||
|
||||
std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
|
||||
return FunctionOrderList;
|
||||
}
|
||||
|
|
@ -2461,6 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
|
|||
}
|
||||
|
||||
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
|
||||
LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
|
||||
DILocation2SampleMap.clear();
|
||||
// By default the entry count is initialized to -1, which will be treated
|
||||
// conservatively by getEntryCount as the same as unknown (None). This is
|
||||
|
|
|
|||
|
|
@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
|
|||
ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
|
||||
ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
|
||||
if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
|
||||
LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
|
||||
(transformZExtICmp(LHS, CI, false) ||
|
||||
transformZExtICmp(RHS, CI, false))) {
|
||||
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
|
||||
|
|
|
|||
|
|
@ -592,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
|
|||
BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
|
||||
|
||||
for (++BBI; BBI != E; ++BBI)
|
||||
if (BBI->mayWriteToMemory())
|
||||
if (BBI->mayWriteToMemory()) {
|
||||
// Calls that only access inaccessible memory do not block sinking the
|
||||
// load.
|
||||
if (auto *CB = dyn_cast<CallBase>(BBI))
|
||||
if (CB->onlyAccessesInaccessibleMemory())
|
||||
continue;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for non-address taken alloca. If not address-taken already, it isn't
|
||||
// profitable to do this xform.
|
||||
|
|
|
|||
|
|
@ -345,10 +345,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
|
|||
return false;
|
||||
|
||||
// Get the constant out of the ICmp, if there is one.
|
||||
// Only try this when exactly 1 operand is a constant (if both operands
|
||||
// are constant, the icmp should eventually simplify). Otherwise, we may
|
||||
// invert the transform that reduces set bits and infinite-loop.
|
||||
Value *X;
|
||||
const APInt *CmpC;
|
||||
ICmpInst::Predicate Pred;
|
||||
if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) ||
|
||||
CmpC->getBitWidth() != SelC->getBitWidth())
|
||||
if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
|
||||
isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
|
||||
return ShrinkDemandedConstant(I, OpNo, DemandedMask);
|
||||
|
||||
// If the constant is already the same as the ICmp, leave it as-is.
|
||||
|
|
|
|||
|
|
@ -3878,9 +3878,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
|
|||
}
|
||||
}
|
||||
|
||||
// Skip processing debug intrinsics in InstCombine. Processing these call instructions
|
||||
// consumes non-trivial amount of time and provides no value for the optimization.
|
||||
if (!isa<DbgInfoIntrinsic>(Inst)) {
|
||||
// Skip processing debug and pseudo intrinsics in InstCombine. Processing
|
||||
// these call instructions consumes non-trivial amount of time and
|
||||
// provides no value for the optimization.
|
||||
if (!Inst->isDebugOrPseudoInst()) {
|
||||
InstrsForInstCombineWorklist.push_back(Inst);
|
||||
SeenAliasScopes.analyse(Inst);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() {
|
|||
|
||||
bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
|
||||
// TODO -- use llvm::isInstructionTriviallyDead
|
||||
if (I.isEHPad() || I.mayHaveSideEffects()) {
|
||||
if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) {
|
||||
// Skip any value profile instrumentation calls if they are
|
||||
// instrumenting constants.
|
||||
if (isInstrumentsConstant(I))
|
||||
|
|
|
|||
|
|
@ -2076,6 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
|
|||
ValueMapping[PN] = NewPN;
|
||||
}
|
||||
|
||||
// Clone noalias scope declarations in the threaded block. When threading a
|
||||
// loop exit, we would otherwise end up with two identical scope declarations
|
||||
// visible at the same time.
|
||||
SmallVector<MDNode *> NoAliasScopes;
|
||||
DenseMap<MDNode *, MDNode *> ClonedScopes;
|
||||
LLVMContext &Context = PredBB->getContext();
|
||||
identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
|
||||
cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
|
||||
|
||||
// Clone the non-phi instructions of the source basic block into NewBB,
|
||||
// keeping track of the mapping and using it to remap operands in the cloned
|
||||
// instructions.
|
||||
|
|
@ -2084,6 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
|
|||
New->setName(BI->getName());
|
||||
NewBB->getInstList().push_back(New);
|
||||
ValueMapping[&*BI] = New;
|
||||
adaptNoAliasScopes(New, ClonedScopes, Context);
|
||||
|
||||
// Remap operands to patch up intra-block references.
|
||||
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
|
||||
|
|
|
|||
|
|
@ -2524,7 +2524,7 @@ private:
|
|||
NewAI.getAlign(), LI.isVolatile(),
|
||||
LI.getName());
|
||||
if (AATags)
|
||||
NewLI->setAAMetadata(AATags);
|
||||
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
if (LI.isVolatile())
|
||||
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
|
||||
if (NewLI->isAtomic())
|
||||
|
|
@ -2563,7 +2563,7 @@ private:
|
|||
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
|
||||
getSliceAlign(), LI.isVolatile(), LI.getName());
|
||||
if (AATags)
|
||||
NewLI->setAAMetadata(AATags);
|
||||
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
if (LI.isVolatile())
|
||||
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
|
||||
|
||||
|
|
@ -2626,7 +2626,7 @@ private:
|
|||
}
|
||||
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
|
||||
if (AATags)
|
||||
Store->setAAMetadata(AATags);
|
||||
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
Pass.DeadInsts.push_back(&SI);
|
||||
|
||||
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
|
||||
|
|
@ -2650,7 +2650,7 @@ private:
|
|||
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
|
||||
LLVMContext::MD_access_group});
|
||||
if (AATags)
|
||||
Store->setAAMetadata(AATags);
|
||||
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
Pass.DeadInsts.push_back(&SI);
|
||||
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
|
||||
return true;
|
||||
|
|
@ -2720,7 +2720,7 @@ private:
|
|||
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
|
||||
LLVMContext::MD_access_group});
|
||||
if (AATags)
|
||||
NewSI->setAAMetadata(AATags);
|
||||
NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
if (SI.isVolatile())
|
||||
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
|
||||
if (NewSI->isAtomic())
|
||||
|
|
@ -2816,7 +2816,7 @@ private:
|
|||
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
|
||||
MaybeAlign(getSliceAlign()), II.isVolatile());
|
||||
if (AATags)
|
||||
New->setAAMetadata(AATags);
|
||||
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -2885,7 +2885,7 @@ private:
|
|||
StoreInst *New =
|
||||
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
|
||||
if (AATags)
|
||||
New->setAAMetadata(AATags);
|
||||
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
|
||||
return !II.isVolatile();
|
||||
}
|
||||
|
|
@ -3006,7 +3006,7 @@ private:
|
|||
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
|
||||
Size, II.isVolatile());
|
||||
if (AATags)
|
||||
New->setAAMetadata(AATags);
|
||||
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -3060,7 +3060,7 @@ private:
|
|||
LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
|
||||
II.isVolatile(), "copyload");
|
||||
if (AATags)
|
||||
Load->setAAMetadata(AATags);
|
||||
Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
Src = Load;
|
||||
}
|
||||
|
||||
|
|
@ -3080,7 +3080,7 @@ private:
|
|||
StoreInst *Store = cast<StoreInst>(
|
||||
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
|
||||
if (AATags)
|
||||
Store->setAAMetadata(AATags);
|
||||
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
|
||||
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
|
||||
return !II.isVolatile();
|
||||
}
|
||||
|
|
@ -3381,8 +3381,13 @@ private:
|
|||
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
|
||||
LoadInst *Load =
|
||||
IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
|
||||
if (AATags)
|
||||
Load->setAAMetadata(AATags);
|
||||
|
||||
APInt Offset(
|
||||
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
|
||||
if (AATags &&
|
||||
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
|
||||
Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
|
||||
|
||||
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
|
||||
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
|
||||
}
|
||||
|
|
@ -3428,8 +3433,13 @@ private:
|
|||
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
|
||||
StoreInst *Store =
|
||||
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
|
||||
if (AATags)
|
||||
Store->setAAMetadata(AATags);
|
||||
|
||||
APInt Offset(
|
||||
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
|
||||
if (AATags &&
|
||||
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
|
||||
Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
|
||||
|
||||
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -989,3 +989,11 @@ void llvm::identifyNoAliasScopesToClone(
|
|||
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
|
||||
NoAliasDeclScopes.push_back(Decl->getScopeList());
|
||||
}
|
||||
|
||||
/// Collect the scope lists of all noalias scope declarations found in the
/// instruction range [\p Start, \p End) and append them, in instruction
/// order, to \p NoAliasDeclScopes.
void llvm::identifyNoAliasScopesToClone(
    BasicBlock::iterator Start, BasicBlock::iterator End,
    SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
  for (BasicBlock::iterator It = Start; It != End; ++It) {
    auto *ScopeDecl = dyn_cast<NoAliasScopeDeclInst>(&*It);
    if (!ScopeDecl)
      continue;
    NoAliasDeclScopes.push_back(ScopeDecl->getScopeList());
  }
}
|
||||
|
|
|
|||
|
|
@ -420,13 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (auto *CB = dyn_cast<CallBase>(I)) {
|
||||
// Treat calls that may not return as alive.
|
||||
// TODO: Remove the intrinsic escape hatch once all intrinsics set
|
||||
// willreturn properly.
|
||||
if (!CB->willReturn() && !isa<IntrinsicInst>(I))
|
||||
return false;
|
||||
}
|
||||
if (!I->willReturn())
|
||||
return false;
|
||||
|
||||
if (!I->mayHaveSideEffects())
|
||||
return true;
|
||||
|
|
@ -923,6 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN,
|
|||
/// \param IncomingValues A map from block to value.
|
||||
static void replaceUndefValuesInPhi(PHINode *PN,
|
||||
const IncomingValueMap &IncomingValues) {
|
||||
SmallVector<unsigned> TrueUndefOps;
|
||||
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
|
||||
Value *V = PN->getIncomingValue(i);
|
||||
|
||||
|
|
@ -930,10 +926,31 @@ static void replaceUndefValuesInPhi(PHINode *PN,
|
|||
|
||||
BasicBlock *BB = PN->getIncomingBlock(i);
|
||||
IncomingValueMap::const_iterator It = IncomingValues.find(BB);
|
||||
if (It == IncomingValues.end()) continue;
|
||||
|
||||
// Keep track of undef/poison incoming values. Those must match, so we fix
|
||||
// them up below if needed.
|
||||
// Note: this is conservatively correct, but we could try harder and group
|
||||
// the undef values per incoming basic block.
|
||||
if (It == IncomingValues.end()) {
|
||||
TrueUndefOps.push_back(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// There is a defined value for this incoming block, so map this undef
|
||||
// incoming value to the defined value.
|
||||
PN->setIncomingValue(i, It->second);
|
||||
}
|
||||
|
||||
// If there are both undef and poison values incoming, then convert those
|
||||
// values to undef. It is invalid to have different values for the same
|
||||
// incoming block.
|
||||
unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) {
|
||||
return isa<PoisonValue>(PN->getIncomingValue(i));
|
||||
});
|
||||
if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) {
|
||||
for (unsigned i : TrueUndefOps)
|
||||
PN->setIncomingValue(i, UndefValue::get(PN->getType()));
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace a value flowing from a block to a phi with
|
||||
|
|
|
|||
|
|
@ -1628,6 +1628,11 @@ static bool canSinkInstructions(
|
|||
I->getType()->isTokenTy())
|
||||
return false;
|
||||
|
||||
// Do not try to sink an instruction in an infinite loop - it can cause
|
||||
// this algorithm to infinite loop.
|
||||
if (I->getParent()->getSingleSuccessor() == I->getParent())
|
||||
return false;
|
||||
|
||||
// Conservatively return false if I is an inline-asm instruction. Sinking
|
||||
// and merging inline-asm instructions can potentially create arguments
|
||||
// that cannot satisfy the inline-asm constraints.
|
||||
|
|
@ -1714,13 +1719,13 @@ static bool canSinkInstructions(
|
|||
return true;
|
||||
}
|
||||
|
||||
// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
|
||||
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
|
||||
// instruction of every block in Blocks to their common successor, commoning
|
||||
// into one instruction.
|
||||
static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
|
||||
auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
|
||||
|
||||
// canSinkLastInstruction returning true guarantees that every block has at
|
||||
// canSinkInstructions returning true guarantees that every block has at
|
||||
// least one non-terminator instruction.
|
||||
SmallVector<Instruction*,4> Insts;
|
||||
for (auto *BB : Blocks) {
|
||||
|
|
@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
|
|||
}
|
||||
|
||||
// The only checking we need to do now is that all users of all instructions
|
||||
// are the same PHI node. canSinkLastInstruction should have checked this but
|
||||
// it is slightly over-aggressive - it gets confused by commutative instructions
|
||||
// so double-check it here.
|
||||
// are the same PHI node. canSinkInstructions should have checked this but
|
||||
// it is slightly over-aggressive - it gets confused by commutative
|
||||
// instructions so double-check it here.
|
||||
Instruction *I0 = Insts.front();
|
||||
if (!I0->user_empty()) {
|
||||
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
|
||||
|
|
@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// We don't need to do any more checking here; canSinkLastInstruction should
|
||||
// We don't need to do any more checking here; canSinkInstructions should
|
||||
// have done it all for us.
|
||||
SmallVector<Value*, 4> NewOperands;
|
||||
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
|
||||
// This check is different to that in canSinkLastInstruction. There, we
|
||||
// This check is different to that in canSinkInstructions. There, we
|
||||
// cared about the global view once simplifycfg (and instcombine) have
|
||||
// completed - it takes into account PHIs that become trivially
|
||||
// simplifiable. However here we need a more local view; if an operand
|
||||
|
|
|
|||
|
|
@ -142,6 +142,10 @@ public:
|
|||
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
|
||||
}
|
||||
|
||||
/// Build a select by forwarding \p Cond, \p TrueVal and \p FalseVal, in that
/// order, to createNaryOp with the Instruction::Select opcode, and return the
/// value createNaryOp produces.
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
|
||||
return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// RAII helpers.
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
|
|
|||
|
|
@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) {
|
|||
|
||||
/// A helper function that returns true if the given type is irregular. The
|
||||
/// type is irregular if its allocated size doesn't equal the store size of an
|
||||
/// element of the corresponding vector type at the given vectorization factor.
|
||||
static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
|
||||
// Determine if an array of VF elements of type Ty is "bitcast compatible"
|
||||
// with a <VF x Ty> vector.
|
||||
if (VF.isVector()) {
|
||||
auto *VectorTy = VectorType::get(Ty, VF);
|
||||
return TypeSize::get(VF.getKnownMinValue() *
|
||||
DL.getTypeAllocSize(Ty).getFixedValue(),
|
||||
VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
|
||||
}
|
||||
|
||||
// If the vectorization factor is one, we just check if an array of type Ty
|
||||
// requires padding between elements.
|
||||
/// element of the corresponding vector type.
|
||||
static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
  // An array of N elements of type Ty is "bitcast compatible" with a
  // <N x Ty> vector only when the array has no padding between its elements,
  // i.e. when the allocated size of Ty matches its size in bits. Any
  // difference between the two makes the type irregular.
  const auto AllocBits = DL.getTypeAllocSizeInBits(Ty);
  const auto ValueBits = DL.getTypeSizeInBits(Ty);
  return AllocBits != ValueBits;
}
|
||||
|
||||
|
|
@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
|
|||
// requires padding and will be scalarized.
|
||||
auto &DL = I->getModule()->getDataLayout();
|
||||
auto *ScalarTy = getMemInstValueType(I);
|
||||
if (hasIrregularType(ScalarTy, DL, VF))
|
||||
if (hasIrregularType(ScalarTy, DL))
|
||||
return false;
|
||||
|
||||
// Check if masking is required.
|
||||
|
|
@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
|
|||
// requires padding and will be scalarized.
|
||||
auto &DL = I->getModule()->getDataLayout();
|
||||
auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
|
||||
if (hasIrregularType(ScalarTy, DL, VF))
|
||||
if (hasIrregularType(ScalarTy, DL))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
@ -8195,8 +8187,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
|
|||
if (BI->getSuccessor(0) != Dst)
|
||||
EdgeMask = Builder.createNot(EdgeMask);
|
||||
|
||||
if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
|
||||
EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
|
||||
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
|
||||
// The condition is 'SrcMask && EdgeMask', which is equivalent to
|
||||
// 'select i1 SrcMask, i1 EdgeMask, i1 false'.
|
||||
// The select version does not introduce new UB if SrcMask is false and
|
||||
// EdgeMask is poison. Using 'and' here introduces undefined behavior.
|
||||
VPValue *False = Plan->getOrAddVPValue(
|
||||
ConstantInt::getFalse(BI->getCondition()->getType()));
|
||||
EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
|
||||
}
|
||||
|
||||
return EdgeMaskCache[Edge] = EdgeMask;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -526,8 +526,8 @@ getDWOFilenames(StringRef ExecFilename) {
|
|||
std::string DWOCompDir =
|
||||
dwarf::toString(Die.find(dwarf::DW_AT_comp_dir), "");
|
||||
if (!DWOCompDir.empty()) {
|
||||
SmallString<16> DWOPath;
|
||||
sys::path::append(DWOPath, DWOCompDir, DWOName);
|
||||
SmallString<16> DWOPath(std::move(DWOName));
|
||||
sys::fs::make_absolute(DWOCompDir, DWOPath);
|
||||
DWOPaths.emplace_back(DWOPath.data(), DWOPath.size());
|
||||
} else {
|
||||
DWOPaths.push_back(std::move(DWOName));
|
||||
|
|
|
|||
|
|
@ -947,8 +947,8 @@ protected:
|
|||
std::unordered_map<std::string, std::vector<StringRef>> LineCache;
|
||||
// Keep track of missing sources.
|
||||
StringSet<> MissingSources;
|
||||
// Only emit 'no debug info' warning once.
|
||||
bool WarnedNoDebugInfo;
|
||||
// Only emit 'invalid debug info' warning once.
|
||||
bool WarnedInvalidDebugInfo = false;
|
||||
|
||||
private:
|
||||
bool cacheSource(const DILineInfo& LineInfoFile);
|
||||
|
|
@ -962,8 +962,7 @@ private:
|
|||
|
||||
public:
|
||||
SourcePrinter() = default;
|
||||
SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch)
|
||||
: Obj(Obj), WarnedNoDebugInfo(false) {
|
||||
SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
|
||||
symbolize::LLVMSymbolizer::Options SymbolizerOpts;
|
||||
SymbolizerOpts.PrintFunctions =
|
||||
DILineInfoSpecifier::FunctionNameKind::LinkageName;
|
||||
|
|
@ -1018,22 +1017,17 @@ void SourcePrinter::printSourceLine(formatted_raw_ostream &OS,
|
|||
return;
|
||||
|
||||
DILineInfo LineInfo = DILineInfo();
|
||||
auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address);
|
||||
Expected<DILineInfo> ExpectedLineInfo =
|
||||
Symbolizer->symbolizeCode(*Obj, Address);
|
||||
std::string ErrorMessage;
|
||||
if (!ExpectedLineInfo)
|
||||
ErrorMessage = toString(ExpectedLineInfo.takeError());
|
||||
else
|
||||
if (ExpectedLineInfo) {
|
||||
LineInfo = *ExpectedLineInfo;
|
||||
|
||||
if (LineInfo.FileName == DILineInfo::BadString) {
|
||||
if (!WarnedNoDebugInfo) {
|
||||
std::string Warning =
|
||||
"failed to parse debug information for " + ObjectFilename.str();
|
||||
if (!ErrorMessage.empty())
|
||||
Warning += ": " + ErrorMessage;
|
||||
reportWarning(Warning, ObjectFilename);
|
||||
WarnedNoDebugInfo = true;
|
||||
}
|
||||
} else if (!WarnedInvalidDebugInfo) {
|
||||
WarnedInvalidDebugInfo = true;
|
||||
// TODO Untested.
|
||||
reportWarning("failed to parse debug information: " +
|
||||
toString(ExpectedLineInfo.takeError()),
|
||||
ObjectFilename);
|
||||
}
|
||||
|
||||
if (!Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) {
|
||||
|
|
|
|||
|
|
@ -181,7 +181,12 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
|
|||
// the topmost function, which suits our needs better.
|
||||
auto ResOrErr = Symbolizer.symbolizeInlinedCode(
|
||||
ModuleName, {Offset, object::SectionedAddress::UndefSection});
|
||||
Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
|
||||
if (!ResOrErr || ResOrErr->getNumberOfFrames() == 0) {
|
||||
error(ResOrErr);
|
||||
Printer << DILineInfo();
|
||||
} else {
|
||||
Printer << ResOrErr->getFrame(0);
|
||||
}
|
||||
} else {
|
||||
auto ResOrErr = Symbolizer.symbolizeCode(
|
||||
ModuleName, {Offset, object::SectionedAddress::UndefSection});
|
||||
|
|
|
|||
|
|
@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
|
|||
if (TCR_PTR(__kmp_threads[0]) == NULL) {
|
||||
--capacity;
|
||||
}
|
||||
// If it is not for initializing the hidden helper team, we need to take
|
||||
// __kmp_hidden_helper_threads_num out of the capacity because it is included
|
||||
// in __kmp_threads_capacity.
|
||||
if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
|
||||
capacity -= __kmp_hidden_helper_threads_num;
|
||||
}
|
||||
if (__kmp_nth + new_nthreads -
|
||||
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
|
||||
capacity) {
|
||||
|
|
@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) {
|
|||
--capacity;
|
||||
}
|
||||
|
||||
// If it is not for initializing the hidden helper team, we need to take
|
||||
// __kmp_hidden_helper_threads_num out of the capacity because it is included
|
||||
// in __kmp_threads_capacity.
|
||||
if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
|
||||
capacity -= __kmp_hidden_helper_threads_num;
|
||||
}
|
||||
|
||||
/* see if there are too many threads */
|
||||
if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
|
||||
if (__kmp_tp_cached) {
|
||||
|
|
@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) {
|
|||
/* find an available thread slot */
|
||||
// Don't reassign the zero slot since we need that to only be used by
|
||||
// initial thread. Slots for hidden helper threads should also be skipped.
|
||||
if (initial_thread && __kmp_threads[0] == NULL) {
|
||||
if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
|
||||
gtid = 0;
|
||||
} else {
|
||||
for (gtid = __kmp_hidden_helper_threads_num + 1;
|
||||
|
|
|
|||
|
|
@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) {
|
|||
nth = (4 * __kmp_xproc);
|
||||
|
||||
// If hidden helper task is enabled, we initialize the thread capacity with
|
||||
// extra
|
||||
// __kmp_hidden_helper_threads_num.
|
||||
nth += __kmp_hidden_helper_threads_num;
|
||||
// extra __kmp_hidden_helper_threads_num.
|
||||
if (__kmp_enable_hidden_helper) {
|
||||
nth += __kmp_hidden_helper_threads_num;
|
||||
}
|
||||
|
||||
if (nth > __kmp_max_nth)
|
||||
nth = __kmp_max_nth;
|
||||
|
|
|
|||
|
|
@ -326,7 +326,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
|
|||
kmp_info_t *thread = __kmp_threads[gtid];
|
||||
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
|
||||
|
||||
if (taskdata->td_flags.hidden_helper) {
|
||||
// We don't need to map to shadow gtid if it is already hidden helper thread
|
||||
if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) {
|
||||
gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
|
||||
thread = __kmp_threads[gtid];
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue