mirror of
https://github.com/opnsense/src.git
synced 2026-06-10 09:11:07 -04:00
Vendor import of llvm release_60 branch r325932:
https://llvm.org/svn/llvm-project/llvm/branches/release_60@325932
This commit is contained in:
parent
3c315f3a8e
commit
0f8e52dfc6
35 changed files with 869 additions and 229 deletions
|
|
@ -5,12 +5,6 @@ LLVM 6.0.0 Release Notes
|
|||
.. contents::
|
||||
:local:
|
||||
|
||||
.. warning::
|
||||
These are in-progress notes for the upcoming LLVM 6 release.
|
||||
Release notes for previous releases can be found on
|
||||
`the Download Page <http://releases.llvm.org/download.html>`_.
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
|
|
@ -26,11 +20,6 @@ have questions or comments, the `LLVM Developer's Mailing List
|
|||
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
|
||||
them.
|
||||
|
||||
Note that if you are reading this file from a Subversion checkout or the main
|
||||
LLVM web page, this document applies to the *next* release, not the current
|
||||
one. To see the release notes for a specific release, please see the `releases
|
||||
page <http://llvm.org/releases/>`_.
|
||||
|
||||
Non-comprehensive list of changes in this release
|
||||
=================================================
|
||||
.. NOTE
|
||||
|
|
@ -56,6 +45,9 @@ Non-comprehensive list of changes in this release
|
|||
|
||||
* Significantly improved quality of CodeView debug info for Windows.
|
||||
|
||||
* Preliminary support for Sanitizers and sibling features on X86(_64) NetBSD
|
||||
(ASan, UBsan, TSan, MSan, SafeStack, libFuzzer).
|
||||
|
||||
* Note..
|
||||
|
||||
.. NOTE
|
||||
|
|
@ -71,6 +63,15 @@ Non-comprehensive list of changes in this release
|
|||
Changes to the LLVM IR
|
||||
----------------------
|
||||
|
||||
* The fast-math-flags (FMF) have been updated. Previously, the 'fast' flag
|
||||
indicated that floating-point reassociation was allowed and all other flags
|
||||
were set too. The 'fast' flag still exists, but there is a new flag called
|
||||
'reassoc' to indicate specifically that reassociation is allowed. A new bit
|
||||
called 'afn' was also added to selectively allow approximations for common
|
||||
mathlib functions like square-root. The new flags provide more flexibility
|
||||
to enable/disable specific floating-point optimizations. Making the
|
||||
optimizer respond appropriately to these flags is an ongoing effort.
|
||||
|
||||
Changes to the AArch64 Target
|
||||
-----------------------------
|
||||
|
||||
|
|
@ -112,8 +113,44 @@ Changes to the Hexagon Target
|
|||
Changes to the MIPS Target
|
||||
--------------------------
|
||||
|
||||
During this release ...
|
||||
Fixed numerous bugs:
|
||||
|
||||
* fpowi on MIPS64 giving incorrect results when used with a negative integer.
|
||||
* Usage of the asm 'c' constraint with the wrong datatype causing an
|
||||
assert/crash.
|
||||
* Fixed a conversion bug when using the DSP ASE.
|
||||
* Fixed an inconsistency where objects were not marked as using the microMIPS ASE
|
||||
when the micromips function attribute or the ".set micromips" directive was
|
||||
used.
|
||||
* Reordered the MIPSR6 specific hazard scheduler pass to after the delay slot
|
||||
filler, fixing a class of rare edge case bugs where the delay slot filler
|
||||
would violate ISA restrictions.
|
||||
* Fixed a crash when using a type of unknown size with gp relative addressing.
|
||||
* Corrected the j macro for microMIPS.
|
||||
* Corrected the encoding of movep for microMIPS32r6.
|
||||
* Fixed an issue with the usage of insert instructions having an invalid set of
|
||||
operands.
|
||||
* Fixed an issue where TLS symbols were not marked as such.
|
||||
* Enabled the usage of register scavenging with MSA, due to its shorter offsets
|
||||
for loads and stores.
|
||||
* Corrected the ELF headers when using the DSP ASE.
|
||||
|
||||
New features:
|
||||
|
||||
* The long branch pass now generates some R6 specific instructions when
|
||||
targeting MIPSR6.
|
||||
* The delay slot filler now performs more branch conversions if delay slots
|
||||
cannot be filled.
|
||||
* The MIPS MT ASE is now fully supported.
|
||||
* Added support for the ``lapc`` pseudo instruction.
|
||||
* Improved the selection of multiple instructions (``dext``, ``nmadd``,
|
||||
``nmsub``).
|
||||
* Further improved microMIPS codesize reduction.
|
||||
|
||||
Deprecation notices:
|
||||
|
||||
* microMIPS64R6 support has been deprecated since 5.0, and has now been
|
||||
completely removed.
|
||||
|
||||
Changes to the PowerPC Target
|
||||
-----------------------------
|
||||
|
|
@ -132,11 +169,43 @@ During this release the SystemZ target has:
|
|||
Changes to the X86 Target
|
||||
-------------------------
|
||||
|
||||
During this release ...
|
||||
During this release the X86 target has:
|
||||
|
||||
* Got support for enabling SjLj exception handling on platforms where it
|
||||
* Added support for enabling SjLj exception handling on platforms where it
|
||||
isn't the default.
|
||||
|
||||
* Added intrinsics for Intel Extensions: VAES, GFNI, VPCLMULQDQ, AVX512VBMI2, AVX512BITALG, AVX512VNNI.
|
||||
|
||||
* Added support for Intel Icelake CPU.
|
||||
|
||||
* Fixed some X87 codegen bugs.
|
||||
|
||||
* Added instruction scheduling information for Intel Sandy Bridge, Ivy Bridge, Haswell, Broadwell, and Skylake CPUs.
|
||||
|
||||
* Improved scheduler model for AMD Jaguar CPUs.
|
||||
|
||||
* Improved llvm-mc's disassembler for some EVEX encoded instructions.
|
||||
|
||||
* Added support for i8 and i16 vector signed/unsigned min/max horizontal reductions.
|
||||
|
||||
* Improved codegen for memory comparisons
|
||||
|
||||
* Improved codegen for i32 vector multiplies
|
||||
|
||||
* Improved codegen for scalar integer absolute values
|
||||
|
||||
* Improved codegen for vector integer rotations (XOP and AVX512)
|
||||
|
||||
* Improved codegen of data being transferred between GPRs and K-registers.
|
||||
|
||||
* Improved codegen for vector truncations.
|
||||
|
||||
* Improved folding of address computations into gather/scatter instructions.
|
||||
|
||||
* Gained initial support for recognizing variable shuffles from vector element extracts and inserts.
|
||||
|
||||
* Improved documentation for SSE/AVX intrinsics in *intrin.h header files.
|
||||
|
||||
Changes to the AMDGPU Target
|
||||
-----------------------------
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,6 @@
|
|||
Overview
|
||||
========
|
||||
|
||||
.. warning::
|
||||
|
||||
If you are using a released version of LLVM, see `the download page
|
||||
<http://llvm.org/releases/>`_ to find your documentation.
|
||||
|
||||
The LLVM compiler infrastructure supports a wide range of projects, from
|
||||
industrial strength compilers to specialized JIT applications to small
|
||||
research projects.
|
||||
|
|
|
|||
|
|
@ -395,6 +395,20 @@ enum OverflowingBinaryOperatorOptionalFlags {
|
|||
OBO_NO_SIGNED_WRAP = 1
|
||||
};
|
||||
|
||||
/// FastMath Flags
|
||||
/// This is a fixed layout derived from the bitcode emitted by LLVM 5.0
|
||||
/// intended to decouple the in-memory representation from the serialization.
|
||||
enum FastMathMap {
|
||||
UnsafeAlgebra = (1 << 0), // Legacy
|
||||
NoNaNs = (1 << 1),
|
||||
NoInfs = (1 << 2),
|
||||
NoSignedZeros = (1 << 3),
|
||||
AllowReciprocal = (1 << 4),
|
||||
AllowContract = (1 << 5),
|
||||
ApproxFunc = (1 << 6),
|
||||
AllowReassoc = (1 << 7)
|
||||
};
|
||||
|
||||
/// PossiblyExactOperatorOptionalFlags - Flags for serializing
|
||||
/// PossiblyExactOperator's SubclassOptionalData contents.
|
||||
enum PossiblyExactOperatorOptionalFlags { PEO_EXACT = 0 };
|
||||
|
|
|
|||
|
|
@ -33,6 +33,6 @@ public:
|
|||
MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
|
||||
: Name(N), Body(B), Parameters(std::move(P)) {}
|
||||
};
|
||||
}; // namespace llvm
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/DemandedBits.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
|
|
@ -172,15 +173,25 @@ public:
|
|||
Value *Left, Value *Right);
|
||||
|
||||
/// Returns true if Phi is a reduction of type Kind and adds it to the
|
||||
/// RecurrenceDescriptor.
|
||||
/// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
|
||||
/// non-null, the minimal bit width needed to compute the reduction will be
|
||||
/// computed.
|
||||
static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
|
||||
bool HasFunNoNaNAttr,
|
||||
RecurrenceDescriptor &RedDes);
|
||||
RecurrenceDescriptor &RedDes,
|
||||
DemandedBits *DB = nullptr,
|
||||
AssumptionCache *AC = nullptr,
|
||||
DominatorTree *DT = nullptr);
|
||||
|
||||
/// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is
|
||||
/// returned in RedDes.
|
||||
/// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
|
||||
/// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
|
||||
/// non-null, the minimal bit width needed to compute the reduction will be
|
||||
/// computed.
|
||||
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
|
||||
RecurrenceDescriptor &RedDes);
|
||||
RecurrenceDescriptor &RedDes,
|
||||
DemandedBits *DB = nullptr,
|
||||
AssumptionCache *AC = nullptr,
|
||||
DominatorTree *DT = nullptr);
|
||||
|
||||
/// Returns true if Phi is a first-order recurrence. A first-order recurrence
|
||||
/// is a non-reduction recurrence relation in which the value of the
|
||||
|
|
@ -218,24 +229,6 @@ public:
|
|||
/// Returns true if the recurrence kind is an arithmetic kind.
|
||||
static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
|
||||
|
||||
/// Determines if Phi may have been type-promoted. If Phi has a single user
|
||||
/// that ANDs the Phi with a type mask, return the user. RT is updated to
|
||||
/// account for the narrower bit width represented by the mask, and the AND
|
||||
/// instruction is added to CI.
|
||||
static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
|
||||
SmallPtrSetImpl<Instruction *> &Visited,
|
||||
SmallPtrSetImpl<Instruction *> &CI);
|
||||
|
||||
/// Returns true if all the source operands of a recurrence are either
|
||||
/// SExtInsts or ZExtInsts. This function is intended to be used with
|
||||
/// lookThroughAnd to determine if the recurrence has been type-promoted. The
|
||||
/// source operands are added to CI, and IsSigned is updated to indicate if
|
||||
/// all source operands are SExtInsts.
|
||||
static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit,
|
||||
Type *RT, bool &IsSigned,
|
||||
SmallPtrSetImpl<Instruction *> &Visited,
|
||||
SmallPtrSetImpl<Instruction *> &CI);
|
||||
|
||||
/// Returns the type of the recurrence. This type can be narrower than the
|
||||
/// actual type of the Phi if the recurrence has been type-promoted.
|
||||
Type *getRecurrenceType() { return RecurrenceType; }
|
||||
|
|
|
|||
|
|
@ -205,6 +205,11 @@ static cl::opt<unsigned>
|
|||
cl::desc("Max coefficients in AddRec during evolving"),
|
||||
cl::init(16));
|
||||
|
||||
static cl::opt<bool> VersionUnknown(
|
||||
"scev-version-unknown", cl::Hidden,
|
||||
cl::desc("Use predicated scalar evolution to version SCEVUnknowns"),
|
||||
cl::init(false));
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SCEV class definitions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
@ -11467,6 +11472,8 @@ private:
|
|||
// couldn't create an AddRec for it, or couldn't add the predicate), we just
|
||||
// return \p Expr.
|
||||
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
|
||||
if (!VersionUnknown)
|
||||
return Expr;
|
||||
if (!isa<PHINode>(Expr->getValue()))
|
||||
return Expr;
|
||||
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
|
||||
|
|
|
|||
|
|
@ -1046,19 +1046,21 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
|
|||
|
||||
static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
|
||||
FastMathFlags FMF;
|
||||
if (0 != (Val & FastMathFlags::AllowReassoc))
|
||||
if (0 != (Val & bitc::UnsafeAlgebra))
|
||||
FMF.setFast();
|
||||
if (0 != (Val & bitc::AllowReassoc))
|
||||
FMF.setAllowReassoc();
|
||||
if (0 != (Val & FastMathFlags::NoNaNs))
|
||||
if (0 != (Val & bitc::NoNaNs))
|
||||
FMF.setNoNaNs();
|
||||
if (0 != (Val & FastMathFlags::NoInfs))
|
||||
if (0 != (Val & bitc::NoInfs))
|
||||
FMF.setNoInfs();
|
||||
if (0 != (Val & FastMathFlags::NoSignedZeros))
|
||||
if (0 != (Val & bitc::NoSignedZeros))
|
||||
FMF.setNoSignedZeros();
|
||||
if (0 != (Val & FastMathFlags::AllowReciprocal))
|
||||
if (0 != (Val & bitc::AllowReciprocal))
|
||||
FMF.setAllowReciprocal();
|
||||
if (0 != (Val & FastMathFlags::AllowContract))
|
||||
if (0 != (Val & bitc::AllowContract))
|
||||
FMF.setAllowContract(true);
|
||||
if (0 != (Val & FastMathFlags::ApproxFunc))
|
||||
if (0 != (Val & bitc::ApproxFunc))
|
||||
FMF.setApproxFunc();
|
||||
return FMF;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1330,19 +1330,19 @@ static uint64_t getOptimizationFlags(const Value *V) {
|
|||
Flags |= 1 << bitc::PEO_EXACT;
|
||||
} else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
|
||||
if (FPMO->hasAllowReassoc())
|
||||
Flags |= FastMathFlags::AllowReassoc;
|
||||
Flags |= bitc::AllowReassoc;
|
||||
if (FPMO->hasNoNaNs())
|
||||
Flags |= FastMathFlags::NoNaNs;
|
||||
Flags |= bitc::NoNaNs;
|
||||
if (FPMO->hasNoInfs())
|
||||
Flags |= FastMathFlags::NoInfs;
|
||||
Flags |= bitc::NoInfs;
|
||||
if (FPMO->hasNoSignedZeros())
|
||||
Flags |= FastMathFlags::NoSignedZeros;
|
||||
Flags |= bitc::NoSignedZeros;
|
||||
if (FPMO->hasAllowReciprocal())
|
||||
Flags |= FastMathFlags::AllowReciprocal;
|
||||
Flags |= bitc::AllowReciprocal;
|
||||
if (FPMO->hasAllowContract())
|
||||
Flags |= FastMathFlags::AllowContract;
|
||||
Flags |= bitc::AllowContract;
|
||||
if (FPMO->hasApproxFunc())
|
||||
Flags |= FastMathFlags::ApproxFunc;
|
||||
Flags |= bitc::ApproxFunc;
|
||||
}
|
||||
|
||||
return Flags;
|
||||
|
|
@ -3183,7 +3183,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
|
|||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags
|
||||
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
|
||||
FUNCTION_INST_BINOP_FLAGS_ABBREV)
|
||||
llvm_unreachable("Unexpected abbrev ordering!");
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
|
|||
endif()
|
||||
if( MSVC OR MINGW )
|
||||
# libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc.
|
||||
set(system_libs ${system_libs} psapi shell32 ole32 uuid)
|
||||
# advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc.
|
||||
set(system_libs ${system_libs} psapi shell32 ole32 uuid advapi32)
|
||||
elseif( CMAKE_HOST_UNIX )
|
||||
if( HAVE_LIBRT )
|
||||
set(system_libs ${system_libs} rt)
|
||||
|
|
|
|||
|
|
@ -133,16 +133,21 @@ AArch64InstructionSelector::AArch64InstructionSelector(
|
|||
// for each class in the bank.
|
||||
static const TargetRegisterClass *
|
||||
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
|
||||
const RegisterBankInfo &RBI) {
|
||||
const RegisterBankInfo &RBI,
|
||||
bool GetAllRegSet = false) {
|
||||
if (RB.getID() == AArch64::GPRRegBankID) {
|
||||
if (Ty.getSizeInBits() <= 32)
|
||||
return &AArch64::GPR32RegClass;
|
||||
return GetAllRegSet ? &AArch64::GPR32allRegClass
|
||||
: &AArch64::GPR32RegClass;
|
||||
if (Ty.getSizeInBits() == 64)
|
||||
return &AArch64::GPR64RegClass;
|
||||
return GetAllRegSet ? &AArch64::GPR64allRegClass
|
||||
: &AArch64::GPR64RegClass;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (RB.getID() == AArch64::FPRRegBankID) {
|
||||
if (Ty.getSizeInBits() <= 16)
|
||||
return &AArch64::FPR16RegClass;
|
||||
if (Ty.getSizeInBits() == 32)
|
||||
return &AArch64::FPR32RegClass;
|
||||
if (Ty.getSizeInBits() == 64)
|
||||
|
|
@ -310,19 +315,46 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
|
|||
return GenericOpc;
|
||||
}
|
||||
|
||||
static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
|
||||
MachineRegisterInfo &MRI, unsigned SrcReg) {
|
||||
// Copies from gpr32 to fpr16 need to use a sub-register copy.
|
||||
unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
|
||||
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY))
|
||||
.addDef(CopyReg)
|
||||
.addUse(SrcReg);
|
||||
unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
|
||||
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
|
||||
.addDef(SubRegCopy)
|
||||
.addUse(CopyReg, 0, AArch64::hsub);
|
||||
|
||||
MachineOperand &RegOp = I.getOperand(1);
|
||||
RegOp.setReg(SubRegCopy);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
||||
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
|
||||
const RegisterBankInfo &RBI) {
|
||||
|
||||
unsigned DstReg = I.getOperand(0).getReg();
|
||||
unsigned SrcReg = I.getOperand(1).getReg();
|
||||
|
||||
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
|
||||
if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
|
||||
!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
|
||||
const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
|
||||
const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
|
||||
MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
|
||||
if (SrcRC == &AArch64::GPR32allRegClass)
|
||||
return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
|
||||
}
|
||||
assert(I.isCopy() && "Generic operators do not allow physical registers");
|
||||
return true;
|
||||
}
|
||||
|
||||
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
|
||||
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
|
||||
unsigned SrcReg = I.getOperand(1).getReg();
|
||||
(void)DstSize;
|
||||
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
|
||||
(void)SrcSize;
|
||||
assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
|
||||
|
|
@ -340,26 +372,38 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
|||
"Copy with different width?!");
|
||||
assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) &&
|
||||
"GPRs cannot get more than 64-bit width values");
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
|
||||
if (RegBank.getID() == AArch64::FPRRegBankID) {
|
||||
if (DstSize <= 16)
|
||||
RC = &AArch64::FPR16RegClass;
|
||||
else if (DstSize <= 32)
|
||||
RC = &AArch64::FPR32RegClass;
|
||||
else if (DstSize <= 64)
|
||||
RC = &AArch64::FPR64RegClass;
|
||||
else if (DstSize <= 128)
|
||||
RC = &AArch64::FPR128RegClass;
|
||||
else {
|
||||
DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
|
||||
return false;
|
||||
const TargetRegisterClass *RC = getRegClassForTypeOnBank(
|
||||
MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true);
|
||||
if (!RC) {
|
||||
DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
|
||||
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
|
||||
const TargetRegisterClass *SrcRC =
|
||||
RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
|
||||
const RegisterBank *RB = nullptr;
|
||||
if (!SrcRC) {
|
||||
RB = RegClassOrBank.get<const RegisterBank *>();
|
||||
SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true);
|
||||
}
|
||||
// Copies from fpr16 to gpr32 need to use SUBREG_TO_REG.
|
||||
if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) {
|
||||
unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
|
||||
BuildMI(*I.getParent(), I, I.getDebugLoc(),
|
||||
TII.get(AArch64::SUBREG_TO_REG))
|
||||
.addDef(PromoteReg)
|
||||
.addImm(0)
|
||||
.addUse(SrcReg)
|
||||
.addImm(AArch64::hsub);
|
||||
MachineOperand &RegOp = I.getOperand(1);
|
||||
RegOp.setReg(PromoteReg);
|
||||
} else if (RC == &AArch64::FPR16RegClass &&
|
||||
SrcRC == &AArch64::GPR32allRegClass) {
|
||||
selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
|
||||
}
|
||||
} else {
|
||||
assert(RegBank.getID() == AArch64::GPRRegBankID &&
|
||||
"Bitcast for the flags?");
|
||||
RC =
|
||||
DstSize <= 32 ? &AArch64::GPR32allRegClass : &AArch64::GPR64allRegClass;
|
||||
}
|
||||
|
||||
// No need to constrain SrcReg. It will get constrained when
|
||||
|
|
@ -795,15 +839,23 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
|
|||
}
|
||||
case TargetOpcode::G_EXTRACT: {
|
||||
LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
|
||||
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
||||
unsigned SrcSize = SrcTy.getSizeInBits();
|
||||
// Larger extracts are vectors, same-size extracts should be something else
|
||||
// by now (either split up or simplified to a COPY).
|
||||
if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
|
||||
return false;
|
||||
|
||||
I.setDesc(TII.get(AArch64::UBFMXri));
|
||||
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
|
||||
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
|
||||
Ty.getSizeInBits() - 1);
|
||||
|
||||
if (SrcSize < 64) {
|
||||
assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
|
||||
"unexpected G_EXTRACT types");
|
||||
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
||||
}
|
||||
|
||||
unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
|
||||
BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
|
||||
TII.get(AArch64::COPY))
|
||||
|
|
@ -818,17 +870,26 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
|
|||
|
||||
case TargetOpcode::G_INSERT: {
|
||||
LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
|
||||
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
|
||||
unsigned DstSize = DstTy.getSizeInBits();
|
||||
(void)DstSize;
|
||||
// Larger inserts are vectors, same-size ones should be something else by
|
||||
// now (split up or turned into COPYs).
|
||||
if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
|
||||
return false;
|
||||
|
||||
I.setDesc(TII.get(AArch64::BFMXri));
|
||||
I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
|
||||
unsigned LSB = I.getOperand(3).getImm();
|
||||
unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
|
||||
I.getOperand(3).setImm((64 - LSB) % 64);
|
||||
I.getOperand(3).setImm((DstSize - LSB) % DstSize);
|
||||
MachineInstrBuilder(MF, I).addImm(Width - 1);
|
||||
|
||||
if (DstSize < 64) {
|
||||
assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
|
||||
"unexpected G_INSERT types");
|
||||
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
||||
}
|
||||
|
||||
unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
|
||||
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
|
||||
TII.get(AArch64::SUBREG_TO_REG))
|
||||
|
|
|
|||
|
|
@ -3797,7 +3797,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
|||
}
|
||||
}
|
||||
|
||||
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
|
||||
MachineInstr *NewInstr =
|
||||
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
|
||||
get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
|
||||
.add(*VAddr) // vaddr
|
||||
.add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
|
||||
|
|
@ -3806,12 +3807,17 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
|||
.addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
|
||||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
|
||||
.setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
|
||||
.getInstr();
|
||||
|
||||
MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
|
||||
VDst);
|
||||
addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
|
||||
Inst.eraseFromParent();
|
||||
|
||||
// Legalize all operands other than the offset. Notably, convert the srsrc
|
||||
// into SGPRs using v_readfirstlane if needed.
|
||||
legalizeOperands(*NewInstr);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -454,13 +454,16 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// FREM is always a call.
|
||||
if (J->getOpcode() == Instruction::FRem)
|
||||
return true;
|
||||
|
||||
if (STI->useSoftFloat()) {
|
||||
switch(J->getOpcode()) {
|
||||
case Instruction::FAdd:
|
||||
case Instruction::FSub:
|
||||
case Instruction::FMul:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::FRem:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::FPExt:
|
||||
case Instruction::FPToUI:
|
||||
|
|
|
|||
|
|
@ -740,7 +740,13 @@ class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
|
|||
def : SkylakeServerProc<"skylake-avx512">;
|
||||
def : SkylakeServerProc<"skx">; // Legacy alias.
|
||||
|
||||
def CNLFeatures : ProcessorFeatures<SKXFeatures.Value, [
|
||||
def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
|
||||
FeatureAVX512,
|
||||
FeatureCDI,
|
||||
FeatureDQI,
|
||||
FeatureBWI,
|
||||
FeatureVLX,
|
||||
FeaturePKU,
|
||||
FeatureVBMI,
|
||||
FeatureIFMA,
|
||||
FeatureSHA
|
||||
|
|
|
|||
|
|
@ -1643,11 +1643,25 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
|
|||
}
|
||||
}
|
||||
|
||||
auto canMergeSelectThroughBinop = [](BinaryOperator *BO) {
|
||||
// The select might be preventing a division by 0.
|
||||
switch (BO->getOpcode()) {
|
||||
default:
|
||||
return true;
|
||||
case Instruction::SRem:
|
||||
case Instruction::URem:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::UDiv:
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Try to simplify a binop sandwiched between 2 selects with the same
|
||||
// condition.
|
||||
// select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z)
|
||||
BinaryOperator *TrueBO;
|
||||
if (match(TrueVal, m_OneUse(m_BinOp(TrueBO)))) {
|
||||
if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) &&
|
||||
canMergeSelectThroughBinop(TrueBO)) {
|
||||
if (auto *TrueBOSI = dyn_cast<SelectInst>(TrueBO->getOperand(0))) {
|
||||
if (TrueBOSI->getCondition() == CondVal) {
|
||||
TrueBO->setOperand(0, TrueBOSI->getTrueValue());
|
||||
|
|
@ -1666,7 +1680,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
|
|||
|
||||
// select(C, Z, binop(select(C, X, Y), W)) -> select(C, Z, binop(Y, W))
|
||||
BinaryOperator *FalseBO;
|
||||
if (match(FalseVal, m_OneUse(m_BinOp(FalseBO)))) {
|
||||
if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) &&
|
||||
canMergeSelectThroughBinop(FalseBO)) {
|
||||
if (auto *FalseBOSI = dyn_cast<SelectInst>(FalseBO->getOperand(0))) {
|
||||
if (FalseBOSI->getCondition() == CondVal) {
|
||||
FalseBO->setOperand(0, FalseBOSI->getFalseValue());
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
|
|||
const LoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE);
|
||||
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
|
||||
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
|
||||
const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE, bool FreeInLoop);
|
||||
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
|
||||
const DominatorTree *DT,
|
||||
|
|
@ -855,10 +855,16 @@ static Instruction *sinkThroughTriviallyReplacablePHI(
|
|||
return New;
|
||||
}
|
||||
|
||||
static bool canSplitPredecessors(PHINode *PN) {
|
||||
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
|
||||
BasicBlock *BB = PN->getParent();
|
||||
if (!BB->canSplitPredecessors())
|
||||
return false;
|
||||
// It's not impossible to split EHPad blocks, but if BlockColors already exist
|
||||
// it require updating BlockColors for all offspring blocks accordingly. By
|
||||
// skipping such corner case, we can make updating BlockColors after splitting
|
||||
// predecessor fairly simple.
|
||||
if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad())
|
||||
return false;
|
||||
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
|
||||
BasicBlock *BBPred = *PI;
|
||||
if (isa<IndirectBrInst>(BBPred->getTerminator()))
|
||||
|
|
@ -868,7 +874,8 @@ static bool canSplitPredecessors(PHINode *PN) {
|
|||
}
|
||||
|
||||
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
|
||||
LoopInfo *LI, const Loop *CurLoop) {
|
||||
LoopInfo *LI, const Loop *CurLoop,
|
||||
LoopSafetyInfo *SafetyInfo) {
|
||||
#ifndef NDEBUG
|
||||
SmallVector<BasicBlock *, 32> ExitBlocks;
|
||||
CurLoop->getUniqueExitBlocks(ExitBlocks);
|
||||
|
|
@ -910,13 +917,21 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
|
|||
// LE:
|
||||
// %p = phi [%p1, %LE.split], [%p2, %LE.split2]
|
||||
//
|
||||
auto &BlockColors = SafetyInfo->BlockColors;
|
||||
SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
|
||||
while (!PredBBs.empty()) {
|
||||
BasicBlock *PredBB = *PredBBs.begin();
|
||||
assert(CurLoop->contains(PredBB) &&
|
||||
"Expect all predecessors are in the loop");
|
||||
if (PN->getBasicBlockIndex(PredBB) >= 0)
|
||||
SplitBlockPredecessors(ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
|
||||
if (PN->getBasicBlockIndex(PredBB) >= 0) {
|
||||
BasicBlock *NewPred = SplitBlockPredecessors(
|
||||
ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
|
||||
// Since we do not allow splitting EH-block with BlockColors in
|
||||
// canSplitPredecessors(), we can simply assign predecessor's color to
|
||||
// the new block.
|
||||
if (!BlockColors.empty())
|
||||
BlockColors[NewPred] = BlockColors[PredBB];
|
||||
}
|
||||
PredBBs.remove(PredBB);
|
||||
}
|
||||
}
|
||||
|
|
@ -927,7 +942,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
|
|||
/// position, and may either delete it or move it to outside of the loop.
|
||||
///
|
||||
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
|
||||
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
|
||||
const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
|
||||
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
|
||||
ORE->emit([&]() {
|
||||
|
|
@ -975,12 +990,12 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
|
|||
if (isTriviallyReplacablePHI(*PN, I))
|
||||
continue;
|
||||
|
||||
if (!canSplitPredecessors(PN))
|
||||
if (!canSplitPredecessors(PN, SafetyInfo))
|
||||
return Changed;
|
||||
|
||||
// Split predecessors of the PHI so that we can make users trivially
|
||||
// replacable.
|
||||
splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop);
|
||||
splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo);
|
||||
|
||||
// Should rebuild the iterators, as they may be invalidated by
|
||||
// splitPredecessorsOfLoopExit().
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
|
|
@ -30,6 +31,7 @@
|
|||
#include "llvm/IR/ValueHandle.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/KnownBits.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
|
@ -77,10 +79,13 @@ bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
|
|||
return false;
|
||||
}
|
||||
|
||||
Instruction *
|
||||
RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
|
||||
SmallPtrSetImpl<Instruction *> &Visited,
|
||||
SmallPtrSetImpl<Instruction *> &CI) {
|
||||
/// Determines if Phi may have been type-promoted. If Phi has a single user
|
||||
/// that ANDs the Phi with a type mask, return the user. RT is updated to
|
||||
/// account for the narrower bit width represented by the mask, and the AND
|
||||
/// instruction is added to CI.
|
||||
static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
|
||||
SmallPtrSetImpl<Instruction *> &Visited,
|
||||
SmallPtrSetImpl<Instruction *> &CI) {
|
||||
if (!Phi->hasOneUse())
|
||||
return Phi;
|
||||
|
||||
|
|
@ -101,70 +106,92 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
|
|||
return Phi;
|
||||
}
|
||||
|
||||
bool RecurrenceDescriptor::getSourceExtensionKind(
|
||||
Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
|
||||
SmallPtrSetImpl<Instruction *> &Visited,
|
||||
SmallPtrSetImpl<Instruction *> &CI) {
|
||||
/// Compute the minimal bit width needed to represent a reduction whose exit
|
||||
/// instruction is given by Exit.
|
||||
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
|
||||
DemandedBits *DB,
|
||||
AssumptionCache *AC,
|
||||
DominatorTree *DT) {
|
||||
bool IsSigned = false;
|
||||
const DataLayout &DL = Exit->getModule()->getDataLayout();
|
||||
uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
|
||||
|
||||
SmallVector<Instruction *, 8> Worklist;
|
||||
bool FoundOneOperand = false;
|
||||
unsigned DstSize = RT->getPrimitiveSizeInBits();
|
||||
Worklist.push_back(Exit);
|
||||
if (DB) {
|
||||
// Use the demanded bits analysis to determine the bits that are live out
|
||||
// of the exit instruction, rounding up to the nearest power of two. If the
|
||||
// use of demanded bits results in a smaller bit width, we know the value
|
||||
// must be positive (i.e., IsSigned = false), because if this were not the
|
||||
// case, the sign bit would have been demanded.
|
||||
auto Mask = DB->getDemandedBits(Exit);
|
||||
MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
|
||||
}
|
||||
|
||||
// Traverse the instructions in the reduction expression, beginning with the
|
||||
// exit value.
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *I = Worklist.pop_back_val();
|
||||
for (Use &U : I->operands()) {
|
||||
|
||||
// Terminate the traversal if the operand is not an instruction, or we
|
||||
// reach the starting value.
|
||||
Instruction *J = dyn_cast<Instruction>(U.get());
|
||||
if (!J || J == Start)
|
||||
continue;
|
||||
|
||||
// Otherwise, investigate the operation if it is also in the expression.
|
||||
if (Visited.count(J)) {
|
||||
Worklist.push_back(J);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the operand is not in Visited, it is not a reduction operation, but
|
||||
// it does feed into one. Make sure it is either a single-use sign- or
|
||||
// zero-extend instruction.
|
||||
CastInst *Cast = dyn_cast<CastInst>(J);
|
||||
bool IsSExtInst = isa<SExtInst>(J);
|
||||
if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
|
||||
return false;
|
||||
|
||||
// Ensure the source type of the extend is no larger than the reduction
|
||||
// type. It is not necessary for the types to be identical.
|
||||
unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
|
||||
if (SrcSize > DstSize)
|
||||
return false;
|
||||
|
||||
// Furthermore, ensure that all such extends are of the same kind.
|
||||
if (FoundOneOperand) {
|
||||
if (IsSigned != IsSExtInst)
|
||||
return false;
|
||||
} else {
|
||||
FoundOneOperand = true;
|
||||
IsSigned = IsSExtInst;
|
||||
}
|
||||
|
||||
// Lastly, if the source type of the extend matches the reduction type,
|
||||
// add the extend to CI so that we can avoid accounting for it in the
|
||||
// cost model.
|
||||
if (SrcSize == DstSize)
|
||||
CI.insert(Cast);
|
||||
if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
|
||||
// If demanded bits wasn't able to limit the bit width, we can try to use
|
||||
// value tracking instead. This can be the case, for example, if the value
|
||||
// may be negative.
|
||||
auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
|
||||
auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
|
||||
MaxBitWidth = NumTypeBits - NumSignBits;
|
||||
KnownBits Bits = computeKnownBits(Exit, DL);
|
||||
if (!Bits.isNonNegative()) {
|
||||
// If the value is not known to be non-negative, we set IsSigned to true,
|
||||
// meaning that we will use sext instructions instead of zext
|
||||
// instructions to restore the original type.
|
||||
IsSigned = true;
|
||||
if (!Bits.isNegative())
|
||||
// If the value is not known to be negative, we don't know what the
|
||||
// upper bit is, and therefore, we don't know what kind of extend we
|
||||
// will need. In this case, just increase the bit width by one bit and
|
||||
// use sext.
|
||||
++MaxBitWidth;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
if (!isPowerOf2_64(MaxBitWidth))
|
||||
MaxBitWidth = NextPowerOf2(MaxBitWidth);
|
||||
|
||||
return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
|
||||
IsSigned);
|
||||
}
|
||||
|
||||
/// Collect cast instructions that can be ignored in the vectorizer's cost
|
||||
/// model, given a reduction exit value and the minimal type in which the
|
||||
/// reduction can be represented.
|
||||
static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
|
||||
Type *RecurrenceType,
|
||||
SmallPtrSetImpl<Instruction *> &Casts) {
|
||||
|
||||
SmallVector<Instruction *, 8> Worklist;
|
||||
SmallPtrSet<Instruction *, 8> Visited;
|
||||
Worklist.push_back(Exit);
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *Val = Worklist.pop_back_val();
|
||||
Visited.insert(Val);
|
||||
if (auto *Cast = dyn_cast<CastInst>(Val))
|
||||
if (Cast->getSrcTy() == RecurrenceType) {
|
||||
// If the source type of a cast instruction is equal to the recurrence
|
||||
// type, it will be eliminated, and should be ignored in the vectorizer
|
||||
// cost model.
|
||||
Casts.insert(Cast);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Add all operands to the work list if they are loop-varying values that
|
||||
// we haven't yet visited.
|
||||
for (Value *O : cast<User>(Val)->operands())
|
||||
if (auto *I = dyn_cast<Instruction>(O))
|
||||
if (TheLoop->contains(I) && !Visited.count(I))
|
||||
Worklist.push_back(I);
|
||||
}
|
||||
}
|
||||
|
||||
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
|
||||
Loop *TheLoop, bool HasFunNoNaNAttr,
|
||||
RecurrenceDescriptor &RedDes) {
|
||||
RecurrenceDescriptor &RedDes,
|
||||
DemandedBits *DB,
|
||||
AssumptionCache *AC,
|
||||
DominatorTree *DT) {
|
||||
if (Phi->getNumIncomingValues() != 2)
|
||||
return false;
|
||||
|
||||
|
|
@ -353,14 +380,49 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
|
|||
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
|
||||
return false;
|
||||
|
||||
// If we think Phi may have been type-promoted, we also need to ensure that
|
||||
// all source operands of the reduction are either SExtInsts or ZExtInsts. If
|
||||
// so, we will be able to evaluate the reduction in the narrower bit width.
|
||||
if (Start != Phi)
|
||||
if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
|
||||
IsSigned, VisitedInsts, CastInsts))
|
||||
if (Start != Phi) {
|
||||
// If the starting value is not the same as the phi node, we speculatively
|
||||
// looked through an 'and' instruction when evaluating a potential
|
||||
// arithmetic reduction to determine if it may have been type-promoted.
|
||||
//
|
||||
// We now compute the minimal bit width that is required to represent the
|
||||
// reduction. If this is the same width that was indicated by the 'and', we
|
||||
// can represent the reduction in the smaller type. The 'and' instruction
|
||||
// will be eliminated since it will essentially be a cast instruction that
|
||||
// can be ignored in the cost model. If we compute a different type than we
|
||||
// did when evaluating the 'and', the 'and' will not be eliminated, and we
|
||||
// will end up with different kinds of operations in the recurrence
|
||||
// expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
|
||||
// the case.
|
||||
//
|
||||
// The vectorizer relies on InstCombine to perform the actual
|
||||
// type-shrinking. It does this by inserting instructions to truncate the
|
||||
// exit value of the reduction to the width indicated by RecurrenceType and
|
||||
// then extend this value back to the original width. If IsSigned is false,
|
||||
// a 'zext' instruction will be generated; otherwise, a 'sext' will be
|
||||
// used.
|
||||
//
|
||||
// TODO: We should not rely on InstCombine to rewrite the reduction in the
|
||||
// smaller type. We should just generate a correctly typed expression
|
||||
// to begin with.
|
||||
Type *ComputedType;
|
||||
std::tie(ComputedType, IsSigned) =
|
||||
computeRecurrenceType(ExitInstruction, DB, AC, DT);
|
||||
if (ComputedType != RecurrenceType)
|
||||
return false;
|
||||
|
||||
// The recurrence expression will be represented in a narrower type. If
|
||||
// there are any cast instructions that will be unnecessary, collect them
|
||||
// in CastInsts. Note that the 'and' instruction was already included in
|
||||
// this list.
|
||||
//
|
||||
// TODO: A better way to represent this may be to tag in some way all the
|
||||
// instructions that are a part of the reduction. The vectorizer cost
|
||||
// model could then apply the recurrence type to these instructions,
|
||||
// without needing a white list of instructions to ignore.
|
||||
collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
|
||||
}
|
||||
|
||||
// We found a reduction var if we have reached the original phi node and we
|
||||
// only have a single instruction with out-of-loop users.
|
||||
|
||||
|
|
@ -480,47 +542,57 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(
|
|||
return false;
|
||||
}
|
||||
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
|
||||
RecurrenceDescriptor &RedDes) {
|
||||
RecurrenceDescriptor &RedDes,
|
||||
DemandedBits *DB, AssumptionCache *AC,
|
||||
DominatorTree *DT) {
|
||||
|
||||
BasicBlock *Header = TheLoop->getHeader();
|
||||
Function &F = *Header->getParent();
|
||||
bool HasFunNoNaNAttr =
|
||||
F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
|
||||
|
||||
if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
|
||||
RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
|
||||
DB, AC, DT)) {
|
||||
DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
|
||||
if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
|
||||
AC, DT)) {
|
||||
DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1542,9 +1542,10 @@ public:
|
|||
const TargetTransformInfo *TTI,
|
||||
std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
|
||||
OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
|
||||
LoopVectorizeHints *H)
|
||||
LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
|
||||
: TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA),
|
||||
ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H) {}
|
||||
ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H),
|
||||
DB(DB), AC(AC) {}
|
||||
|
||||
/// ReductionList contains the reduction descriptors for all
|
||||
/// of the reductions that were found in the loop.
|
||||
|
|
@ -1833,6 +1834,14 @@ private:
|
|||
/// Used to emit an analysis of any legality issues.
|
||||
LoopVectorizeHints *Hints;
|
||||
|
||||
/// The demanded bits analysis is used to compute the minimum type size in
|
||||
/// which a reduction can be computed.
|
||||
DemandedBits *DB;
|
||||
|
||||
/// The assumption cache analysis is used to compute the minimum type size in
|
||||
/// which a reduction can be computed.
|
||||
AssumptionCache *AC;
|
||||
|
||||
/// While vectorizing these instructions we have to generate a
|
||||
/// call to the appropriate masked intrinsic
|
||||
SmallPtrSet<const Instruction *, 8> MaskedOp;
|
||||
|
|
@ -5300,7 +5309,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
|||
}
|
||||
|
||||
RecurrenceDescriptor RedDes;
|
||||
if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
|
||||
if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
|
||||
DT)) {
|
||||
if (RedDes.hasUnsafeAlgebra())
|
||||
Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
|
||||
AllowedExit.insert(RedDes.getLoopExitInstr());
|
||||
|
|
@ -8514,7 +8524,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||
// Check if it is legal to vectorize the loop.
|
||||
LoopVectorizationRequirements Requirements(*ORE);
|
||||
LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
|
||||
&Requirements, &Hints);
|
||||
&Requirements, &Hints, DB, AC);
|
||||
if (!LVL.canVectorize()) {
|
||||
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
|
||||
emitMissedWarning(F, L, Hints, ORE);
|
||||
|
|
|
|||
|
|
@ -612,9 +612,7 @@ define void @fastmathflags(float %op1, float %op2) {
|
|||
%f.arcp = fadd arcp float %op1, %op2
|
||||
; CHECK: %f.arcp = fadd arcp float %op1, %op2
|
||||
%f.fast = fadd fast float %op1, %op2
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
|
||||
; CHECK: %f.fast = fadd fast float %op1, %op2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -656,9 +656,7 @@ define void @fastmathflags(float %op1, float %op2) {
|
|||
%f.arcp = fadd arcp float %op1, %op2
|
||||
; CHECK: %f.arcp = fadd arcp float %op1, %op2
|
||||
%f.fast = fadd fast float %op1, %op2
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
|
||||
; CHECK: %f.fast = fadd fast float %op1, %op2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -687,9 +687,7 @@ define void @fastmathflags(float %op1, float %op2) {
|
|||
%f.arcp = fadd arcp float %op1, %op2
|
||||
; CHECK: %f.arcp = fadd arcp float %op1, %op2
|
||||
%f.fast = fadd fast float %op1, %op2
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
|
||||
; CHECK: %f.fast = fadd fast float %op1, %op2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
@ -702,9 +700,7 @@ declare <4 x double> @fmf3()
|
|||
; CHECK-LABEL: fastMathFlagsForCalls(
|
||||
define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
|
||||
%call.fast = call fast float @fmf1()
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1()
|
||||
; CHECK: %call.fast = call fast float @fmf1()
|
||||
|
||||
; Throw in some other attributes to make sure those stay in the right places.
|
||||
|
||||
|
|
|
|||
|
|
@ -758,9 +758,7 @@ define void @fastmathflags(float %op1, float %op2) {
|
|||
%f.arcp = fadd arcp float %op1, %op2
|
||||
; CHECK: %f.arcp = fadd arcp float %op1, %op2
|
||||
%f.fast = fadd fast float %op1, %op2
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
|
||||
; CHECK: %f.fast = fadd fast float %op1, %op2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
@ -773,9 +771,7 @@ declare <4 x double> @fmf3()
|
|||
; CHECK-LABEL: fastMathFlagsForCalls(
|
||||
define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
|
||||
%call.fast = call fast float @fmf1()
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1()
|
||||
; CHECK: %call.fast = call fast float @fmf1()
|
||||
|
||||
; Throw in some other attributes to make sure those stay in the right places.
|
||||
|
||||
|
|
|
|||
|
|
@ -757,10 +757,8 @@ define void @fastmathflags(float %op1, float %op2) {
|
|||
; CHECK: %f.nsz = fadd nsz float %op1, %op2
|
||||
%f.arcp = fadd arcp float %op1, %op2
|
||||
; CHECK: %f.arcp = fadd arcp float %op1, %op2
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
%f.fast = fadd fast float %op1, %op2
|
||||
; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2
|
||||
; CHECK: %f.fast = fadd fast float %op1, %op2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
@ -773,9 +771,7 @@ declare <4 x double> @fmf3()
|
|||
; CHECK-LABEL: fastMathFlagsForCalls(
|
||||
define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
|
||||
%call.fast = call fast float @fmf1()
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'.
|
||||
; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1()
|
||||
; CHECK: %call.fast = call fast float @fmf1()
|
||||
|
||||
; Throw in some other attributes to make sure those stay in the right places.
|
||||
|
||||
|
|
|
|||
|
|
@ -765,9 +765,7 @@ define void @fastmathflags(float %op1, float %op2) {
|
|||
%f.contract = fadd contract float %op1, %op2
|
||||
; CHECK: %f.contract = fadd contract float %op1, %op2
|
||||
%f.fast = fadd fast float %op1, %op2
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'afn' bit set, so this is not fully 'fast'.
|
||||
; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp contract float %op1, %op2
|
||||
; CHECK: %f.fast = fadd fast float %op1, %op2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
@ -780,9 +778,7 @@ declare <4 x double> @fmf3()
|
|||
; CHECK-LABEL: fastMathFlagsForCalls(
|
||||
define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
|
||||
%call.fast = call fast float @fmf1()
|
||||
; 'fast' used to be its own bit, but this changed in Oct 2017.
|
||||
; The binary test file does not have the newer 'afn' bit set, so this is not fully 'fast'.
|
||||
; CHECK: %call.fast = call reassoc nnan ninf nsz arcp contract float @fmf1()
|
||||
; CHECK: %call.fast = call fast float @fmf1()
|
||||
|
||||
; Throw in some other attributes to make sure those stay in the right places.
|
||||
|
||||
|
|
|
|||
131
test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
Normal file
131
test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s
|
||||
|
||||
# PR36345
|
||||
--- |
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-arm-none-eabi"
|
||||
|
||||
; Function Attrs: noinline nounwind optnone
|
||||
define void @fp16_to_gpr([2 x half], [2 x half]* %addr) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @gpr_to_fp16() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @gpr_to_fp16_physreg() {
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: fp16_to_gpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr }
|
||||
- { id: 1, class: fpr }
|
||||
- { id: 2, class: fpr }
|
||||
- { id: 3, class: gpr }
|
||||
- { id: 4, class: gpr }
|
||||
- { id: 5, class: gpr }
|
||||
- { id: 6, class: gpr }
|
||||
- { id: 7, class: gpr }
|
||||
- { id: 8, class: gpr }
|
||||
- { id: 9, class: gpr }
|
||||
- { id: 10, class: gpr }
|
||||
- { id: 11, class: gpr }
|
||||
- { id: 12, class: gpr }
|
||||
body: |
|
||||
bb.1 (%ir-block.1):
|
||||
liveins: %h0, %h1, %x0
|
||||
|
||||
; CHECK-LABEL: name: fp16_to_gpr
|
||||
; CHECK: liveins: %h0, %h1, %x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY %h0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY %h1
|
||||
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY2]], 0, 15
|
||||
; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
|
||||
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG1]]
|
||||
; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY3]], 16, 15
|
||||
; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[BFMWri1]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:gpr64sp = COPY %x0
|
||||
; CHECK: STRWui [[COPY4]], [[COPY5]], 0 :: (store 4 into %ir.addr, align 2)
|
||||
; CHECK: RET_ReallyLR
|
||||
%1:fpr(s16) = COPY %h0
|
||||
%2:fpr(s16) = COPY %h1
|
||||
%3:gpr(s32) = G_IMPLICIT_DEF
|
||||
%11:gpr(s16) = COPY %1(s16)
|
||||
%4:gpr(s32) = G_INSERT %3, %11(s16), 0
|
||||
%12:gpr(s16) = COPY %2(s16)
|
||||
%5:gpr(s32) = G_INSERT %4, %12(s16), 16
|
||||
%0:gpr(s32) = COPY %5(s32)
|
||||
%6:gpr(p0) = COPY %x0
|
||||
G_STORE %0(s32), %6(p0) :: (store 4 into %ir.addr, align 2)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: gpr_to_fp16
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr }
|
||||
- { id: 1, class: gpr }
|
||||
- { id: 2, class: fpr }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
liveins: %w0
|
||||
|
||||
; CHECK-LABEL: name: gpr_to_fp16
|
||||
; CHECK: liveins: %w0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[COPY1]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub
|
||||
; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]]
|
||||
; CHECK: %h0 = COPY [[COPY4]]
|
||||
; CHECK: RET_ReallyLR implicit %h0
|
||||
%0:gpr(s32) = COPY %w0
|
||||
%1:gpr(s16) = G_TRUNC %0(s32)
|
||||
%2:fpr(s16) = COPY %1(s16)
|
||||
%h0 = COPY %2(s16)
|
||||
RET_ReallyLR implicit %h0
|
||||
|
||||
...
|
||||
---
|
||||
name: gpr_to_fp16_physreg
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr }
|
||||
- { id: 1, class: gpr }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
liveins: %w0
|
||||
|
||||
; CHECK-LABEL: name: gpr_to_fp16_physreg
|
||||
; CHECK: liveins: %w0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[COPY1]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub
|
||||
; CHECK: %h0 = COPY [[COPY3]]
|
||||
; CHECK: RET_ReallyLR implicit %h0
|
||||
%0:gpr(s32) = COPY %w0
|
||||
%1:gpr(s16) = G_TRUNC %0(s32)
|
||||
%h0 = COPY %1(s16)
|
||||
RET_ReallyLR implicit %h0
|
||||
|
||||
...
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
# CHECK-LABEL: name: insert_gprs
|
||||
name: insert_gprs
|
||||
name: insert_gprx
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
|
|
@ -10,26 +10,56 @@ body: |
|
|||
bb.0:
|
||||
liveins: %x0
|
||||
|
||||
; CHECK-LABEL: name: insert_gprx
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
|
||||
; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
|
||||
; CHECK: [[BFMXri:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG]], 0, 31
|
||||
; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
|
||||
; CHECK: [[BFMXri1:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG1]], 51, 31
|
||||
; CHECK: %x0 = COPY [[BFMXri]]
|
||||
; CHECK: %x1 = COPY [[BFMXri1]]
|
||||
%0:gpr(s32) = COPY %w0
|
||||
|
||||
%1:gpr(s64) = G_IMPLICIT_DEF
|
||||
|
||||
; CHECK: body:
|
||||
; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32
|
||||
; CHECK: %2:gpr64 = BFMXri %1, [[TMP]], 0, 31
|
||||
%2:gpr(s64) = G_INSERT %1, %0, 0
|
||||
|
||||
; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32
|
||||
; CHECK: %3:gpr64 = BFMXri %1, [[TMP]], 51, 31
|
||||
%3:gpr(s64) = G_INSERT %1, %0, 13
|
||||
|
||||
%x0 = COPY %2
|
||||
%x1 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: insert_gprw
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %w0, %w1
|
||||
; CHECK-LABEL: name: insert_gprw
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY]]
|
||||
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
|
||||
; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY1]], 0, 15
|
||||
; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY2]], 16, 15
|
||||
; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[BFMWri1]]
|
||||
; CHECK: %w0 = COPY [[COPY3]]
|
||||
%1:gpr(s32) = COPY %w0
|
||||
%2:gpr(s32) = COPY %w1
|
||||
%3:gpr(s16) = G_TRUNC %1(s32)
|
||||
%4:gpr(s16) = G_TRUNC %1(s32)
|
||||
%5:gpr(s32) = G_IMPLICIT_DEF
|
||||
%6:gpr(s32) = G_INSERT %5, %3(s16), 0
|
||||
%7:gpr(s32) = G_INSERT %6, %4(s16), 16
|
||||
%0:gpr(s32) = COPY %7(s32)
|
||||
%w0 = COPY %0
|
||||
...
|
||||
|
||||
---
|
||||
# CHECK-LABEL: name: extract_gprs
|
||||
name: extract_gprs
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
|
@ -38,17 +68,49 @@ body: |
|
|||
bb.0:
|
||||
liveins: %x0
|
||||
|
||||
; CHECK-LABEL: name: extract_gprs
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
|
||||
; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 0, 31
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[UBFMXri]].sub_32
|
||||
; CHECK: [[UBFMXri1:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 13, 44
|
||||
; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[UBFMXri1]].sub_32
|
||||
; CHECK: %w0 = COPY [[COPY1]]
|
||||
; CHECK: %w1 = COPY [[COPY2]]
|
||||
%0:gpr(s64) = COPY %x0
|
||||
|
||||
; CHECK: body:
|
||||
; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 0, 31
|
||||
; CHECK: %1:gpr32 = COPY [[TMP]].sub_32
|
||||
%1:gpr(s32) = G_EXTRACT %0, 0
|
||||
|
||||
; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 13, 44
|
||||
; CHECK: %2:gpr32 = COPY [[TMP]].sub_32
|
||||
%2:gpr(s32) = G_EXTRACT %0, 13
|
||||
|
||||
%w0 = COPY %1
|
||||
%w1 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: extract_gprw
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %w0
|
||||
|
||||
; CHECK-LABEL: name: extract_gprw
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
|
||||
; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15
|
||||
; CHECK: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 15, 30
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[UBFMWri]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
|
||||
; CHECK: %h0 = COPY [[COPY2]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]]
|
||||
; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]].hsub
|
||||
; CHECK: %h1 = COPY [[COPY4]]
|
||||
%0:gpr(s32) = COPY %w0
|
||||
|
||||
%1:gpr(s16) = G_EXTRACT %0, 0
|
||||
|
||||
%2:gpr(s16) = G_EXTRACT %0, 15
|
||||
|
||||
%h0 = COPY %1
|
||||
%h1 = COPY %2
|
||||
...
|
||||
|
|
|
|||
|
|
@ -261,8 +261,42 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
|
||||
; GCN: v_readfirstlane
|
||||
define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
|
||||
main_body:
|
||||
%descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
|
||||
br label %.outer_loop_header
|
||||
|
||||
ret_block: ; preds = %.outer, %.label22, %main_body
|
||||
ret void
|
||||
|
||||
.outer_loop_header:
|
||||
br label %.inner_loop_header
|
||||
|
||||
.inner_loop_header: ; preds = %.inner_loop_body, %.outer_loop_header
|
||||
%loopctr.1 = phi i32 [ 0, %.outer_loop_header ], [ %loopctr.2, %.inner_loop_body ]
|
||||
%loopctr.2 = add i32 %loopctr.1, 1
|
||||
%inner_br1 = icmp slt i32 %loopctr.2, 10
|
||||
br i1 %inner_br1, label %.inner_loop_body, label %ret_block
|
||||
|
||||
.inner_loop_body:
|
||||
%descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
|
||||
%load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
|
||||
%inner_br2 = icmp uge i32 %1, 10
|
||||
br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
|
||||
|
||||
.outer_loop_body:
|
||||
%offset = shl i32 %loopctr.2, 6
|
||||
%load2result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 %offset)
|
||||
%outer_br = fcmp ueq float %load2result, 0x0
|
||||
br i1 %outer_br, label %.outer_loop_header, label %ret_block
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
||||
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = !{}
|
||||
|
|
|
|||
46
test/CodeGen/PowerPC/pr36292.ll
Normal file
46
test/CodeGen/PowerPC/pr36292.ll
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \
|
||||
; RUN: FileCheck %s --implicit-check-not=mtctr --implicit-check-not=bdnz
|
||||
$test = comdat any
|
||||
|
||||
; No CTR loop due to frem (since it is always a call).
|
||||
define void @test() #0 comdat {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: ld 29, 0(3)
|
||||
; CHECK: ld 30, 40(1)
|
||||
; CHECK: xxlxor 31, 31, 31
|
||||
; CHECK: cmpld 30, 29
|
||||
; CHECK-NEXT: bge- 0, .LBB0_2
|
||||
; CHECK-NEXT: .p2align 5
|
||||
; CHECK-NEXT: .LBB0_1: # %bounds.ok
|
||||
; CHECK: fmr 1, 31
|
||||
; CHECK-NEXT: lfsx 2, 0, 3
|
||||
; CHECK-NEXT: bl fmodf
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: addi 30, 30, 1
|
||||
; CHECK-NEXT: stfsx 1, 0, 3
|
||||
; CHECK-NEXT: cmpld 30, 29
|
||||
; CHECK-NEXT: blt+ 0, .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_2: # %bounds.fail
|
||||
; CHECK-NEXT: std 30, 40(1)
|
||||
%pos = alloca i64, align 8
|
||||
br label %forcond
|
||||
|
||||
forcond: ; preds = %bounds.ok, %0
|
||||
%1 = load i64, i64* %pos
|
||||
%.len1 = load i64, i64* undef
|
||||
%bounds.cmp = icmp ult i64 %1, %.len1
|
||||
br i1 %bounds.cmp, label %bounds.ok, label %bounds.fail
|
||||
|
||||
bounds.ok: ; preds = %forcond
|
||||
%2 = load float, float* undef
|
||||
%3 = frem float 0.000000e+00, %2
|
||||
store float %3, float* undef
|
||||
%4 = load i64, i64* %pos
|
||||
%5 = add i64 %4, 1
|
||||
store i64 %5, i64* %pos
|
||||
br label %forcond
|
||||
|
||||
bounds.fail: ; preds = %forcond
|
||||
unreachable
|
||||
}
|
||||
|
||||
|
|
@ -1,5 +1,12 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; NOTE: clwb is available in Skylake Server, not available in the newer
|
||||
; NOTE: Cannon Lake arch, but available again in the newer Ice Lake arch.
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=clwb | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s
|
||||
; RUN: not llc < %s -mtriple=i686-apple-darwin -mcpu=cannonlake 2>&1 | FileCheck %s --check-prefix=CNL
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=icelake | FileCheck %s
|
||||
|
||||
; CNL: LLVM ERROR: Cannot select: intrinsic %llvm.x86.clwb
|
||||
|
||||
define void @clwb(i8* %p) nounwind {
|
||||
; CHECK-LABEL: clwb:
|
||||
|
|
|
|||
17
test/Transforms/InstCombine/pr36362.ll
Normal file
17
test/Transforms/InstCombine/pr36362.ll
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
;RUN: opt -instcombine -S %s | FileCheck %s
|
||||
|
||||
; We shouldn't remove the select before the srem
|
||||
define i32 @foo(i1 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[A:%.*]], i32 [[B:%.*]], i32 -1
|
||||
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[C:%.*]], [[SEL1]]
|
||||
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[A]], i32 [[REM]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[SEL2]]
|
||||
;
|
||||
%sel1 = select i1 %a, i32 %b, i32 -1
|
||||
%rem = srem i32 %c, %sel1
|
||||
%sel2 = select i1 %a, i32 %rem, i32 0
|
||||
ret i32 %sel2
|
||||
}
|
||||
|
||||
|
|
@ -670,6 +670,67 @@ try.cont:
|
|||
ret void
|
||||
}
|
||||
|
||||
; The sinkable call should be sunk into an exit block split. After splitting
|
||||
; the exit block, BlockColor for new blocks should be added properly so
|
||||
; that we should be able to access valid ColorVector.
|
||||
;
|
||||
; CHECK-LABEL:@test21_pr36184
|
||||
; CHECK-LABEL: Loop
|
||||
; CHECK-NOT: %sinkableCall
|
||||
; CHECK-LABEL:Out.split.loop.exit
|
||||
; CHECK: %sinkableCall
|
||||
define i32 @test21_pr36184(i8* %P) personality i32 (...)* @__CxxFrameHandler3 {
|
||||
entry:
|
||||
br label %loop.ph
|
||||
|
||||
loop.ph:
|
||||
br label %Loop
|
||||
|
||||
Loop:
|
||||
%sinkableCall = call i32 @strlen( i8* %P ) readonly
|
||||
br i1 undef, label %ContLoop, label %Out
|
||||
|
||||
ContLoop:
|
||||
br i1 undef, label %Loop, label %Out
|
||||
|
||||
Out:
|
||||
%idx = phi i32 [ %sinkableCall, %Loop ], [0, %ContLoop ]
|
||||
ret i32 %idx
|
||||
}
|
||||
|
||||
; We do not support splitting a landingpad block if BlockColors is not empty.
|
||||
; CHECK-LABEL: @test22
|
||||
; CHECK-LABEL: while.body2
|
||||
; CHECK-LABEL: %mul
|
||||
; CHECK-NOT: lpadBB.split{{.*}}
|
||||
define void @test22(i1 %b, i32 %v1, i32 %v2) personality i32 (...)* @__CxxFrameHandler3 {
|
||||
entry:
|
||||
br label %while.cond
|
||||
while.cond:
|
||||
br i1 %b, label %try.cont, label %while.body
|
||||
|
||||
while.body:
|
||||
invoke void @may_throw()
|
||||
to label %while.body2 unwind label %lpadBB
|
||||
|
||||
while.body2:
|
||||
%v = call i32 @getv()
|
||||
%mul = mul i32 %v, %v2
|
||||
invoke void @may_throw2()
|
||||
to label %while.cond unwind label %lpadBB
|
||||
lpadBB:
|
||||
%.lcssa1 = phi i32 [ 0, %while.body ], [ %mul, %while.body2 ]
|
||||
landingpad { i8*, i32 }
|
||||
catch i8* null
|
||||
br label %lpadBBSucc1
|
||||
|
||||
lpadBBSucc1:
|
||||
ret void
|
||||
|
||||
try.cont:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @may_throw()
|
||||
declare void @may_throw2()
|
||||
declare i32 @__CxxFrameHandler3(...)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -scev-version-unknown < %s 2>&1 | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -scev-version-unknown < %s 2>&1 | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
@a = common local_unnamed_addr global i32 0, align 4
|
||||
@b = common local_unnamed_addr global i8 0, align 1
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|||
; CHECK-NEXT: [[TMP17]] = zext <4 x i8> [[TMP16]] to <4 x i32>
|
||||
; CHECK-NEXT: br i1 {{.*}}, label %middle.block, label %vector.body
|
||||
;
|
||||
define void @PR34687(i1 %c, i32 %x, i32 %n) {
|
||||
define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
|
@ -36,5 +36,38 @@ if.end:
|
|||
|
||||
for.end:
|
||||
%tmp2 = phi i32 [ %r.next, %if.end ]
|
||||
ret void
|
||||
%tmp3 = trunc i32 %tmp2 to i8
|
||||
ret i8 %tmp3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @PR35734(
|
||||
; CHECK: vector.ph:
|
||||
; CHECK: [[TMP3:%.*]] = insertelement <4 x i32> zeroinitializer, i32 %y, i32 0
|
||||
; CHECK-NEXT: br label %vector.body
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP3]], %vector.ph ], [ [[TMP9:%.*]], %vector.body ]
|
||||
; CHECK: [[TMP5:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
|
||||
; CHECK: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1>
|
||||
; CHECK-NEXT: [[TMP9]] = sext <4 x i1> [[TMP8]] to <4 x i32>
|
||||
; CHECK-NEXT: br i1 {{.*}}, label %middle.block, label %vector.body
|
||||
;
|
||||
define i32 @PR35734(i32 %x, i32 %y) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%i = phi i32 [ %x, %entry ], [ %i.next, %for.body ]
|
||||
%r = phi i32 [ %y, %entry ], [ %r.next, %for.body ]
|
||||
%tmp0 = and i32 %r, 1
|
||||
%r.next = add i32 %tmp0, -1
|
||||
%i.next = add nsw i32 %i, 1
|
||||
%cond = icmp sgt i32 %i, 77
|
||||
br i1 %cond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
%tmp1 = phi i32 [ %r.next, %for.body ]
|
||||
ret i32 %tmp1
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -scev-version-unknown < %s | FileCheck %s -check-prefix=VF8
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -scev-version-unknown < %s | FileCheck %s -check-prefix=VF1
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,6 @@ RUN: llvm-config --link-static --system-libs 2>&1 | FileCheck %s
|
|||
REQUIRES: static-libs
|
||||
REQUIRES: system-windows
|
||||
CHECK-NOT: -l
|
||||
CHECK: psapi.lib shell32.lib ole32.lib uuid.lib
|
||||
CHECK: psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib
|
||||
CHECK-NOT: error
|
||||
CHECK-NOT: warning
|
||||
|
|
|
|||
Loading…
Reference in a new issue