Vendor import of llvm release_50 branch r311606:

https://llvm.org/svn/llvm-project/llvm/branches/release_50@311606
This commit is contained in:
Dimitry Andric 2017-08-24 16:35:02 +00:00
parent 15c5c77fa0
commit 5e529592b1
47 changed files with 1355 additions and 4879 deletions

View file

@ -77,11 +77,33 @@ Changes to the LLVM IR
* Added speculatable attribute indicating a function which has no
side-effects which could inhibit hoisting of calls.
Changes to the ARM Backend
Changes to the Arm Targets
--------------------------
During this release ...
During this release the AArch64 target has:
* A much improved Global ISel at O0.
* Support for ARMv8.1, 8.2 and 8.3 instructions.
* New scheduler information for ThunderX2.
* Some SVE type changes but not much more than that.
* Made instruction fusion more aggressive, resulting in speedups
for code making use of AArch64 AES instructions. AES fusion has been
enabled for most Cortex-A cores and the AArch64MacroFusion pass was moved
to the generic MacroFusion pass.
* Added preferred function alignments for most Cortex-A cores.
* OpenMP "offload-to-self" base support.
During this release the ARM target has:
* Improved, but still mostly broken, Global ISel.
* Scheduling models update, new schedule for Cortex-A57.
* Hardware breakpoint support in LLDB.
* New assembler error handling, with spelling corrections and multiple
suggestions on how to fix problems.
* Improved mixed ARM/Thumb code generation. Some cases in which wrong
relocations were emitted have been fixed.
* Added initial support for mixed ARM/Thumb link-time optimization, using the
thumb-mode target feature.
Changes to the MIPS Target
--------------------------
@ -92,7 +114,29 @@ Changes to the MIPS Target
Changes to the PowerPC Target
-----------------------------
During this release ...
* Additional support and exploitation of POWER ISA 3.0: vabsdub, vabsduh,
vabsduw, modsw, moduw, modsd, modud, lxv, stxv, vextublx, vextubrx, vextuhlx,
vextuhrx, vextuwlx, vextuwrx, vextsb2w, vextsb2d, vextsh2w, vextsh2d, and
vextsw2d
* Implemented Optimal Code Sequences from The PowerPC Compiler Writer's Guide.
* Enable -fomit-frame-pointer by default.
* Improved handling of bit reverse intrinsic.
* Improved handling of memcpy and memcmp functions.
* Improved handling of branches with static branch hints.
* Improved codegen for atomic load_acquire.
* Improved block placement during code layout
* Many improvements to instruction selection and code generation
Changes to the X86 Target
-------------------------

View file

@ -85,7 +85,10 @@ namespace ISD {
/// If N is a BUILD_VECTOR node whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatValue.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
/// This sets \p SplatValue to the smallest possible splat unless AllowShrink
/// is set to false.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue,
bool AllowShrink = true);
/// Return true if the specified node is a BUILD_VECTOR where all of the
/// elements are ~0 or undef.

View file

@ -627,6 +627,7 @@ private:
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
SDValue ScalarizeVecOp_VSETCC(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);

View file

@ -484,6 +484,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = ScalarizeVecOp_VSELECT(N);
break;
case ISD::SETCC:
Res = ScalarizeVecOp_VSETCC(N);
break;
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@ -560,6 +563,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
N->getOperand(2));
}
/// Handle a vector SETCC whose vector operands have to be scalarized. The
/// result type is asserted to be v1i1, so the comparison is emitted as a
/// scalar SETCC on the scalarized operands, extended according to the boolean
/// contents reported for the operand vector type, and finally wrapped in a
/// SCALAR_TO_VECTOR node carrying the original result type.
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
  EVT ResVT = N->getValueType(0);
  EVT CmpOpVT = N->getOperand(0).getValueType();
  assert(ResVT.isVector() && CmpOpVT.isVector() &&
         "Operand types must be vectors");
  assert(ResVT == MVT::v1i1 && "Expected v1i1 type");

  // Fetch the scalar replacements of both compared operands.
  SDValue ScalarLHS = GetScalarizedVector(N->getOperand(0));
  SDValue ScalarRHS = GetScalarizedVector(N->getOperand(1));
  SDLoc Loc(N);

  // Emit the comparison itself as a scalar i1 SETCC, reusing the original
  // condition-code operand.
  SDValue Cmp = DAG.getNode(ISD::SETCC, Loc, MVT::i1, ScalarLHS, ScalarRHS,
                            N->getOperand(2));

  // Scalar and vector booleans may be represented differently; pick the
  // extension that matches the boolean contents of the operand vector type
  // and apply it when converting to the result's element type.
  ISD::NodeType BoolExt =
      TargetLowering::getExtendForContent(TLI.getBooleanContents(CmpOpVT));
  SDValue Extended =
      DAG.getNode(BoolExt, Loc, ResVT.getVectorElementType(), Cmp);

  // Rewrap the scalar so the node keeps its single-element vector result type.
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, Loc, ResVT, Extended);
}
/// If the value to store is a vector that needs to be scalarized, it must be
/// <1 x ty>. Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){

View file

@ -116,7 +116,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// ISD Namespace
//===----------------------------------------------------------------------===//
bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
bool AllowShrink) {
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
@ -124,9 +125,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasUndefs;
EVT EltVT = N->getValueType(0).getVectorElementType();
return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
EltVT.getSizeInBits() >= SplatBitSize;
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
MinSplatBits) &&
EltSize >= SplatBitSize;
}
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be

View file

@ -14,6 +14,10 @@ add_llvm_library(LLVMExecutionEngine
intrinsics_gen
)
if(BUILD_SHARED_LIBS)
target_link_libraries(LLVMExecutionEngine PUBLIC LLVMRuntimeDyld)
endif()
add_subdirectory(Interpreter)
add_subdirectory(MCJIT)
add_subdirectory(Orc)

View file

@ -2239,14 +2239,14 @@ bool llvm::UpgradeDebugInfo(Module &M) {
}
bool llvm::UpgradeModuleFlags(Module &M) {
const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
if (!ModFlags)
return false;
bool HasObjCFlag = false, HasClassProperties = false;
bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
MDNode *Op = ModFlags->getOperand(I);
if (Op->getNumOperands() < 2)
if (Op->getNumOperands() != 3)
continue;
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
if (!ID)
@ -2255,7 +2255,24 @@ bool llvm::UpgradeModuleFlags(Module &M) {
HasObjCFlag = true;
if (ID->getString() == "Objective-C Class Properties")
HasClassProperties = true;
// Upgrade PIC/PIE Module Flags. The module flag behavior for these two
// fields was Error and is now Max.
if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
if (auto *Behavior =
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
if (Behavior->getLimitedValue() == Module::Error) {
Type *Int32Ty = Type::getInt32Ty(M.getContext());
Metadata *Ops[3] = {
ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
MDString::get(M.getContext(), ID->getString()),
Op->getOperand(2)};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
}
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
// flag of value 0, so we can correctly downgrade this flag when trying to
@ -2264,9 +2281,10 @@ bool llvm::UpgradeModuleFlags(Module &M) {
if (HasObjCFlag && !HasClassProperties) {
M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
return true;
Changed = true;
}
return false;
return Changed;
}
static bool isOldLoopArgument(Metadata *MD) {

View file

@ -232,7 +232,13 @@ private:
for (;;) {
read();
if (Tok.K == Identifier && Tok.Value[0] == '@') {
Tok.Value.drop_front().getAsInteger(10, E.Ordinal);
if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
// Not an ordinal modifier at all, but the next export (fastcall
// decorated) - complete the current one.
unget();
Info.Exports.push_back(E);
return Error::success();
}
read();
if (Tok.K == KwNoname) {
E.Noname = true;

View file

@ -5901,7 +5901,10 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
WhichResult = M[i] == 0 ? 0 : 1;
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; ++j) {
if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
return false;
@ -5932,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned Half = NumElts / 2;
for (unsigned i = 0; i < M.size(); i += NumElts) {
WhichResult = M[i] == 0 ? 0 : 1;
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += Half) {
unsigned Idx = WhichResult;
for (unsigned k = 0; k < Half; ++k) {
@ -5972,7 +5978,10 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
WhichResult = M[i] == 0 ? 0 : 1;
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@ -6005,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
WhichResult = M[i] == 0 ? 0 : 1;
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@ -8793,6 +8805,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
.addReg(ARM::R12,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(ARM::CPSR,
RegState::Implicit | RegState::Define | RegState::Dead);
break;
case CodeModel::Large:
@ -8808,6 +8822,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
.addReg(ARM::R12,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(ARM::CPSR,
RegState::Implicit | RegState::Define | RegState::Dead);
break;
}

View file

@ -29540,8 +29540,9 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
// In SetLT case, The second operand of the comparison can be either 1 or 0.
APInt SplatVal;
if ((CC == ISD::SETLT) &&
!((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
SplatVal == 1) ||
!((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal,
/*AllowShrink*/false) &&
SplatVal.isOneValue()) ||
(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
return false;
@ -30628,6 +30629,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Byte blends are only available in AVX2
if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
return SDValue();
// There are no 512-bit blend instructions that use sign bits.
if (VT.is512BitVector())
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask(APInt::getSignMask(BitWidth));
@ -32058,7 +32062,8 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
return SDValue();
APInt SplatVal;
if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal,
/*AllowShrink*/false) ||
!SplatVal.isMask())
return SDValue();
@ -32642,7 +32647,8 @@ static SDValue detectUSatPattern(SDValue In, EVT VT) {
"Unexpected types for truncate operation");
APInt C;
if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C,
/*AllowShrink*/false)) {
// C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
// the element size of the destination type.
return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) :
@ -35346,7 +35352,8 @@ static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
SDNode *N1 = N->getOperand(1).getNode();
APInt SplatVal;
if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue())
if (!ISD::isConstantSplatVector(N1, SplatVal, /*AllowShrink*/false) ||
!SplatVal.isOneValue())
return SDValue();
SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));

View file

@ -3619,8 +3619,8 @@ let Predicates = [HasVLX] in {
def : Pat<(alignedstore256 (v4f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore (v8f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
def : Pat<(alignedstore256 (v8f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v4i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),

File diff suppressed because it is too large Load diff

View file

@ -161,6 +161,22 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
if (Path.empty())
Path = getImplibPath(Def->OutputFile);
if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) {
for (COFFShortExport& E : Def->Exports) {
if (E.isWeak() || (!E.Name.empty() && E.Name[0] == '?'))
continue;
E.SymbolName = E.Name;
// Trim off the trailing decoration. Symbols will always have a
// starting prefix here (either _ for cdecl/stdcall, @ for fastcall
// or ? for C++ functions). (Vectorcall functions also will end up having
// a prefix here, even if they shouldn't.)
E.Name = E.Name.substr(0, E.Name.find('@', 1));
// By making sure E.SymbolName != E.Name for decorated symbols,
// writeImportLibrary writes these symbols with the type
// IMPORT_NAME_UNDECORATE.
}
}
if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
return 1;
return 0;

View file

@ -12,13 +12,13 @@ def D_long : JoinedOrSeparate<["--"], "dllname">, Alias<D>;
def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">;
def d_long : JoinedOrSeparate<["--"], "input-def">, Alias<d>;
def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
def k_alias: Flag<["--"], "kill-at">, Alias<k>;
//==============================================================================
// The flags below do nothing. They are defined only for dlltool compatibility.
//==============================================================================
def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
def k_alias: Flag<["--"], "kill-at">, Alias<k>;
def S: JoinedOrSeparate<["-"], "S">, HelpText<"Assembler">;
def S_alias: JoinedOrSeparate<["--"], "as">, Alias<S>;

View file

@ -155,8 +155,7 @@ public:
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
// Don't skip optnone functions; atomics still need to be lowered.
FunctionAnalysisManager DummyFAM;
auto PA = Impl.run(F, DummyFAM);
return !PA.areAllPreserved();

View file

@ -1941,6 +1941,12 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
if (!User->isCommutative() && User->getOperand(1) != I)
return nullptr;
// Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
// resulting subtract will be broken up later. This can get us into an
// infinite loop during reassociation.
if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User))
return nullptr;
// Change the sign of the constant.
APFloat Val = CF->getValueAPF();
Val.changeSign();

View file

@ -341,8 +341,9 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (Value *MappedV = VMap.lookup(V))
V = MappedV;
if (NewFunc != OldFunc)
if (Value *MappedV = VMap.lookup(V))
V = MappedV;
if (!NewInst->mayHaveSideEffects()) {
VMap[&*II] = V;

View file

@ -1,9 +1,13 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; RUN: verify-uselistorder < %s
!llvm.module.flags = !{!0}
!llvm.module.flags = !{!0, !1, !2}
!0 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!0 = !{i32 1, !"PIC Level", i32 1}
!1 = !{i32 1, !"PIE Level", i32 1}
!2 = !{i32 1, !"Objective-C Image Info Version", i32 0}
; CHECK: !0 = !{i32 1, !"Objective-C Image Info Version", i32 0}
; CHECK: !1 = !{i32 4, !"Objective-C Class Properties", i32 0}
; CHECK: !0 = !{i32 7, !"PIC Level", i32 1}
; CHECK: !1 = !{i32 7, !"PIE Level", i32 1}
; CHECK: !2 = !{i32 1, !"Objective-C Image Info Version", i32 0}
; CHECK: !3 = !{i32 4, !"Objective-C Class Properties", i32 0}

View file

@ -0,0 +1,13 @@
; RUN: llc -mtriple thumbv7-windows-itanium -filetype asm -o /dev/null %s -print-machineinstrs=expand-isel-pseudos 2>&1 | FileCheck %s
declare arm_aapcs_vfpcc void @g(i8*) local_unnamed_addr
define arm_aapcs_vfpcc void @f(i32 %i) local_unnamed_addr {
entry:
%vla = alloca i8, i32 %i, align 1
call arm_aapcs_vfpcc void @g(i8* nonnull %vla)
ret void
}
; CHECK: tBL pred:14, pred:%noreg, <es:__chkstk>, %LR<imp-def>, %SP<imp-use>, %R4<imp-use,kill>, %R4<imp-def>, %R12<imp-def,dead>, %CPSR<imp-def,dead>

View file

@ -282,6 +282,25 @@ entry:
ret <8 x i16> %0
}
; NOTE: The mask here looks like something that could be done with a vzip,
; but which the current handling of two-result vzip can't do - thus ending up
; as a vtrn.
define <8 x i16> @vzip_lower_shufflemask_undef_rev(<4 x i16>* %A, <4 x i16>* %B) {
; CHECK-LABEL: vzip_lower_shufflemask_undef_rev:
; CHECK: @ BB#0: @ %entry
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d19, [r0]
; CHECK-NEXT: vtrn.16 d19, d16
; CHECK-NEXT: vmov r0, r1, d18
; CHECK-NEXT: vmov r2, r3, d19
; CHECK-NEXT: mov pc, lr
entry:
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 4, i32 undef, i32 undef>
ret <8 x i16> %0
}
define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
; CHECK-LABEL: vzip_lower_shufflemask_zeroed:
; CHECK: @ BB#0: @ %entry

View file

@ -10,8 +10,8 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SANDY-LABEL: test_addpd:
; SANDY: # BB#0:
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addpd:
; HASWELL: # BB#0:
@ -40,8 +40,8 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SANDY-LABEL: test_addps:
; SANDY: # BB#0:
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addps:
; HASWELL: # BB#0:
@ -70,8 +70,8 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; SANDY-LABEL: test_addsubpd:
; SANDY: # BB#0:
; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addsubpd:
; HASWELL: # BB#0:
@ -101,8 +101,8 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; SANDY-LABEL: test_addsubps:
; SANDY: # BB#0:
; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addsubps:
; HASWELL: # BB#0:
@ -131,10 +131,10 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi
define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; SANDY-LABEL: test_andnotpd:
; SANDY: # BB#0:
; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_andnotpd:
; HASWELL: # BB#0:
@ -172,10 +172,10 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_andnotps:
; SANDY: # BB#0:
; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_andnotps:
; HASWELL: # BB#0:
@ -213,10 +213,10 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float>
define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; SANDY-LABEL: test_andpd:
; SANDY: # BB#0:
; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_andpd:
; HASWELL: # BB#0:
@ -252,10 +252,10 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_andps:
; SANDY: # BB#0:
; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_andps:
; HASWELL: # BB#0:
@ -291,10 +291,10 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; SANDY-LABEL: test_blendpd:
; SANDY: # BB#0:
; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendpd:
; HASWELL: # BB#0:
@ -326,9 +326,9 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl
define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_blendps:
; SANDY: # BB#0:
; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendps:
; HASWELL: # BB#0:
@ -356,9 +356,9 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *
define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
; SANDY-LABEL: test_blendvpd:
; SANDY: # BB#0:
; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendvpd:
; HASWELL: # BB#0:
@ -387,9 +387,9 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4
define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
; SANDY-LABEL: test_blendvps:
; SANDY: # BB#0:
; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendvps:
; HASWELL: # BB#0:
@ -418,8 +418,8 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x f
define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
; SANDY-LABEL: test_broadcastf128:
; SANDY: # BB#0:
; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_broadcastf128:
; HASWELL: # BB#0:
@ -443,8 +443,8 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
define <4 x double> @test_broadcastsd_ymm(double *%a0) {
; SANDY-LABEL: test_broadcastsd_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_broadcastsd_ymm:
; HASWELL: # BB#0:
@ -469,8 +469,8 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) {
define <4 x float> @test_broadcastss(float *%a0) {
; SANDY-LABEL: test_broadcastss:
; SANDY: # BB#0:
; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_broadcastss:
; HASWELL: # BB#0:
@ -496,7 +496,7 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) {
; SANDY-LABEL: test_broadcastss_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_broadcastss_ymm:
; HASWELL: # BB#0:
@ -522,9 +522,9 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SANDY-LABEL: test_cmppd:
; SANDY: # BB#0:
; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cmppd:
; HASWELL: # BB#0:
@ -560,9 +560,9 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SANDY-LABEL: test_cmpps:
; SANDY: # BB#0:
; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cmpps:
; HASWELL: # BB#0:
@ -598,9 +598,9 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; SANDY-LABEL: test_cvtdq2pd:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtdq2pd:
; HASWELL: # BB#0:
@ -632,12 +632,12 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
; SANDY-LABEL: test_cvtdq2ps:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:1.00]
; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00]
; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [4:1.00]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtdq2ps:
; HASWELL: # BB#0:
@ -669,10 +669,10 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
; SANDY-LABEL: test_cvtpd2dq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00]
; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtpd2dq:
; HASWELL: # BB#0:
@ -704,10 +704,10 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
; SANDY-LABEL: test_cvtpd2ps:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00]
; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtpd2ps:
; HASWELL: # BB#0:
@ -741,8 +741,8 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
; SANDY: # BB#0:
; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00]
; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtps2dq:
; HASWELL: # BB#0:
@ -774,9 +774,9 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; SANDY-LABEL: test_divpd:
; SANDY: # BB#0:
; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00]
; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [16:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_divpd:
; HASWELL: # BB#0:
@ -804,9 +804,9 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_divps:
; SANDY: # BB#0:
; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00]
; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [16:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_divps:
; HASWELL: # BB#0:
@ -834,9 +834,9 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_dpps:
; SANDY: # BB#0:
; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_dpps:
; HASWELL: # BB#0:
@ -866,9 +866,9 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa
; SANDY-LABEL: test_extractf128:
; SANDY: # BB#0:
; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_extractf128:
; HASWELL: # BB#0:
@ -900,7 +900,7 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double
; SANDY: # BB#0:
; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_haddpd:
; HASWELL: # BB#0:
@ -929,9 +929,9 @@ declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounw
define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_haddps:
; SANDY: # BB#0:
; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_haddps:
; HASWELL: # BB#0:
@ -960,9 +960,9 @@ declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind
define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; SANDY-LABEL: test_hsubpd:
; SANDY: # BB#0:
; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_hsubpd:
; HASWELL: # BB#0:
@ -991,9 +991,9 @@ declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounw
define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_hsubps:
; SANDY: # BB#0:
; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_hsubps:
; HASWELL: # BB#0:
@ -1023,9 +1023,9 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float
; SANDY-LABEL: test_insertf128:
; SANDY: # BB#0:
; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_insertf128:
; HASWELL: # BB#0:
@ -1059,8 +1059,8 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float
define <32 x i8> @test_lddqu(i8* %a0) {
; SANDY-LABEL: test_lddqu:
; SANDY: # BB#0:
; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_lddqu:
; HASWELL: # BB#0:
@ -1084,10 +1084,10 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
; SANDY-LABEL: test_maskmovpd:
; SANDY: # BB#0:
; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00]
; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00]
; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maskmovpd:
; HASWELL: # BB#0:
@ -1119,10 +1119,10 @@ declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) {
; SANDY-LABEL: test_maskmovpd_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00]
; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00]
; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maskmovpd_ymm:
; HASWELL: # BB#0:
@ -1154,10 +1154,10 @@ declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwi
define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
; SANDY-LABEL: test_maskmovps:
; SANDY: # BB#0:
; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00]
; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00]
; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maskmovps:
; HASWELL: # BB#0:
@ -1189,10 +1189,10 @@ declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) {
; SANDY-LABEL: test_maskmovps_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50]
; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00]
; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00]
; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maskmovps_ymm:
; HASWELL: # BB#0:
@ -1225,8 +1225,8 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SANDY-LABEL: test_maxpd:
; SANDY: # BB#0:
; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maxpd:
; HASWELL: # BB#0:
@ -1256,8 +1256,8 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SANDY-LABEL: test_maxps:
; SANDY: # BB#0:
; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maxps:
; HASWELL: # BB#0:
@ -1288,7 +1288,7 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SANDY: # BB#0:
; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_minpd:
; HASWELL: # BB#0:
@ -1319,7 +1319,7 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SANDY: # BB#0:
; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_minps:
; HASWELL: # BB#0:
@ -1348,10 +1348,10 @@ declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind
define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
; SANDY-LABEL: test_movapd:
; SANDY: # BB#0:
; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movapd:
; HASWELL: # BB#0:
@ -1382,10 +1382,10 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
; SANDY-LABEL: test_movaps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movaps:
; HASWELL: # BB#0:
@ -1417,9 +1417,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
; SANDY-LABEL: test_movddup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movddup:
; HASWELL: # BB#0:
@ -1451,9 +1451,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
define i32 @test_movmskpd(<4 x double> %a0) {
; SANDY-LABEL: test_movmskpd:
; SANDY: # BB#0:
; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.33]
; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movmskpd:
; HASWELL: # BB#0:
@ -1479,9 +1479,9 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
define i32 @test_movmskps(<8 x float> %a0) {
; SANDY-LABEL: test_movmskps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.33]
; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movmskps:
; HASWELL: # BB#0:
@ -1508,8 +1508,8 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
; SANDY-LABEL: test_movntpd:
; SANDY: # BB#0:
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movntpd:
; HASWELL: # BB#0:
@ -1537,8 +1537,8 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-LABEL: test_movntps:
; SANDY: # BB#0:
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movntps:
; HASWELL: # BB#0:
@ -1566,9 +1566,9 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-LABEL: test_movshdup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movshdup:
; HASWELL: # BB#0:
@ -1601,9 +1601,9 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-LABEL: test_movsldup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movsldup:
; HASWELL: # BB#0:
@ -1635,12 +1635,12 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
; SANDY-LABEL: test_movupd:
; SANDY: # BB#0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movupd:
; HASWELL: # BB#0:
@ -1671,12 +1671,12 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
; SANDY-LABEL: test_movups:
; SANDY: # BB#0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movups:
; HASWELL: # BB#0:
@ -1708,8 +1708,8 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SANDY-LABEL: test_mulpd:
; SANDY: # BB#0:
; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_mulpd:
; HASWELL: # BB#0:
@ -1738,8 +1738,8 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SANDY-LABEL: test_mulps:
; SANDY: # BB#0:
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_mulps:
; HASWELL: # BB#0:
@ -1767,10 +1767,10 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; SANDY-LABEL: orpd:
; SANDY: # BB#0:
; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: orpd:
; HASWELL: # BB#0:
@ -1806,10 +1806,10 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2)
define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_orps:
; SANDY: # BB#0:
; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_orps:
; HASWELL: # BB#0:
@ -1846,9 +1846,9 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-LABEL: test_permilpd:
; SANDY: # BB#0:
; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilpd:
; HASWELL: # BB#0:
@ -1880,10 +1880,10 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
; SANDY-LABEL: test_permilpd_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00]
; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilpd_ymm:
; HASWELL: # BB#0:
@ -1916,9 +1916,9 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_permilps:
; SANDY: # BB#0:
; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilps:
; HASWELL: # BB#0:
@ -1950,10 +1950,10 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-LABEL: test_permilps_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00]
; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilps_ymm:
; HASWELL: # BB#0:
@ -1986,8 +1986,8 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64>
; SANDY-LABEL: test_permilvarpd:
; SANDY: # BB#0:
; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilvarpd:
; HASWELL: # BB#0:
@ -2018,7 +2018,7 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x
; SANDY: # BB#0:
; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilvarpd_ymm:
; HASWELL: # BB#0:
@ -2048,8 +2048,8 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *
; SANDY-LABEL: test_permilvarps:
; SANDY: # BB#0:
; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilvarps:
; HASWELL: # BB#0:
@ -2080,7 +2080,7 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3
; SANDY: # BB#0:
; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_permilvarps_ymm:
; HASWELL: # BB#0:
@ -2112,7 +2112,7 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_rcpps:
; HASWELL: # BB#0:
@ -2148,7 +2148,7 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_roundpd:
; HASWELL: # BB#0:
@ -2184,7 +2184,7 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_roundps:
; HASWELL: # BB#0:
@ -2217,10 +2217,10 @@ declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readno
define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-LABEL: test_rsqrtps:
; SANDY: # BB#0:
; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:3.00]
; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:3.00]
; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_rsqrtps:
; HASWELL: # BB#0:
@ -2254,9 +2254,9 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double
; SANDY-LABEL: test_shufpd:
; SANDY: # BB#0:
; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_shufpd:
; HASWELL: # BB#0:
@ -2289,8 +2289,8 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
; SANDY-LABEL: test_shufps:
; SANDY: # BB#0:
; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_shufps:
; HASWELL: # BB#0:
@ -2318,10 +2318,10 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
; SANDY-LABEL: test_sqrtpd:
; SANDY: # BB#0:
; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:3.00]
; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:3.00]
; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [15:1.00]
; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [19:1.00]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_sqrtpd:
; HASWELL: # BB#0:
@ -2354,10 +2354,10 @@ declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
; SANDY-LABEL: test_sqrtps:
; SANDY: # BB#0:
; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:3.00]
; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:3.00]
; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [15:1.00]
; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00]
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_sqrtps:
; HASWELL: # BB#0:
@ -2391,8 +2391,8 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SANDY-LABEL: test_subpd:
; SANDY: # BB#0:
; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_subpd:
; HASWELL: # BB#0:
@ -2421,8 +2421,8 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SANDY-LABEL: test_subps:
; SANDY: # BB#0:
; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_subps:
; HASWELL: # BB#0:
@ -2451,11 +2451,11 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; SANDY-LABEL: test_testpd:
; SANDY: # BB#0:
; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: setb %al # sched: [1:1.00]
; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: setb %al # sched: [1:0.33]
; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50]
; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_testpd:
; HASWELL: # BB#0:
@ -2495,12 +2495,12 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a
; SANDY-LABEL: test_testpd_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: setb %al # sched: [1:1.00]
; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: setb %al # sched: [1:0.33]
; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50]
; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33]
; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_testpd_ymm:
; HASWELL: # BB#0:
@ -2542,11 +2542,11 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; SANDY-LABEL: test_testps:
; SANDY: # BB#0:
; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: setb %al # sched: [1:1.00]
; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: setb %al # sched: [1:0.33]
; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50]
; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_testps:
; HASWELL: # BB#0:
@ -2586,12 +2586,12 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2)
; SANDY-LABEL: test_testps_ymm:
; SANDY: # BB#0:
; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: setb %al # sched: [1:1.00]
; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: setb %al # sched: [1:0.33]
; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50]
; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33]
; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_testps_ymm:
; HASWELL: # BB#0:
@ -2635,7 +2635,7 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_unpckhpd:
; HASWELL: # BB#0:
@ -2669,7 +2669,7 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; SANDY: # BB#0:
; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_unpckhps:
; HASWELL: # BB#0:
@ -2698,9 +2698,9 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; SANDY-LABEL: test_unpcklpd:
; SANDY: # BB#0:
; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00]
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_unpcklpd:
; HASWELL: # BB#0:
@ -2733,8 +2733,8 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; SANDY-LABEL: test_unpcklps:
; SANDY: # BB#0:
; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_unpcklps:
; HASWELL: # BB#0:
@ -2762,10 +2762,10 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float>
define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; SANDY-LABEL: test_xorpd:
; SANDY: # BB#0:
; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_xorpd:
; HASWELL: # BB#0:
@ -2801,10 +2801,10 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; SANDY-LABEL: test_xorps:
; SANDY: # BB#0:
; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_xorps:
; HASWELL: # BB#0:
@ -2841,7 +2841,7 @@ define void @test_zeroall() {
; SANDY-LABEL: test_zeroall:
; SANDY: # BB#0:
; SANDY-NEXT: vzeroall # sched: [?:0.000000e+00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_zeroall:
; HASWELL: # BB#0:
@ -2866,7 +2866,7 @@ define void @test_zeroupper() {
; SANDY-LABEL: test_zeroupper:
; SANDY: # BB#0:
; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_zeroupper:
; HASWELL: # BB#0:

View file

@ -493,7 +493,7 @@ entry:
define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovaps %ymm0, (%rdi)
; SKX-NEXT: vmovups %ymm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
entry:

View file

@ -15,18 +15,18 @@ define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* noca
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: paddd (%ecx), %xmm0
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movdqa %xmm0, (%ecx)
; CHECK-NEXT: movl (%ecx), %esi
; CHECK-NEXT: movl 4(%ecx), %edi
; CHECK-NEXT: shll $4, %edx
; CHECK-NEXT: movl 8(%ecx), %ebx
; CHECK-NEXT: movl 12(%ecx), %ecx
; CHECK-NEXT: movl %esi, 12(%eax,%edx)
; CHECK-NEXT: movl %edi, (%eax,%edx)
; CHECK-NEXT: movl %ebx, 8(%eax,%edx)
; CHECK-NEXT: movl %ecx, 4(%eax,%edx)
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 4(%edx), %edi
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 12(%edx), %edx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edi, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edx, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx

View file

@ -1,144 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
; IVY-LABEL: test_vcvtph2ps_128:
; IVY: # BB#0:
; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
; IVY-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
; IVY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; IVY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtph2ps_128:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
; HASWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_128:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtph2ps_128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load <8 x i16>, <8 x i16> *%a1
%2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
%3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
%4 = fadd <4 x float> %2, %3
ret <4 x float> %4
}
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
; IVY-LABEL: test_vcvtph2ps_256:
; IVY: # BB#0:
; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
; IVY-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
; IVY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; IVY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtph2ps_256:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
; HASWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_256:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtph2ps_256:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [5:1.00]
; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load <8 x i16>, <8 x i16> *%a1
%2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
%3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
%4 = fadd <8 x float> %2, %3
ret <8 x float> %4
}
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) {
; IVY-LABEL: test_vcvtps2ph_128:
; IVY: # BB#0:
; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00]
; IVY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtps2ph_128:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_128:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtps2ph_128:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [12:1.00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
%2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
%3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %3, <4 x i16> *%a2
ret <8 x i16> %1
}
declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32)
define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) {
; IVY-LABEL: test_vcvtps2ph_256:
; IVY: # BB#0:
; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
; IVY-NEXT: vzeroupper # sched: [?:0.000000e+00]
; IVY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtps2ph_256:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
; HASWELL-NEXT: vzeroupper # sched: [1:0.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_256:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtps2ph_256:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [12:1.00]
; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
%2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
store <8 x i16> %2, <8 x i16> *%a2
ret <8 x i16> %1
}
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32)

View file

@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
; CHECK-NEXT: andq %rdi, %rcx
; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: jmp foo # TAILCALL

View file

@ -16,10 +16,10 @@
; LIN: sarq $32, %r[[REG2]]
; LIN: movslq %e[[REG4]], %r[[REG3:.+]]
; LIN: sarq $32, %r[[REG4]]
; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1
; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1
; LIN: movq %rdi, %xmm1
; LIN: movq %r[[REG3]], %xmm0
; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0
; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0
; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1
; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1
; WIN: movdqa (%rdx), %xmm0
; WIN: pand (%r8), %xmm0
@ -29,10 +29,10 @@
; WIN: sarq $32, %r[[REG2]]
; WIN: movslq %e[[REG4]], %r[[REG3:.+]]
; WIN: sarq $32, %r[[REG4]]
; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1
; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1
; WIN: movdqa (%r[[REG2]]), %xmm0
; WIN: movq %r[[REG2]], %xmm1
; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0
; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0
; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1
; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1
define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
%a = load <4 x i32>, <4 x i32>* %i

View file

@ -1,653 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
define i32 @test_lea_offset(i32) {
; GENERIC-LABEL: test_lea_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_offset:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal -24(%rdi), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_offset:
; SLM: # BB#0:
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal -24(%rdi), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_offset:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -24(%rdi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i32 %0, -24
ret i32 %2
}
define i32 @test_lea_offset_big(i32) {
; GENERIC-LABEL: test_lea_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal 1024(%rdi), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_offset_big:
; SLM: # BB#0:
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal 1024(%rdi), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 1024(%rdi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i32 %0, 1024
ret i32 %2
}
; Function Attrs: norecurse nounwind readnone uwtable
define i32 @test_lea_add(i32, i32) {
; GENERIC-LABEL: test_lea_add:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal (%rdi,%rsi), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add:
; SLM: # BB#0:
; SLM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal (%rdi,%rsi), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add nsw i32 %1, %0
ret i32 %3
}
define i32 @test_lea_add_offset(i32, i32) {
; GENERIC-LABEL: test_lea_add_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal 16(%rdi,%rsi), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_offset:
; SLM: # BB#0:
; SLM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_offset:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT: addl $16, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $16, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i32 %0, 16
%4 = add i32 %3, %1
ret i32 %4
}
define i32 @test_lea_add_offset_big(i32, i32) {
; GENERIC-LABEL: test_lea_add_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal -4096(%rdi,%rsi), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_offset_big:
; SLM: # BB#0:
; SLM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT: addl $-4096, %eax # imm = 0xF000
; SANDY-NEXT: # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $-4096, %eax # imm = 0xF000
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i32 %0, -4096
%4 = add i32 %3, %1
ret i32 %4
}
define i32 @test_lea_mul(i32) {
; GENERIC-LABEL: test_lea_mul:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal (%rdi,%rdi,2), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_mul:
; SLM: # BB#0:
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_mul:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_mul:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i32 %0, 3
ret i32 %2
}
define i32 @test_lea_mul_offset(i32) {
; GENERIC-LABEL: test_lea_mul_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal -32(%rdi,%rdi,2), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_mul_offset:
; SLM: # BB#0:
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_mul_offset:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; SANDY-NEXT: addl $-32, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_mul_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $-32, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i32 %0, 3
%3 = add nsw i32 %2, -32
ret i32 %3
}
define i32 @test_lea_mul_offset_big(i32) {
; GENERIC-LABEL: test_lea_mul_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal 10000(%rdi,%rdi,8), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_mul_offset_big:
; SLM: # BB#0:
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_mul_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; SANDY-NEXT: addl $10000, %eax # imm = 0x2710
; SANDY-NEXT: # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_mul_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $10000, %eax # imm = 0x2710
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i32 %0, 9
%3 = add nsw i32 %2, 10000
ret i32 %3
}
define i32 @test_lea_add_scale(i32, i32) {
; GENERIC-LABEL: test_lea_add_scale:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal (%rdi,%rsi,2), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_scale:
; SLM: # BB#0:
; SLM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_scale:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_scale:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i32 %1, 1
%4 = add nsw i32 %3, %0
ret i32 %4
}
define i32 @test_lea_add_scale_offset(i32, i32) {
; GENERIC-LABEL: test_lea_add_scale_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal 96(%rdi,%rsi,4), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_scale_offset:
; SLM: # BB#0:
; SLM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_scale_offset:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
; SANDY-NEXT: addl $96, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_scale_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $96, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i32 %1, 2
%4 = add i32 %0, 96
%5 = add i32 %4, %3
ret i32 %5
}
define i32 @test_lea_add_scale_offset_big(i32, i32) {
; GENERIC-LABEL: test_lea_add_scale_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; GENERIC-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ATOM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ATOM-NEXT: leal -1200(%rdi,%rsi,8), %eax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_scale_offset_big:
; SLM: # BB#0:
; SLM-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SLM-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_scale_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SANDY-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SANDY-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
; SANDY-NEXT: addl $-1200, %eax # imm = 0xFB50
; SANDY-NEXT: # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_scale_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $-1200, %eax # imm = 0xFB50
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; BTVER2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; BTVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; ZNVER1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; ZNVER1-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i32 %1, 3
%4 = add i32 %0, -1200
%5 = add i32 %4, %3
ret i32 %5
}

View file

@ -1,534 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; Adds the constant -24 to a 64-bit argument. Every expectation group below
; wants this selected as one base+displacement leaq; the Atom group also
; expects six nops between the leaq and the return.
define i64 @test_lea_offset(i64) {
; GENERIC-LABEL: test_lea_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_offset:
; ATOM: # BB#0:
; ATOM-NEXT: leaq -24(%rdi), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_offset:
; SLM: # BB#0:
; SLM-NEXT: leaq -24(%rdi), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_offset:
; SANDY: # BB#0:
; SANDY-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -24(%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i64 %0, -24
ret i64 %2
}
; Same shape as a plain base+displacement add, but with the larger constant
; 1024. All expectation groups still want a single leaq 1024(%rdi); only the
; Atom group pads with six nops before the return.
define i64 @test_lea_offset_big(i64) {
; GENERIC-LABEL: test_lea_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: leaq 1024(%rdi), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_offset_big:
; SLM: # BB#0:
; SLM-NEXT: leaq 1024(%rdi), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = add nsw i64 %0, 1024
ret i64 %2
}
; Adds the two 64-bit arguments. Every expectation group below wants a single
; base+index leaq (%rdi,%rsi); the define itself carries no function attributes.
define i64 @test_lea_add(i64, i64) {
; GENERIC-LABEL: test_lea_add:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add:
; ATOM: # BB#0:
; ATOM-NEXT: leaq (%rdi,%rsi), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add:
; SLM: # BB#0:
; SLM-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add nsw i64 %1, %0
ret i64 %3
}
; Computes arg0 + 16 + arg1 (base + index + displacement). The SANDY and
; HASWELL prefixes expect this split into a two-component leaq followed by
; addq $16; all other prefixes expect one three-component leaq 16(%rdi,%rsi).
define i64 @test_lea_add_offset(i64, i64) {
; GENERIC-LABEL: test_lea_add_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset:
; ATOM: # BB#0:
; ATOM-NEXT: leaq 16(%rdi,%rsi), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_offset:
; SLM: # BB#0:
; SLM-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_offset:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; SANDY-NEXT: addq $16, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $16, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i64 %0, 16
%4 = add i64 %3, %1
ret i64 %4
}
; Computes arg0 - 4096 + arg1. As with the small-displacement variant, the
; SANDY and HASWELL prefixes expect leaq (%rdi,%rsi) plus a separate addq of
; the immediate, while the remaining prefixes expect a single
; leaq -4096(%rdi,%rsi).
define i64 @test_lea_add_offset_big(i64, i64) {
; GENERIC-LABEL: test_lea_add_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: leaq -4096(%rdi,%rsi), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_offset_big:
; SLM: # BB#0:
; SLM-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; SANDY-NEXT: addq $-4096, %rax # imm = 0xF000
; SANDY-NEXT: # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $-4096, %rax # imm = 0xF000
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = add i64 %0, -4096
%4 = add i64 %3, %1
ret i64 %4
}
; Multiplies a 64-bit argument by 3. Every expectation group below wants the
; multiply expressed as a base + index*2 leaq on the same register,
; leaq (%rdi,%rdi,2), with no separate multiply instruction.
define i64 @test_lea_mul(i64) {
; GENERIC-LABEL: test_lea_mul:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul:
; ATOM: # BB#0:
; ATOM-NEXT: leaq (%rdi,%rdi,2), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_mul:
; SLM: # BB#0:
; SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_mul:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_mul:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i64 %0, 3
ret i64 %2
}
; Computes arg0 * 3 - 32. The SANDY and HASWELL prefixes expect
; leaq (%rdi,%rdi,2) followed by addq $-32; the other prefixes expect the
; displacement folded into one leaq -32(%rdi,%rdi,2).
define i64 @test_lea_mul_offset(i64) {
; GENERIC-LABEL: test_lea_mul_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset:
; ATOM: # BB#0:
; ATOM-NEXT: leaq -32(%rdi,%rdi,2), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_mul_offset:
; SLM: # BB#0:
; SLM-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_mul_offset:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; SANDY-NEXT: addq $-32, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_mul_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $-32, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i64 %0, 3
%3 = add nsw i64 %2, -32
ret i64 %3
}
; Computes arg0 * 9 + 10000. The SANDY and HASWELL prefixes expect
; leaq (%rdi,%rdi,8) followed by addq $10000; the other prefixes expect a
; single leaq 10000(%rdi,%rdi,8).
define i64 @test_lea_mul_offset_big(i64) {
; GENERIC-LABEL: test_lea_mul_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: leaq 10000(%rdi,%rdi,8), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_mul_offset_big:
; SLM: # BB#0:
; SLM-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_mul_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; SANDY-NEXT: addq $10000, %rax # imm = 0x2710
; SANDY-NEXT: # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_mul_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $10000, %rax # imm = 0x2710
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_mul_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%2 = mul nsw i64 %0, 9
%3 = add nsw i64 %2, 10000
ret i64 %3
}
; Computes arg0 + (arg1 << 1). Every expectation group below wants the shift
; folded into the index scale of one leaq (%rdi,%rsi,2).
define i64 @test_lea_add_scale(i64, i64) {
; GENERIC-LABEL: test_lea_add_scale:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale:
; ATOM: # BB#0:
; ATOM-NEXT: leaq (%rdi,%rsi,2), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_scale:
; SLM: # BB#0:
; SLM-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_scale:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_scale:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i64 %1, 1
%4 = add nsw i64 %3, %0
ret i64 %4
}
; Computes arg0 + 96 + (arg1 << 2), i.e. base + scaled index + displacement.
; The SANDY and HASWELL prefixes expect leaq (%rdi,%rsi,4) followed by
; addq $96; the other prefixes expect one leaq 96(%rdi,%rsi,4).
define i64 @test_lea_add_scale_offset(i64, i64) {
; GENERIC-LABEL: test_lea_add_scale_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset:
; ATOM: # BB#0:
; ATOM-NEXT: leaq 96(%rdi,%rsi,4), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_scale_offset:
; SLM: # BB#0:
; SLM-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_scale_offset:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
; SANDY-NEXT: addq $96, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_scale_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $96, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i64 %1, 2
%4 = add i64 %0, 96
%5 = add i64 %4, %3
ret i64 %5
}
; Computes arg0 - 1200 + (arg1 << 3). The SANDY and HASWELL prefixes expect
; leaq (%rdi,%rsi,8) followed by a separate addq of the immediate; the other
; prefixes expect one leaq -1200(%rdi,%rsi,8).
define i64 @test_lea_add_scale_offset_big(i64, i64) {
; GENERIC-LABEL: test_lea_add_scale_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset_big:
; ATOM: # BB#0:
; ATOM-NEXT: leaq -1200(%rdi,%rsi,8), %rax
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
;
; SLM-LABEL: test_lea_add_scale_offset_big:
; SLM: # BB#0:
; SLM-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_lea_add_scale_offset_big:
; SANDY: # BB#0:
; SANDY-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
; SANDY-NEXT: addq $-1200, %rax # imm = 0xFB50
; SANDY-NEXT: # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_lea_add_scale_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $-1200, %rax # imm = 0xFB50
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # BB#0:
; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_lea_add_scale_offset_big:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%3 = shl i64 %1, 3
%4 = add i64 %0, -1200
%5 = add i64 %4, %3
ret i64 %5
}

View file

@ -1,167 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; Population count on i16: one ctpop of a value loaded through %a1 and one of
; the register argument %a0, OR-ed together so both results stay live. Each
; expectation group wants a memory-operand popcntw and a register popcntw.
; The GENERIC lines (the run using -mattr=+popcnt with no -mcpu) carry no
; "# sched" suffix, unlike the per-CPU groups.
define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
; GENERIC-LABEL: test_ctpop_i16:
; GENERIC: # BB#0:
; GENERIC-NEXT: popcntw (%rsi), %cx
; GENERIC-NEXT: popcntw %di, %ax
; GENERIC-NEXT: orl %ecx, %eax
; GENERIC-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; GENERIC-NEXT: retq
;
; SLM-LABEL: test_ctpop_i16:
; SLM: # BB#0:
; SLM-NEXT: popcntw (%rsi), %cx # sched: [6:1.00]
; SLM-NEXT: popcntw %di, %ax # sched: [3:1.00]
; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_ctpop_i16:
; SANDY: # BB#0:
; SANDY-NEXT: popcntw (%rsi), %cx # sched: [7:1.00]
; SANDY-NEXT: popcntw %di, %ax # sched: [3:1.00]
; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_ctpop_i16:
; HASWELL: # BB#0:
; HASWELL-NEXT: popcntw (%rsi), %cx # sched: [7:1.00]
; HASWELL-NEXT: popcntw %di, %ax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctpop_i16:
; BTVER2: # BB#0:
; BTVER2-NEXT: popcntw (%rsi), %cx # sched: [8:1.00]
; BTVER2-NEXT: popcntw %di, %ax # sched: [3:1.00]
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctpop_i16:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: popcntw (%rsi), %cx # sched: [10:1.00]
; ZNVER1-NEXT: popcntw %di, %ax # sched: [3:1.00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i16, i16 *%a1
%2 = tail call i16 @llvm.ctpop.i16( i16 %1 )
%3 = tail call i16 @llvm.ctpop.i16( i16 %a0 )
%4 = or i16 %2, %3
ret i16 %4
}
; Intrinsic used by test_ctpop_i16 above.
declare i16 @llvm.ctpop.i16(i16)
; Population count on i32: one ctpop of a value loaded through %a1 and one of
; the register argument %a0, OR-ed together so both results stay live. Each
; expectation group wants a memory-operand popcntl and a register popcntl.
define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
; GENERIC-LABEL: test_ctpop_i32:
; GENERIC: # BB#0:
; GENERIC-NEXT: popcntl (%rsi), %ecx
; GENERIC-NEXT: popcntl %edi, %eax
; GENERIC-NEXT: orl %ecx, %eax
; GENERIC-NEXT: retq
;
; SLM-LABEL: test_ctpop_i32:
; SLM: # BB#0:
; SLM-NEXT: popcntl (%rsi), %ecx # sched: [6:1.00]
; SLM-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_ctpop_i32:
; SANDY: # BB#0:
; SANDY-NEXT: popcntl (%rsi), %ecx # sched: [7:1.00]
; SANDY-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_ctpop_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: popcntl (%rsi), %ecx # sched: [7:1.00]
; HASWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctpop_i32:
; BTVER2: # BB#0:
; BTVER2-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00]
; BTVER2-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctpop_i32:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [10:1.00]
; ZNVER1-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i32, i32 *%a1
%2 = tail call i32 @llvm.ctpop.i32( i32 %1 )
%3 = tail call i32 @llvm.ctpop.i32( i32 %a0 )
%4 = or i32 %2, %3
ret i32 %4
}
; Intrinsic used by test_ctpop_i32 above.
declare i32 @llvm.ctpop.i32(i32)
; Population count on i64: one ctpop of a value loaded through %a1 and one of
; the register argument %a0, OR-ed together so both results stay live. Each
; expectation group wants a memory-operand popcntq and a register popcntq.
define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
; GENERIC-LABEL: test_ctpop_i64:
; GENERIC: # BB#0:
; GENERIC-NEXT: popcntq (%rsi), %rcx
; GENERIC-NEXT: popcntq %rdi, %rax
; GENERIC-NEXT: orq %rcx, %rax
; GENERIC-NEXT: retq
;
; SLM-LABEL: test_ctpop_i64:
; SLM: # BB#0:
; SLM-NEXT: popcntq (%rsi), %rcx # sched: [6:1.00]
; SLM-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_ctpop_i64:
; SANDY: # BB#0:
; SANDY-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00]
; SANDY-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_ctpop_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: popcntq (%rsi), %rcx # sched: [7:1.00]
; HASWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctpop_i64:
; BTVER2: # BB#0:
; BTVER2-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00]
; BTVER2-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_ctpop_i64:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [10:1.00]
; ZNVER1-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [5:0.50]
%1 = load i64, i64 *%a1
%2 = tail call i64 @llvm.ctpop.i64( i64 %1 )
%3 = tail call i64 @llvm.ctpop.i64( i64 %a0 )
%4 = or i64 %2, %3
ret i64 %4
}
; Intrinsic used by test_ctpop_i64 above.
declare i64 @llvm.ctpop.i64(i64)

View file

@ -0,0 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s
; Stores an all-ones <16 x i8> to an undef address, reloads it, compares it
; against zero to form a <16 x i1> mask, and uses that mask to select between
; undef and the <16 x double> loaded from %ptr before storing the result back.
; The function ends in unreachable, so the expected output has no return.
; NOTE(review): this looks like a reduced reproducer for a KNL codegen bug;
; the originating bug report is not referenced here - confirm before editing.
define void @f_f(<16 x double>* %ptr) {
; CHECK-LABEL: f_f:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, (%rax)
; CHECK-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vmovapd 64(%rdi), %zmm2
; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovapd %zmm2, 64(%rdi)
; CHECK-NEXT: vmovapd %zmm1, (%rdi)
store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8>* undef
%load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef
%v.i.i.i.i = load <16 x double>, <16 x double>* %ptr
%mask_vec_i1.i.i.i51.i.i = icmp ne <16 x i8> %load_mask8.i.i.i, zeroinitializer
%v1.i.i.i.i = select <16 x i1> %mask_vec_i1.i.i.i51.i.i, <16 x double> undef, <16 x double> %v.i.i.i.i
store <16 x double> %v1.i.i.i.i, <16 x double>* %ptr
unreachable
}

View file

@ -0,0 +1,52 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+avx512f | FileCheck %s
; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s
; The RUN lines of this test pass no -mtriple, so the target is taken from the
; module-level datalayout and triple below.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Vector select over x86_fp80 elements driven by a <4 x i64> compare:
; compares the constant vector <0, 1, 2, 3> with undef, selects per lane
; between an x86_fp80 constant (materialised with fld1 in the expected output)
; and zero, adds the result to undef, interleaves it with undef into eight
; lanes and stores that to an undef address. The expected output performs the
; selects on the x87 stack with fcmove. Both RUN configurations (avx512f alone
; and with vl/bw/dq) share the same expectations.
; NOTE(review): presumably a reduced crash reproducer; the originating bug is
; not referenced here - confirm before editing.
define void @test() local_unnamed_addr {
; CHECK-LABEL: test:
; CHECK: # BB#0:
; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,3]
; CHECK-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NEXT: vmovq %xmm0, %rcx
; CHECK-NEXT: negq %rdx
; CHECK-NEXT: fld1
; CHECK-NEXT: fldz
; CHECK-NEXT: fld %st(0)
; CHECK-NEXT: fcmove %st(2), %st(0)
; CHECK-NEXT: cmpq %rax, %rcx
; CHECK-NEXT: fld %st(1)
; CHECK-NEXT: fcmove %st(3), %st(0)
; CHECK-NEXT: cmpq %rax, %rax
; CHECK-NEXT: fld %st(2)
; CHECK-NEXT: fcmove %st(4), %st(0)
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: cmpq %rax, %rax
; CHECK-NEXT: fld %st(3)
; CHECK-NEXT: fcmove %st(5), %st(0)
; CHECK-NEXT: fstp %st(5)
; CHECK-NEXT: fxch %st(2)
; CHECK-NEXT: fadd %st(3)
; CHECK-NEXT: fxch %st(4)
; CHECK-NEXT: fadd %st(3)
; CHECK-NEXT: fxch %st(2)
; CHECK-NEXT: fadd %st(3)
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: faddp %st(3)
; CHECK-NEXT: fxch %st(3)
; CHECK-NEXT: fstpt (%rax)
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstpt (%rax)
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstpt (%rax)
; CHECK-NEXT: fstpt (%rax)
%1 = icmp eq <4 x i64> <i64 0, i64 1, i64 2, i64 3>, undef
%2 = select <4 x i1> %1, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
%3 = fadd <4 x x86_fp80> undef, %2
%4 = shufflevector <4 x x86_fp80> %3, <4 x x86_fp80> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
store <8 x x86_fp80> %4, <8 x x86_fp80>* undef, align 16
unreachable
}

View file

@ -0,0 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512vl,avx512bw | FileCheck %s
; Unsigned minimum of %i against a splat of 16843009 (0x01010101), written as
; icmp ult + select, then truncated from i32 to i16 lanes. The expected output
; is a vpminud whose constant is a RIP-relative broadcast operand ({1to16}),
; followed by vpmovdw for the truncation.
define <16 x i16> @foo(<16 x i32> %i) {
; CHECK-LABEL: foo:
; CHECK: # BB#0:
; CHECK-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: vpmovdw %zmm0, %ymm0
; CHECK-NEXT: retq
%x3 = icmp ult <16 x i32> %i, <i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009>
%x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009>
%x6 = trunc <16 x i32> %x5 to <16 x i16>
ret <16 x i16> %x6
}

View file

@ -0,0 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; CHECK: .LCPI0_0:
; CHECK-NEXT: .zero 16,1
; Adds a splat of 16843009 (0x01010101) to a <4 x i32>. The expected output is
; a paddd that reads the constant from the constant-pool entry .LCPI0_0; the
; expectations preceding this function require that entry to be emitted as
; ".zero 16,1".
define <4 x i32> @f(<4 x i32> %a) {
; CHECK-LABEL: f:
; CHECK: # BB#0:
; CHECK-NEXT: paddd .LCPI0_0(%rip), %xmm0
; CHECK-NEXT: retq
%v = add nuw nsw <4 x i32> %a, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
ret <4 x i32> %v
}

View file

@ -45,9 +45,9 @@ define float @f32_no_estimate(float %x) #0 {
;
; SANDY-LABEL: f32_no_estimate:
; SANDY: # BB#0:
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_no_estimate:
; HASWELL: # BB#0:
@ -113,11 +113,11 @@ define float @f32_one_step(float %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_one_step:
; HASWELL: # BB#0:
@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -215,7 +215,7 @@ define float @f32_two_step(float %x) #2 {
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_two_step:
; HASWELL: # BB#0:
@ -284,9 +284,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; SANDY-LABEL: v4f32_no_estimate:
; SANDY: # BB#0:
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # BB#0:
@ -350,13 +350,13 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
;
; SANDY-LABEL: v4f32_one_step:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # BB#0:
@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
;
; SANDY-LABEL: v4f32_two_step:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -463,7 +463,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # BB#0:
@ -546,9 +546,9 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; SANDY-LABEL: v8f32_no_estimate:
; SANDY: # BB#0:
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # BB#0:
@ -621,11 +621,11 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # BB#0:
@ -737,7 +737,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@ -745,7 +745,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # BB#0:

View file

@ -39,8 +39,8 @@ define float @f32_no_step_2(float %x) #3 {
; SANDY-LABEL: f32_no_step_2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_no_step_2:
; HASWELL: # BB#0:
@ -110,12 +110,12 @@ define float @f32_one_step_2(float %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_one_step_2:
; HASWELL: # BB#0:
@ -198,13 +198,13 @@ define float @f32_one_step_2_divs(float %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_one_step_2_divs:
; HASWELL: # BB#0:
@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #2 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -313,8 +313,8 @@ define float @f32_two_step_2(float %x) #2 {
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_two_step_2:
; HASWELL: # BB#0:
@ -403,14 +403,14 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
;
; SANDY-LABEL: v4f32_one_step2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # BB#0:
@ -501,15 +501,15 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
;
; SANDY-LABEL: v4f32_one_step_2_divs:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # BB#0:
@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
;
; SANDY-LABEL: v4f32_two_step2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -629,8 +629,8 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # BB#0:
@ -741,12 +741,12 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # BB#0:
@ -848,13 +848,13 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # BB#0:
@ -980,7 +980,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@ -988,8 +988,8 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # BB#0:
@ -1070,7 +1070,7 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
; SANDY-LABEL: v8f32_no_step:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_no_step:
; HASWELL: # BB#0:
@ -1125,8 +1125,8 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
; SANDY-LABEL: v8f32_no_step2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_no_step2:
; HASWELL: # BB#0:

View file

@ -31,8 +31,8 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_addps:
; SANDY: # BB#0:
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addps:
; HASWELL: # BB#0:
@ -79,8 +79,8 @@ define float @test_addss(float %a0, float %a1, float *%a2) {
; SANDY-LABEL: test_addss:
; SANDY: # BB#0:
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addss:
; HASWELL: # BB#0:
@ -134,9 +134,9 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SANDY-LABEL: test_andps:
; SANDY: # BB#0:
; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_andps:
; HASWELL: # BB#0:
@ -194,9 +194,9 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float>
;
; SANDY-LABEL: test_andnotps:
; SANDY: # BB#0:
; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_andnotps:
; HASWELL: # BB#0:
@ -252,9 +252,9 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_cmpps:
; SANDY: # BB#0:
; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cmpps:
; HASWELL: # BB#0:
@ -308,7 +308,7 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) {
; SANDY: # BB#0:
; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cmpss:
; HASWELL: # BB#0:
@ -384,16 +384,16 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; SANDY-LABEL: test_comiss:
; SANDY: # BB#0:
; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: setnp %al # sched: [1:1.00]
; SANDY-NEXT: sete %cl # sched: [1:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.33]
; SANDY-NEXT: sete %cl # sched: [1:0.33]
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-NEXT: setnp %al # sched: [1:1.00]
; SANDY-NEXT: sete %dl # sched: [1:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.33]
; SANDY-NEXT: sete %dl # sched: [1:0.33]
; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_comiss:
; HASWELL: # BB#0:
@ -468,10 +468,10 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
;
; SANDY-LABEL: test_cvtsi2ss:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtsi2ss:
; HASWELL: # BB#0:
@ -524,10 +524,10 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
;
; SANDY-LABEL: test_cvtsi2ssq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtsi2ssq:
; HASWELL: # BB#0:
@ -580,10 +580,10 @@ define i32 @test_cvtss2si(float %a0, float *%a1) {
;
; SANDY-LABEL: test_cvtss2si:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [7:1.00]
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtss2si:
; HASWELL: # BB#0:
@ -639,10 +639,10 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) {
;
; SANDY-LABEL: test_cvtss2siq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [7:1.00]
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvtss2siq:
; HASWELL: # BB#0:
@ -698,10 +698,10 @@ define i32 @test_cvttss2si(float %a0, float *%a1) {
;
; SANDY-LABEL: test_cvttss2si:
; SANDY: # BB#0:
; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [7:1.00]
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvttss2si:
; HASWELL: # BB#0:
@ -754,10 +754,10 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) {
;
; SANDY-LABEL: test_cvttss2siq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [7:1.00]
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_cvttss2siq:
; HASWELL: # BB#0:
@ -807,9 +807,9 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SANDY-LABEL: test_divps:
; SANDY: # BB#0:
; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_divps:
; HASWELL: # BB#0:
@ -855,9 +855,9 @@ define float @test_divss(float %a0, float %a1, float *%a2) {
;
; SANDY-LABEL: test_divss:
; SANDY: # BB#0:
; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_divss:
; HASWELL: # BB#0:
@ -904,8 +904,8 @@ define void @test_ldmxcsr(i32 %a0) {
; SANDY-LABEL: test_ldmxcsr:
; SANDY: # BB#0:
; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_ldmxcsr:
; HASWELL: # BB#0:
@ -954,8 +954,8 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_maxps:
; SANDY: # BB#0:
; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maxps:
; HASWELL: # BB#0:
@ -1003,8 +1003,8 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_maxss:
; SANDY: # BB#0:
; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_maxss:
; HASWELL: # BB#0:
@ -1052,8 +1052,8 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_minps:
; SANDY: # BB#0:
; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_minps:
; HASWELL: # BB#0:
@ -1101,8 +1101,8 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_minss:
; SANDY: # BB#0:
; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_minss:
; HASWELL: # BB#0:
@ -1152,10 +1152,10 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
;
; SANDY-LABEL: test_movaps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movaps:
; HASWELL: # BB#0:
@ -1210,7 +1210,7 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
; SANDY-LABEL: test_movhlps:
; SANDY: # BB#0:
; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movhlps:
; HASWELL: # BB#0:
@ -1258,10 +1258,10 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
;
; SANDY-LABEL: test_movhps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movhps:
; HASWELL: # BB#0:
@ -1318,7 +1318,7 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
; SANDY: # BB#0:
; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movlhps:
; HASWELL: # BB#0:
@ -1366,10 +1366,10 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
;
; SANDY-LABEL: test_movlps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movlps:
; HASWELL: # BB#0:
@ -1421,8 +1421,8 @@ define i32 @test_movmskps(<4 x float> %a0) {
;
; SANDY-LABEL: test_movmskps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movmskps:
; HASWELL: # BB#0:
@ -1467,8 +1467,8 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
;
; SANDY-LABEL: test_movntps:
; SANDY: # BB#0:
; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movntps:
; HASWELL: # BB#0:
@ -1512,10 +1512,10 @@ define void @test_movss_mem(float* %a0, float* %a1) {
;
; SANDY-LABEL: test_movss_mem:
; SANDY: # BB#0:
; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movss_mem:
; HASWELL: # BB#0:
@ -1567,8 +1567,8 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
;
; SANDY-LABEL: test_movss_reg:
; SANDY: # BB#0:
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movss_reg:
; HASWELL: # BB#0:
@ -1612,10 +1612,10 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
;
; SANDY-LABEL: test_movups:
; SANDY: # BB#0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movups:
; HASWELL: # BB#0:
@ -1665,8 +1665,8 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_mulps:
; SANDY: # BB#0:
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_mulps:
; HASWELL: # BB#0:
@ -1713,8 +1713,8 @@ define float @test_mulss(float %a0, float %a1, float *%a2) {
; SANDY-LABEL: test_mulss:
; SANDY: # BB#0:
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_mulss:
; HASWELL: # BB#0:
@ -1768,9 +1768,9 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
;
; SANDY-LABEL: test_orps:
; SANDY: # BB#0:
; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_orps:
; HASWELL: # BB#0:
@ -1823,8 +1823,8 @@ define void @test_prefetchnta(i8* %a0) {
;
; SANDY-LABEL: test_prefetchnta:
; SANDY: # BB#0:
; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_prefetchnta:
; HASWELL: # BB#0:
@ -1871,10 +1871,10 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
;
; SANDY-LABEL: test_rcpps:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [7:3.00]
; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_rcpps:
; HASWELL: # BB#0:
@ -1934,10 +1934,10 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) {
; SANDY-LABEL: test_rcpss:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_rcpss:
; HASWELL: # BB#0:
@ -1999,9 +1999,9 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_rsqrtps:
; SANDY: # BB#0:
; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_rsqrtps:
; HASWELL: # BB#0:
@ -2060,11 +2060,11 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
;
; SANDY-LABEL: test_rsqrtss:
; SANDY: # BB#0:
; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_rsqrtss:
; HASWELL: # BB#0:
@ -2124,7 +2124,7 @@ define void @test_sfence() {
; SANDY-LABEL: test_sfence:
; SANDY: # BB#0:
; SANDY-NEXT: sfence # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_sfence:
; HASWELL: # BB#0:
@ -2171,8 +2171,8 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; SANDY-LABEL: test_shufps:
; SANDY: # BB#0:
; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_shufps:
; HASWELL: # BB#0:
@ -2222,10 +2222,10 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
;
; SANDY-LABEL: test_sqrtps:
; SANDY: # BB#0:
; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_sqrtps:
; HASWELL: # BB#0:
@ -2284,11 +2284,11 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
;
; SANDY-LABEL: test_sqrtss:
; SANDY: # BB#0:
; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_sqrtss:
; HASWELL: # BB#0:
@ -2342,9 +2342,9 @@ define i32 @test_stmxcsr() {
;
; SANDY-LABEL: test_stmxcsr:
; SANDY: # BB#0:
; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_stmxcsr:
; HASWELL: # BB#0:
@ -2393,8 +2393,8 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SANDY-LABEL: test_subps:
; SANDY: # BB#0:
; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_subps:
; HASWELL: # BB#0:
@ -2441,8 +2441,8 @@ define float @test_subss(float %a0, float %a1, float *%a2) {
; SANDY-LABEL: test_subss:
; SANDY: # BB#0:
; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_subss:
; HASWELL: # BB#0:
@ -2513,16 +2513,16 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; SANDY-LABEL: test_ucomiss:
; SANDY: # BB#0:
; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: setnp %al # sched: [1:1.00]
; SANDY-NEXT: sete %cl # sched: [1:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.33]
; SANDY-NEXT: sete %cl # sched: [1:0.33]
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-NEXT: setnp %al # sched: [1:1.00]
; SANDY-NEXT: sete %dl # sched: [1:1.00]
; SANDY-NEXT: setnp %al # sched: [1:0.33]
; SANDY-NEXT: sete %dl # sched: [1:0.33]
; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_ucomiss:
; HASWELL: # BB#0:
@ -2599,8 +2599,8 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SANDY-LABEL: test_unpckhps:
; SANDY: # BB#0:
; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_unpckhps:
; HASWELL: # BB#0:
@ -2651,8 +2651,8 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SANDY-LABEL: test_unpcklps:
; SANDY: # BB#0:
; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_unpcklps:
; HASWELL: # BB#0:
@ -2706,9 +2706,9 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SANDY-LABEL: test_xorps:
; SANDY: # BB#0:
; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_xorps:
; HASWELL: # BB#0:

File diff suppressed because it is too large Load diff

View file

@ -31,8 +31,8 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SANDY-LABEL: test_addsubpd:
; SANDY: # BB#0:
; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addsubpd:
; HASWELL: # BB#0:
@ -80,8 +80,8 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SANDY-LABEL: test_addsubps:
; SANDY: # BB#0:
; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_addsubps:
; HASWELL: # BB#0:
@ -128,9 +128,9 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; SANDY-LABEL: test_haddpd:
; SANDY: # BB#0:
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_haddpd:
; HASWELL: # BB#0:
@ -177,9 +177,9 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; SANDY-LABEL: test_haddps:
; SANDY: # BB#0:
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_haddps:
; HASWELL: # BB#0:
@ -226,9 +226,9 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; SANDY-LABEL: test_hsubpd:
; SANDY: # BB#0:
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_hsubpd:
; HASWELL: # BB#0:
@ -275,9 +275,9 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; SANDY-LABEL: test_hsubps:
; SANDY: # BB#0:
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_hsubps:
; HASWELL: # BB#0:
@ -323,8 +323,8 @@ define <16 x i8> @test_lddqu(i8* %a0) {
;
; SANDY-LABEL: test_lddqu:
; SANDY: # BB#0:
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_lddqu:
; HASWELL: # BB#0:
@ -371,9 +371,9 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-LABEL: test_movddup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movddup:
; HASWELL: # BB#0:
@ -428,9 +428,9 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_movshdup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movshdup:
; HASWELL: # BB#0:
@ -485,9 +485,9 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_movsldup:
; SANDY: # BB#0:
; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movsldup:
; HASWELL: # BB#0:

View file

@ -25,10 +25,10 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
;
; SANDY-LABEL: test_blendpd:
; SANDY: # BB#0:
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendpd:
; HASWELL: # BB#0:
@ -72,9 +72,9 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
;
; SANDY-LABEL: test_blendps:
; SANDY: # BB#0:
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendps:
; HASWELL: # BB#0:
@ -120,9 +120,9 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
;
; SANDY-LABEL: test_blendvpd:
; SANDY: # BB#0:
; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendvpd:
; HASWELL: # BB#0:
@ -169,9 +169,9 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float>
;
; SANDY-LABEL: test_blendvps:
; SANDY: # BB#0:
; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_blendvps:
; HASWELL: # BB#0:
@ -212,9 +212,9 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SANDY-LABEL: test_dppd:
; SANDY: # BB#0:
; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_dppd:
; HASWELL: # BB#0:
@ -255,9 +255,9 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
;
; SANDY-LABEL: test_dpps:
; SANDY: # BB#0:
; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_dpps:
; HASWELL: # BB#0:
@ -299,8 +299,8 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2)
; SANDY-LABEL: test_insertps:
; SANDY: # BB#0:
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_insertps:
; HASWELL: # BB#0:
@ -339,8 +339,8 @@ define <2 x i64> @test_movntdqa(i8* %a0) {
;
; SANDY-LABEL: test_movntdqa:
; SANDY: # BB#0:
; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_movntdqa:
; HASWELL: # BB#0:
@ -376,9 +376,9 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SANDY-LABEL: test_mpsadbw:
; SANDY: # BB#0:
; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_mpsadbw:
; HASWELL: # BB#0:
@ -421,8 +421,8 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_packusdw:
; SANDY: # BB#0:
; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_packusdw:
; HASWELL: # BB#0:
@ -471,8 +471,8 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16
; SANDY-LABEL: test_pblendvb:
; SANDY: # BB#0:
; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pblendvb:
; HASWELL: # BB#0:
@ -514,8 +514,8 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_pblendw:
; SANDY: # BB#0:
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pblendw:
; HASWELL: # BB#0:
@ -555,9 +555,9 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; SANDY-LABEL: test_pcmpeqq:
; SANDY: # BB#0:
; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpeqq:
; HASWELL: # BB#0:
@ -599,9 +599,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
;
; SANDY-LABEL: test_pextrb:
; SANDY: # BB#0:
; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50]
; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pextrb:
; HASWELL: # BB#0:
@ -642,9 +642,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
;
; SANDY-LABEL: test_pextrd:
; SANDY: # BB#0:
; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50]
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pextrd:
; HASWELL: # BB#0:
@ -684,9 +684,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
;
; SANDY-LABEL: test_pextrq:
; SANDY: # BB#0:
; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50]
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pextrq:
; HASWELL: # BB#0:
@ -726,9 +726,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
;
; SANDY-LABEL: test_pextrw:
; SANDY: # BB#0:
; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50]
; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pextrw:
; HASWELL: # BB#0:
@ -769,9 +769,9 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
;
; SANDY-LABEL: test_phminposuw:
; SANDY: # BB#0:
; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_phminposuw:
; HASWELL: # BB#0:
@ -812,9 +812,9 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
;
; SANDY-LABEL: test_pinsrb:
; SANDY: # BB#0:
; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pinsrb:
; HASWELL: # BB#0:
@ -854,9 +854,9 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
;
; SANDY-LABEL: test_pinsrd:
; SANDY: # BB#0:
; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pinsrd:
; HASWELL: # BB#0:
@ -898,10 +898,10 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
;
; SANDY-LABEL: test_pinsrq:
; SANDY: # BB#0:
; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pinsrq:
; HASWELL: # BB#0:
@ -946,8 +946,8 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_pmaxsb:
; SANDY: # BB#0:
; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmaxsb:
; HASWELL: # BB#0:
@ -989,8 +989,8 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pmaxsd:
; SANDY: # BB#0:
; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmaxsd:
; HASWELL: # BB#0:
@ -1032,8 +1032,8 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pmaxud:
; SANDY: # BB#0:
; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmaxud:
; HASWELL: # BB#0:
@ -1075,8 +1075,8 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_pmaxuw:
; SANDY: # BB#0:
; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmaxuw:
; HASWELL: # BB#0:
@ -1118,8 +1118,8 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_pminsb:
; SANDY: # BB#0:
; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pminsb:
; HASWELL: # BB#0:
@ -1161,8 +1161,8 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pminsd:
; SANDY: # BB#0:
; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pminsd:
; HASWELL: # BB#0:
@ -1204,8 +1204,8 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pminud:
; SANDY: # BB#0:
; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pminud:
; HASWELL: # BB#0:
@ -1247,8 +1247,8 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_pminuw:
; SANDY: # BB#0:
; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pminuw:
; HASWELL: # BB#0:
@ -1293,9 +1293,9 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SANDY-LABEL: test_pmovsxbw:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovsxbw:
; HASWELL: # BB#0:
@ -1344,9 +1344,9 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SANDY-LABEL: test_pmovsxbd:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovsxbd:
; HASWELL: # BB#0:
@ -1395,9 +1395,9 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SANDY-LABEL: test_pmovsxbq:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovsxbq:
; HASWELL: # BB#0:
@ -1446,9 +1446,9 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SANDY-LABEL: test_pmovsxdq:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovsxdq:
; HASWELL: # BB#0:
@ -1497,9 +1497,9 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SANDY-LABEL: test_pmovsxwd:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovsxwd:
; HASWELL: # BB#0:
@ -1548,9 +1548,9 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SANDY-LABEL: test_pmovsxwq:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovsxwq:
; HASWELL: # BB#0:
@ -1599,9 +1599,9 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SANDY-LABEL: test_pmovzxbw:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50]
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovzxbw:
; HASWELL: # BB#0:
@ -1650,9 +1650,9 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SANDY-LABEL: test_pmovzxbd:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovzxbd:
; HASWELL: # BB#0:
@ -1701,9 +1701,9 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SANDY-LABEL: test_pmovzxbq:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovzxbq:
; HASWELL: # BB#0:
@ -1752,9 +1752,9 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SANDY-LABEL: test_pmovzxdq:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovzxdq:
; HASWELL: # BB#0:
@ -1803,9 +1803,9 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SANDY-LABEL: test_pmovzxwd:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovzxwd:
; HASWELL: # BB#0:
@ -1854,9 +1854,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SANDY-LABEL: test_pmovzxwq:
; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmovzxwq:
; HASWELL: # BB#0:
@ -1901,9 +1901,9 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SANDY-LABEL: test_pmuldq:
; SANDY: # BB#0:
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmuldq:
; HASWELL: # BB#0:
@ -1945,9 +1945,9 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SANDY-LABEL: test_pmulld:
; SANDY: # BB#0:
; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmulld:
; HASWELL: # BB#0:
@ -1995,13 +1995,13 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; SANDY-LABEL: test_ptest:
; SANDY: # BB#0:
; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: setb %al # sched: [1:1.00]
; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
; SANDY-NEXT: setb %cl # sched: [1:1.00]
; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: setb %al # sched: [1:0.33]
; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [5:0.50]
; SANDY-NEXT: setb %cl # sched: [1:0.33]
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_ptest:
; HASWELL: # BB#0:
@ -2059,9 +2059,9 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-LABEL: test_roundpd:
; SANDY: # BB#0:
; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_roundpd:
; HASWELL: # BB#0:
@ -2110,9 +2110,9 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_roundps:
; SANDY: # BB#0:
; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_roundps:
; HASWELL: # BB#0:
@ -2162,9 +2162,9 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SANDY-LABEL: test_roundsd:
; SANDY: # BB#0:
; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_roundsd:
; HASWELL: # BB#0:
@ -2214,9 +2214,9 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SANDY-LABEL: test_roundss:
; SANDY: # BB#0:
; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_roundss:
; HASWELL: # BB#0:

View file

@ -26,9 +26,9 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; SANDY-LABEL: crc32_32_8:
; SANDY: # BB#0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_32_8:
; HASWELL: # BB#0:
@ -75,9 +75,9 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; SANDY-LABEL: crc32_32_16:
; SANDY: # BB#0:
; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_32_16:
; HASWELL: # BB#0:
@ -126,7 +126,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_32_32:
; HASWELL: # BB#0:
@ -173,9 +173,9 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; SANDY-LABEL: crc32_64_8:
; SANDY: # BB#0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_64_8:
; HASWELL: # BB#0:
@ -224,7 +224,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: crc32_64_64:
; HASWELL: # BB#0:
@ -291,7 +291,7 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpestri:
; HASWELL: # BB#0:
@ -368,7 +368,7 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpestrm:
; HASWELL: # BB#0:
@ -427,12 +427,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SANDY-LABEL: test_pcmpistri:
; SANDY: # BB#0:
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00]
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpistri:
; HASWELL: # BB#0:
@ -483,9 +483,9 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SANDY-LABEL: test_pcmpistrm:
; SANDY: # BB#0:
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpistrm:
; HASWELL: # BB#0:
@ -526,9 +526,9 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; SANDY-LABEL: test_pcmpgtq:
; SANDY: # BB#0:
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pcmpgtq:
; HASWELL: # BB#0:

View file

@ -35,9 +35,9 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; SANDY-LABEL: test_pabsb:
; SANDY: # BB#0:
; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pabsb:
; HASWELL: # BB#0:
@ -93,9 +93,9 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
; SANDY-LABEL: test_pabsd:
; SANDY: # BB#0:
; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50]
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pabsd:
; HASWELL: # BB#0:
@ -150,7 +150,7 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
; SANDY-LABEL: test_pabsw:
; SANDY: # BB#0:
; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pabsw:
; HASWELL: # BB#0:
@ -201,8 +201,8 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_palignr:
; SANDY: # BB#0:
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_palignr:
; HASWELL: # BB#0:
@ -248,9 +248,9 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SANDY-LABEL: test_phaddd:
; SANDY: # BB#0:
; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_phaddd:
; HASWELL: # BB#0:
@ -305,9 +305,9 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phaddsw:
; SANDY: # BB#0:
; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_phaddsw:
; HASWELL: # BB#0:
@ -354,9 +354,9 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phaddw:
; SANDY: # BB#0:
; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_phaddw:
; HASWELL: # BB#0:
@ -403,9 +403,9 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SANDY-LABEL: test_phsubd:
; SANDY: # BB#0:
; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_phsubd:
; HASWELL: # BB#0:
@ -460,9 +460,9 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phsubsw:
; SANDY: # BB#0:
; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_phsubsw:
; HASWELL: # BB#0:
@ -509,9 +509,9 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_phsubw:
; SANDY: # BB#0:
; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_phsubw:
; HASWELL: # BB#0:
@ -558,9 +558,9 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SANDY-LABEL: test_pmaddubsw:
; SANDY: # BB#0:
; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmaddubsw:
; HASWELL: # BB#0:
@ -605,8 +605,8 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SANDY-LABEL: test_pmulhrsw:
; SANDY: # BB#0:
; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pmulhrsw:
; HASWELL: # BB#0:
@ -651,8 +651,8 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_pshufb:
; SANDY: # BB#0:
; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_pshufb:
; HASWELL: # BB#0:
@ -708,8 +708,8 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_psignb:
; SANDY: # BB#0:
; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_psignb:
; HASWELL: # BB#0:
@ -765,8 +765,8 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_psignd:
; SANDY: # BB#0:
; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_psignd:
; HASWELL: # BB#0:
@ -822,8 +822,8 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_psignw:
; SANDY: # BB#0:
; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: test_psignw:
; HASWELL: # BB#0:

View file

@ -0,0 +1,26 @@
; RUN: llvm-dlltool -k -m i386 --input-def %s --output-lib %t.a
; RUN: llvm-readobj %t.a | FileCheck %s
; RUN: llvm-nm %t.a | FileCheck %s -check-prefix=CHECK-NM
LIBRARY test.dll
EXPORTS
CdeclFunction
StdcallFunction@4
@FastcallFunction@4
StdcallAlias@4=StdcallFunction@4
??_7exception@@6B@
; CHECK: Name type: noprefix
; CHECK: Symbol: __imp__CdeclFunction
; CHECK: Symbol: _CdeclFunction
; CHECK: Name type: undecorate
; CHECK: Symbol: __imp__StdcallFunction@4
; CHECK: Symbol: _StdcallFunction@4
; CHECK: Name type: undecorate
; CHECK: Symbol: __imp_@FastcallFunction@4
; CHECK: Symbol: @FastcallFunction@4
; CHECK: Name type: name
; CHECK: Symbol: __imp_??_7exception@@6B@
; CHECK: Symbol: ??_7exception@@6B@
; CHECK-NM: w _StdcallAlias@4
; CHECK-NM: U _StdcallFunction@4

View file

@ -57,7 +57,6 @@ attributes #0 = { optnone noinline }
; Additional IR passes that opt doesn't turn on by default.
; OPT-MORE-DAG: Skipping pass 'Dead Code Elimination'
; OPT-MORE-DAG: Skipping pass 'Dead Instruction Elimination'
; OPT-MORE-DAG: Skipping pass 'Lower atomic intrinsics
; Loop IR passes that opt doesn't turn on by default.
; OPT-LOOP-DAG: Skipping pass 'Delete dead loops'

View file

@ -2,8 +2,8 @@
; test linking modules with specified and default PIC levels
!0 = !{ i32 1, !"PIC Level", i32 1 }
!0 = !{ i32 7, !"PIC Level", i32 1 }
!llvm.module.flags = !{!0}
; CHECK: !llvm.module.flags = !{!0}
; CHECK: !0 = !{i32 1, !"PIC Level", i32 1}
; CHECK: !0 = !{i32 7, !"PIC Level", i32 1}

View file

@ -37,3 +37,34 @@ declare void @foo2(i8* %in)
declare i32 @foo(i32 %param)
; Check that when inlining a non-recursive path into a function's own body that
; we get the re-mapping of instructions correct.
define i32 @test_recursive_inlining_remapping(i1 %init, i8* %addr) {
; CHECK-LABEL: define i32 @test_recursive_inlining_remapping(
bb:
; Entry block: reserve a local i32 slot, then branch on %init
; (true -> 'store', false -> 'load').
%n = alloca i32
br i1 %init, label %store, label %load
; CHECK-NOT: alloca
;
; CHECK: %[[N:.*]] = alloca i32
; CHECK-NEXT: br i1 %init,
store:
; Zero the slot, then call this same function with %init = false and the
; slot's address as %addr. With %init false the callee only takes the 'load'
; path, so this particular call does not recurse any further.
store i32 0, i32* %n
%cast = bitcast i32* %n to i8*
%v = call i32 @test_recursive_inlining_remapping(i1 false, i8* %cast)
ret i32 %v
; The expectations below require that no call remains in the output and that
; the self-call has become a load straight from the alloca captured as N,
; whose result is what gets returned.
; CHECK-NOT: call
;
; CHECK: store i32 0, i32* %[[N]]
; CHECK-NEXT: %[[CAST:.*]] = bitcast i32* %[[N]] to i8*
; CHECK-NEXT: %[[INLINED_LOAD:.*]] = load i32, i32* %[[N]]
; CHECK-NEXT: ret i32 %[[INLINED_LOAD]]
;
; CHECK-NOT: call
load:
; Non-recursive path: view %addr as an i32* and return the value stored there.
%castback = bitcast i8* %addr to i32*
%n.load = load i32, i32* %castback
ret i32 %n.load
}

View file

@ -26,3 +26,14 @@ define i8 @swap() {
ret i8 %j
; CHECK: ret i8 [[INST]]
}
; Atomic exchange on a local i8 slot inside a function marked noinline and
; optnone. The expectations require the output to contain a load followed
; immediately by a store, with the loaded value being returned, i.e. the
; atomicrmw is expected to be rewritten even though the function is optnone.
; NOTE(review): presumably this exercises the atomic-lowering pass named in
; this file's RUN line (not visible here) -- confirm.
define i8 @swap_optnone() noinline optnone {
; CHECK-LABEL: @swap_optnone(
%i = alloca i8
; Exchange: write 42 into %i and yield the previous contents as %j.
%j = atomicrmw xchg i8* %i, i8 42 monotonic
; CHECK: [[INST:%[a-z0-9]+]] = load
; CHECK-NEXT: store
ret i8 %j
; CHECK: ret i8 [[INST]]
}

View file

@ -154,3 +154,25 @@ define i4 @test13(i4 %x) {
%add = add i4 %mul, 3
ret i4 %add
}
; This test used to cause an infinite loop where we would loop between
; canonicalizing the negated constant (i.e., (X + Y*-0.5) -> (X - Y*0.5)) and
; breaking up a subtract (i.e., (X - Y*0.5) -> X + (0 - Y*0.5)). To break the
; cycle, we don't canonicalize the negative constant if we're going to later
; break up the subtract.
;
; Check to make sure we don't canonicalize
; (%pow2*-0.5 + %sub) -> (%sub - %pow2*0.5)
; as we would later break up this subtract causing a cycle.
;
; CHECK-LABEL: @pr34078
; CHECK: %mul5.neg = fmul fast double %pow2, -5.000000e-01
; CHECK: %sub1 = fadd fast double %mul5.neg, %sub
; Input for the pr34078 expectations: computes %sub1 = (1.0 - %A) - (%A*%A)*0.5
; and returns %sub1 + %sub1.
define double @pr34078(double %A) {
; %sub = 1.0 - %A
%sub = fsub fast double 1.000000e+00, %A
; %pow2 = %A * %A; note this multiply does not carry the 'fast' flag, unlike
; the other floating-point operations in this function.
%pow2 = fmul double %A, %A
; %mul5 = %pow2 * 0.5
%mul5 = fmul fast double %pow2, 5.000000e-01
; %sub1 = %sub - %mul5. The expected output replaces this subtract with an
; fadd of %sub and a value named %mul5.neg, which is %pow2 * -0.5.
%sub1 = fsub fast double %sub, %mul5
; %sub1 is used as both operands of the final add.
%add = fadd fast double %sub1, %sub1
ret double %add
}