From c82ad72f63369bc462e59458f09960d66daa58a9 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 4 Jan 2017 22:11:11 +0000 Subject: [PATCH] Vendor import of llvm trunk r291012: https://llvm.org/svn/llvm-project/llvm/trunk@291012 --- cmake/config-ix.cmake | 7 + cmake/modules/CheckCompilerVersion.cmake | 4 +- include/llvm/ADT/IntrusiveRefCntPtr.h | 7 +- include/llvm/ADT/PriorityWorklist.h | 39 + include/llvm/Analysis/Loads.h | 18 +- include/llvm/CodeGen/AsmPrinter.h | 5 + include/llvm/CodeGen/MachineDominators.h | 4 +- include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 6 +- include/llvm/IR/IntrinsicsAMDGPU.td | 7 + include/llvm/IR/IntrinsicsX86.td | 124 -- include/llvm/Support/FileSystem.h | 18 +- include/llvm/Support/YAMLTraits.h | 98 +- lib/Analysis/ValueTracking.cpp | 6 - lib/Bitcode/Reader/MetadataLoader.cpp | 2 +- lib/CodeGen/Analysis.cpp | 26 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 57 + lib/CodeGen/InlineSpiller.cpp | 8 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +- .../OProfileJIT/OProfileJITEventListener.cpp | 16 +- lib/Fuzzer/FuzzerTracePC.cpp | 2 + lib/IR/AutoUpgrade.cpp | 64 +- lib/LTO/LTO.cpp | 12 +- lib/Support/APFloat.cpp | 8 +- lib/Support/Host.cpp | 1 + lib/Support/NativeFormatting.cpp | 5 +- lib/Support/YAMLTraits.cpp | 26 +- lib/TableGen/StringMatcher.cpp | 19 +- lib/Target/AArch64/AArch64.td | 6 +- lib/Target/AArch64/AArch64AsmPrinter.cpp | 56 +- .../AArch64/AArch64LoadStoreOptimizer.cpp | 3 + lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 + lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 + lib/Target/AMDGPU/AMDGPUInstrInfo.td | 4 + lib/Target/AMDGPU/SIISelLowering.cpp | 9 +- lib/Target/AMDGPU/SIInsertWaits.cpp | 5 +- lib/Target/AMDGPU/SOPInstructions.td | 5 +- lib/Target/ARM/ARMAsmPrinter.cpp | 3 - lib/Target/ARM/ARMAsmPrinter.h | 3 - lib/Target/ARM/ARMMCInstLower.cpp | 38 - lib/Target/Hexagon/BitTracker.cpp | 70 +- lib/Target/Hexagon/BitTracker.h | 55 +- lib/Target/Hexagon/HexagonBitTracker.cpp | 52 +- lib/Target/Hexagon/HexagonBitTracker.h | 
22 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 181 +- lib/Target/Hexagon/HexagonInstrInfo.h | 19 +- .../Hexagon/HexagonMachineFunctionInfo.h | 27 +- .../Hexagon/HexagonTargetObjectFile.cpp | 39 +- .../MCTargetDesc/HexagonMCCompound.cpp | 45 +- lib/Target/Hexagon/RDFCopy.h | 19 +- lib/Target/Hexagon/RDFGraph.cpp | 60 +- lib/Target/Hexagon/RDFGraph.h | 99 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 6 + lib/Target/Mips/MipsSEISelDAGToDAG.h | 2 + lib/Target/PowerPC/PPCISelLowering.cpp | 83 +- lib/Target/X86/X86AsmPrinter.cpp | 2 +- lib/Target/X86/X86FrameLowering.cpp | 22 +- lib/Target/X86/X86ISelLowering.cpp | 236 ++- lib/Target/X86/X86InstrAVX512.td | 43 +- lib/Target/X86/X86InstrSSE.td | 49 +- lib/Target/X86/X86InstrTablesInfo.h | 90 +- lib/Target/X86/X86IntrinsicsInfo.h | 26 +- lib/Target/X86/X86MCInstLower.cpp | 50 - lib/Target/X86/X86TargetTransformInfo.cpp | 266 ++- .../InstCombine/InstCombineAddSub.cpp | 12 + .../InstCombine/InstCombineCalls.cpp | 82 +- .../InstCombineLoadStoreAlloca.cpp | 18 +- .../InstCombine/InstCombineShifts.cpp | 19 + lib/Transforms/Scalar/EarlyCSE.cpp | 4 +- lib/Transforms/Scalar/NewGVN.cpp | 60 +- lib/Transforms/Utils/InlineFunction.cpp | 2 +- lib/Transforms/Utils/LoopUnrollPeel.cpp | 25 +- lib/Transforms/Utils/SimplifyCFG.cpp | 10 +- runtimes/CMakeLists.txt | 6 + .../CostModel/X86/alternate-shuffle-cost.ll | 44 +- .../Analysis/RegionInfo/bad_node_traversal.ll | 43 + test/Bitcode/DIGlobalVariableExpression.ll | 3 + .../AArch64/arm64-zero-cycle-zeroing.ll | 7 + .../AArch64/store_merge_pair_offset.ll | 12 + test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll | 41 + test/CodeGen/AMDGPU/amdgcn.sendmsg.ll | 161 ++ test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll | 17 - test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll | 24 - test/CodeGen/PowerPC/ppc64-blnop.ll | 129 ++ test/CodeGen/PowerPC/ppc64-sibcall.ll | 8 +- test/CodeGen/SPARC/soft-float.ll | 6 +- test/CodeGen/X86/MergeConsecutiveStores.ll | 34 + test/CodeGen/X86/avx2-vbroadcast.ll | 233 +-- 
test/CodeGen/X86/avx512-any_extend_load.ll | 6 +- test/CodeGen/X86/avx512-extract-subvector.ll | 12 +- test/CodeGen/X86/avx512-insert-extract.ll | 32 +- test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 184 ++ test/CodeGen/X86/avx512-intrinsics.ll | 127 -- test/CodeGen/X86/avx512-skx-insert-subvec.ll | 6 +- test/CodeGen/X86/avx512-vbroadcasti128.ll | 6 +- test/CodeGen/X86/avx512bwvl-intrinsics.ll | 8 +- .../X86/avx512dq-intrinsics-upgrade.ll | 136 ++ test/CodeGen/X86/avx512dq-intrinsics.ll | 121 -- .../X86/avx512dqvl-intrinsics-upgrade.ll | 59 + test/CodeGen/X86/avx512dqvl-intrinsics.ll | 60 - .../X86/avx512vl-intrinsics-upgrade.ll | 60 + test/CodeGen/X86/avx512vl-intrinsics.ll | 61 - .../X86/frame-lowering-debug-intrinsic-2.ll | 72 + .../X86/frame-lowering-debug-intrinsic.ll | 41 +- test/CodeGen/X86/i64-to-float.ll | 20 +- test/CodeGen/X86/masked_memop.ll | 56 +- test/CodeGen/X86/stack-folding-fp-avx512vl.ll | 8 +- .../CodeGen/X86/stack-folding-int-avx512vl.ll | 8 +- test/CodeGen/X86/subvector-broadcast.ll | 261 +-- test/CodeGen/X86/vec_fp_to_int.ll | 158 +- test/CodeGen/X86/vec_int_to_fp.ll | 28 +- test/CodeGen/X86/vector-half-conversions.ll | 64 +- test/CodeGen/X86/vector-lzcnt-256.ll | 86 +- test/CodeGen/X86/vector-shuffle-256-v16.ll | 834 +++------ test/CodeGen/X86/vector-shuffle-256-v32.ll | 90 +- test/CodeGen/X86/vector-shuffle-256-v4.ll | 68 +- test/CodeGen/X86/vector-shuffle-256-v8.ll | 530 ++---- test/CodeGen/X86/vector-shuffle-512-v16.ll | 72 +- test/CodeGen/X86/vector-shuffle-512-v8.ll | 196 +++ test/CodeGen/X86/vector-trunc-math.ll | 1505 +++++++---------- test/CodeGen/X86/vector-trunc.ll | 6 +- .../Generic/simplifycfg_sink_last_inst.ll | 70 + test/DebugInfo/X86/dbg-value-frame-index.ll | 39 + test/MC/ARM/coff-relocations.s | 2 +- test/ThinLTO/X86/drop-debug-info.ll | 4 +- test/Transforms/Inline/inline-invoke-tail.ll | 2 +- test/Transforms/InstCombine/add.ll | 12 + test/Transforms/InstCombine/assume.ll | 63 +- test/Transforms/InstCombine/fabs.ll | 48 + 
test/Transforms/InstCombine/fma.ll | 203 +++ test/Transforms/InstCombine/rem.ll | 10 +- test/Transforms/InstCombine/shift.ll | 12 + test/Transforms/InstCombine/sink-zext.ll | 71 + test/Transforms/LoopIdiom/basic.ll | 8 +- test/Transforms/LoopUnroll/peel-loop-pgo.ll | 2 +- test/Transforms/NewGVN/equivalent-phi.ll | 68 + test/Transforms/NewGVN/pr31483.ll | 106 ++ .../X86/good-prototype.ll | 21 + .../PartiallyInlineLibCalls/X86/lit.local.cfg | 2 + .../SLPVectorizer/X86/horizontal-list.ll | 15 +- test/tools/gold/X86/Inputs/thinlto.ll | 1 + .../tools/gold/X86/Inputs/thinlto_archive1.ll | 1 + .../tools/gold/X86/Inputs/thinlto_archive2.ll | 1 + test/tools/gold/X86/comdat.ll | 2 + test/tools/gold/X86/opt-level.ll | 4 + test/tools/gold/X86/pr25907.ll | 2 + test/tools/gold/X86/stats.ll | 1 + test/tools/gold/X86/strip_names.ll | 3 + test/tools/gold/X86/thinlto.ll | 8 + test/tools/gold/X86/thinlto_afdo.ll | 2 + test/tools/gold/X86/thinlto_archive.ll | 2 + test/tools/gold/X86/type-merge2.ll | 2 + test/tools/gold/X86/visibility.ll | 2 + tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 16 +- tools/llvm-link/CMakeLists.txt | 1 + tools/llvm-link/LLVMBuild.txt | 2 +- tools/llvm-link/llvm-link.cpp | 51 +- unittests/ADT/PriorityWorklistTest.cpp | 47 + .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 73 +- unittests/Support/YAMLIOTest.cpp | 62 + 159 files changed, 4937 insertions(+), 4389 deletions(-) create mode 100644 test/Analysis/RegionInfo/bad_node_traversal.ll create mode 100644 test/CodeGen/AArch64/store_merge_pair_offset.ll create mode 100644 test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll create mode 100644 test/CodeGen/AMDGPU/amdgcn.sendmsg.ll delete mode 100644 test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll delete mode 100644 test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll create mode 100644 test/CodeGen/PowerPC/ppc64-blnop.ll create mode 100644 test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll create mode 100644 test/CodeGen/X86/frame-lowering-debug-intrinsic-2.ll create mode 100644 
test/DebugInfo/Generic/simplifycfg_sink_last_inst.ll create mode 100644 test/DebugInfo/X86/dbg-value-frame-index.ll create mode 100644 test/Transforms/InstCombine/fma.ll create mode 100644 test/Transforms/InstCombine/sink-zext.ll create mode 100644 test/Transforms/NewGVN/equivalent-phi.ll create mode 100644 test/Transforms/NewGVN/pr31483.ll create mode 100644 test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll create mode 100644 test/Transforms/PartiallyInlineLibCalls/X86/lit.local.cfg diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index fe3afd3fcc2..530a5ddaab4 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -457,6 +457,13 @@ if( MSVC ) if(LLVM_ENABLE_DIA_SDK AND NOT HAVE_DIA_SDK) message(FATAL_ERROR "DIA SDK not found. If you have both VS 2012 and 2013 installed, you may need to uninstall the former and re-install the latter afterwards.") endif() + + # Normalize to 0/1 for lit.site.cfg + if(LLVM_ENABLE_DIA_SDK) + set(LLVM_ENABLE_DIA_SDK 1) + else() + set(LLVM_ENABLE_DIA_SDK 0) + endif() else() set(LLVM_ENABLE_DIA_SDK 0) endif( MSVC ) diff --git a/cmake/modules/CheckCompilerVersion.cmake b/cmake/modules/CheckCompilerVersion.cmake index cdad7ce2765..2e8f5445781 100644 --- a/cmake/modules/CheckCompilerVersion.cmake +++ b/cmake/modules/CheckCompilerVersion.cmake @@ -43,8 +43,8 @@ int main() { return (float)x; }" elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0) message(FATAL_ERROR "Host Visual Studio must be at least 2015") - elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.00.24215.1) - message(WARNING "Host Visual Studio should at least be 2015 Update 3 (MSVC 19.00.24215.1)" + elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.00.24213.1) + message(WARNING "Host Visual Studio should at least be 2015 Update 3 (MSVC 19.00.24213.1)" " due to miscompiles from earlier versions") endif() endif() diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h 
index 559fb40773a..a77cf04ea4d 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -21,8 +21,8 @@ // class MyClass : public RefCountedBase {}; // // void foo() { -// // Objects that inherit from RefCountedBase should always be instantiated -// // on the heap, never on the stack. +// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by +// // 1 (from 0 in this case). // IntrusiveRefCntPtr Ptr1(new MyClass()); // // // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1. @@ -68,9 +68,6 @@ namespace llvm { /// calls to Release() and Retain(), which increment and decrement the object's /// refcount, respectively. When a Release() call decrements the refcount to 0, /// the object deletes itself. -/// -/// Objects that inherit from RefCountedBase should always be allocated with -/// operator new. template class RefCountedBase { mutable unsigned RefCount = 0; diff --git a/include/llvm/ADT/PriorityWorklist.h b/include/llvm/ADT/PriorityWorklist.h index c0b4709e98f..3198dd43870 100644 --- a/include/llvm/ADT/PriorityWorklist.h +++ b/include/llvm/ADT/PriorityWorklist.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include @@ -107,6 +108,39 @@ public: return false; } + /// Insert a sequence of new elements into the PriorityWorklist. + template + typename std::enable_if::value>::type + insert(SequenceT &&Input) { + if (std::begin(Input) == std::end(Input)) + // Nothing to do for an empty input sequence. + return; + + // First pull the input sequence into the vector as a bulk append + // operation. + ptrdiff_t StartIndex = V.size(); + V.insert(V.end(), std::begin(Input), std::end(Input)); + // Now walk backwards fixing up the index map and deleting any duplicates. 
+ for (ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) { + auto InsertResult = M.insert({V[i], i}); + if (InsertResult.second) + continue; + + // If the existing index is before this insert's start, nuke that one and + // move it up. + ptrdiff_t &Index = InsertResult.first->second; + if (Index < StartIndex) { + V[Index] = T(); + Index = i; + continue; + } + + // Otherwise the existing one comes first so just clear out the value in + // this slot. + V[i] = T(); + } + } + /// Remove the last element of the PriorityWorklist. void pop_back() { assert(!empty() && "Cannot remove an element when empty!"); @@ -169,6 +203,11 @@ public: return true; } + /// Reverse the items in the PriorityWorklist. + /// + /// This does an in-place reversal. Other kinds of reverse aren't easy to + /// support in the face of the worklist semantics. + /// Completely clear the PriorityWorklist void clear() { M.clear(); diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index 139bf3c2116..e167f36219d 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -23,10 +23,9 @@ namespace llvm { class DataLayout; class MDNode; -/// isDereferenceablePointer - Return true if this is always a dereferenceable -/// pointer. If the context instruction is specified perform context-sensitive -/// analysis and return true if the pointer is dereferenceable at the -/// specified instruction. +/// Return true if this is always a dereferenceable pointer. If the context +/// instruction is specified perform context-sensitive analysis and return true +/// if the pointer is dereferenceable at the specified instruction. 
bool isDereferenceablePointer(const Value *V, const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -40,8 +39,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. +/// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive /// analysis and returns true if it is safe to load immediately before ScanFrom. @@ -54,12 +52,12 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); -/// DefMaxInstsToScan - the default number of maximum instructions -/// to scan in the block, used by FindAvailableLoadedValue(). +/// The default number of maximum instructions to scan in the block, used by +/// FindAvailableLoadedValue(). extern cl::opt DefMaxInstsToScan; -/// \brief Scan backwards to see if we have the value of the given load -/// available locally within a small number of instructions. +/// Scan backwards to see if we have the value of the given load available +/// locally within a small number of instructions. /// /// You can use this function to scan across multiple blocks: after you call /// this function, if ScanFrom points at the beginning of the block, it's safe diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index c1be46ddd7b..be8822df3db 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -208,6 +208,8 @@ public: SledKind Kind; bool AlwaysInstrument; const class Function *Fn; + + void emit(int, MCStreamer *, const MCSymbol *) const; }; // All the sleds to be emitted. @@ -216,6 +218,9 @@ public: // Helper function to record a given XRay sled. 
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); + /// Emit a table with all XRay instrumentation points. + void emitXRayTable(); + //===------------------------------------------------------------------===// // MachineFunctionPass Implementation. //===------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 76e1df89169..21ecef587aa 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -59,6 +59,9 @@ class MachineDominatorTree : public MachineFunctionPass { /// such as BB == elt.NewBB. mutable SmallSet NewBBs; + /// The DominatorTreeBase that is used to compute a normal dominator tree + DominatorTreeBase* DT; + /// \brief Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses /// the fast query path of DT as much as possible. @@ -68,7 +71,6 @@ class MachineDominatorTree : public MachineFunctionPass { public: static char ID; // Pass ID, replacement for typeid - DominatorTreeBase* DT; MachineDominatorTree(); diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index ca9a6c82287..878f1c76ebf 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -116,12 +116,12 @@ public: // An unsigned integer indicating the identity of the source file // corresponding to a machine instruction. uint16_t File; - // An unsigned integer whose value encodes the applicable instruction set - // architecture for the current instruction. - uint8_t Isa; // An unsigned integer representing the DWARF path discriminator value // for this location. uint32_t Discriminator; + // An unsigned integer whose value encodes the applicable instruction set + // architecture for the current instruction. 
+ uint8_t Isa; // A boolean indicating that the current instruction is the beginning of a // statement. uint8_t IsStmt:1, diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 078959ce15d..07d5b5ea40d 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id : // Instruction Intrinsics //===----------------------------------------------------------------------===// +// The first parameter is s_sendmsg immediate (i16), +// the second one is copied to m0 +def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; +def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; + def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 3a496cb6645..85966af9c82 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2063,130 +2063,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; } -// Vector extract and insert -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_avx512_mask_vextractf32x4_512 : - GCCBuiltin<"__builtin_ia32_extractf32x4_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x4_512 : - GCCBuiltin<"__builtin_ia32_extracti32x4_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf32x4_256 : - GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x4_256 : - GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x2_256 : - GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x2_256 : - GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x2_512 : - GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x2_512 : - GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf32x8_512 : - GCCBuiltin<"__builtin_ia32_extractf32x8_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty, - llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x8_512 : - GCCBuiltin<"__builtin_ia32_extracti32x8_mask">, - Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty, - llvm_v8i32_ty, llvm_i8_ty], 
[IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x4_512 : - GCCBuiltin<"__builtin_ia32_extractf64x4_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty, - llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x4_512 : - GCCBuiltin<"__builtin_ia32_extracti64x4_mask">, - Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x4_256 : - GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x4_512 : - GCCBuiltin<"__builtin_ia32_insertf32x4_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x8_512 : - GCCBuiltin<"__builtin_ia32_insertf32x8_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x2_256 : - GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x2_512 : - GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x4_512 : - GCCBuiltin<"__builtin_ia32_insertf64x4_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x4_256 : - GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x4_512 : - 
GCCBuiltin<"__builtin_ia32_inserti32x4_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x8_512 : - GCCBuiltin<"__builtin_ia32_inserti32x8_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x2_256 : - GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x2_512 : - GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x4_512 : - GCCBuiltin<"__builtin_ia32_inserti64x4_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; -} - // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">, diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 9d8d8c3ffb5..347f2110891 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -769,17 +769,13 @@ namespace detail { std::error_code directory_iterator_increment(DirIterState &); std::error_code directory_iterator_destruct(DirIterState &); - /// DirIterState - Keeps state for the directory_iterator. It is reference - /// counted in order to preserve InputIterator semantics on copy. - struct DirIterState : public RefCountedBase { - DirIterState() - : IterationHandle(0) {} - + /// Keeps state for the directory_iterator. 
+ struct DirIterState { ~DirIterState() { directory_iterator_destruct(*this); } - intptr_t IterationHandle; + intptr_t IterationHandle = 0; directory_entry CurrentEntry; }; } // end namespace detail @@ -788,23 +784,23 @@ namespace detail { /// operator++ because we need an error_code. If it's really needed we can make /// it call report_fatal_error on error. class directory_iterator { - IntrusiveRefCntPtr State; + std::shared_ptr State; public: explicit directory_iterator(const Twine &path, std::error_code &ec) { - State = new detail::DirIterState; + State = std::make_shared(); SmallString<128> path_storage; ec = detail::directory_iterator_construct(*State, path.toStringRef(path_storage)); } explicit directory_iterator(const directory_entry &de, std::error_code &ec) { - State = new detail::DirIterState; + State = std::make_shared(); ec = detail::directory_iterator_construct(*State, de.path()); } /// Construct end iterator. - directory_iterator() : State(nullptr) {} + directory_iterator() = default; // No operator++ because we need error_code. directory_iterator &increment(std::error_code &ec) { diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index 38acb36942b..cbba9c08275 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -209,6 +209,15 @@ struct DocumentListTraits { // static T::value_type& element(IO &io, T &seq, size_t index); }; +/// This class should be specialized by any type that needs to be converted +/// to/from a YAML mapping in the case where the names of the keys are not known +/// in advance, e.g. a string map. +template +struct CustomMappingTraits { + // static void inputOne(IO &io, StringRef key, T &elem); + // static void output(IO &io, T &elem); +}; + // Only used for better diagnostics of missing traits template struct MissingTrait; @@ -358,6 +367,23 @@ public: static bool const value = (sizeof(test>(nullptr)) == 1); }; +// Test if CustomMappingTraits is defined on type T. 
+template +struct has_CustomMappingTraits +{ + typedef void (*Signature_input)(IO &io, StringRef key, T &v); + + template + static char test(SameType*); + + template + static double test(...); + +public: + static bool const value = + (sizeof(test>(nullptr)) == 1); +}; + // has_FlowTraits will cause an error with some compilers because // it subclasses int. Using this wrapper only instantiates the // real has_FlowTraits only if the template type is a class. @@ -493,6 +519,7 @@ struct missingTraits !has_BlockScalarTraits::value && !has_MappingTraits::value && !has_SequenceTraits::value && + !has_CustomMappingTraits::value && !has_DocumentListTraits::value> {}; template @@ -531,6 +558,7 @@ public: virtual void endMapping() = 0; virtual bool preflightKey(const char*, bool, bool, bool &, void *&) = 0; virtual void postflightKey(void*) = 0; + virtual std::vector keys() = 0; virtual void beginFlowMapping() = 0; virtual void endFlowMapping() = 0; @@ -818,6 +846,21 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) { } } +template +typename std::enable_if::value, void>::type +yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { + if ( io.outputting() ) { + io.beginMapping(); + CustomMappingTraits::output(io, Val); + io.endMapping(); + } else { + io.beginMapping(); + for (StringRef key : io.keys()) + CustomMappingTraits::inputOne(io, key, Val); + io.endMapping(); + } +} + template typename std::enable_if::value, void>::type yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { @@ -1074,6 +1117,7 @@ private: void endMapping() override; bool preflightKey(const char *, bool, bool, bool &, void *&) override; void postflightKey(void *) override; + std::vector keys() override; void beginFlowMapping() override; void endFlowMapping() override; unsigned beginSequence() override; @@ -1154,10 +1198,8 @@ private: typedef llvm::StringMap> NameToNode; - bool isValidKey(StringRef key); - NameToNode Mapping; - llvm::SmallVector ValidKeys; + llvm::SmallVector ValidKeys; }; class SequenceHNode : 
public HNode { @@ -1215,6 +1257,7 @@ public: void endMapping() override; bool preflightKey(const char *key, bool, bool, bool &, void *&) override; void postflightKey(void *) override; + std::vector keys() override; void beginFlowMapping() override; void endFlowMapping() override; unsigned beginSequence() override; @@ -1384,6 +1427,17 @@ operator>>(Input &In, T &Val) { return In; } +// Define non-member operator>> so that Input can stream in a string map. +template +inline +typename std::enable_if::value, Input &>::type +operator>>(Input &In, T &Val) { + EmptyContext Ctx; + if (In.setCurrentDocument()) + yamlize(In, Val, true, Ctx); + return In; +} + // Provide better error message about types missing a trait specialization template inline typename std::enable_if::value, @@ -1457,6 +1511,21 @@ operator<<(Output &Out, T &Val) { return Out; } +// Define non-member operator<< so that Output can stream out a string map. +template +inline +typename std::enable_if::value, Output &>::type +operator<<(Output &Out, T &Val) { + EmptyContext Ctx; + Out.beginDocuments(); + if (Out.preflightDocument(0)) { + yamlize(Out, Val, true, Ctx); + Out.postflightDocument(); + } + Out.endDocuments(); + return Out; +} + // Provide better error message about types missing a trait specialization template inline typename std::enable_if::value, @@ -1476,6 +1545,18 @@ template struct SequenceTraitsImpl { } }; +/// Implementation of CustomMappingTraits for std::map. +template struct StdMapStringCustomMappingTraitsImpl { + typedef std::map map_type; + static void inputOne(IO &io, StringRef key, map_type &v) { + io.mapRequired(key.str().c_str(), v[key]); + } + static void output(IO &io, map_type &v) { + for (auto &p : v) + io.mapRequired(p.first.c_str(), p.second); + } +}; + } // end namespace yaml } // end namespace llvm @@ -1530,4 +1611,15 @@ template struct SequenceTraitsImpl { } \ } +/// Utility for declaring that std::map should be considered +/// a YAML map. 
+#define LLVM_YAML_IS_STRING_MAP(_type) \ + namespace llvm { \ + namespace yaml { \ + template <> \ + struct CustomMappingTraits> \ + : public StdMapStringCustomMappingTraitsImpl<_type> {}; \ + } \ + } + #endif // LLVM_SUPPORT_YAMLTRAITS_H diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 2a77baec6c3..073b4e6ab26 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2542,9 +2542,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, if (const ConstantFP *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegZero(); - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. if (Depth == MaxDepth) return false; // Limit search depth. @@ -2589,9 +2586,6 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, if (const ConstantFP *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeNegativeZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. if (Depth == MaxDepth) return false; // Limit search depth. diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index cd08268d47b..5da421a79b7 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -749,7 +749,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // handles the case where this is type ODRed with a definition needed // by the importing module, in which case the existing definition is // used. 
- if (IsImporting && !ImportFullTypeDefinitions && + if (IsImporting && !ImportFullTypeDefinitions && Identifier && (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 0678bce449e..79ecc4308fe 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V, TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); NoopInput = Op; - } else if (isa(I)) { - // Look through call (skipping callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } - } else if (isa(I)) { - // Look through invoke (skipping BB, BB, Callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } + } else if (auto CS = ImmutableCallSite(I)) { + const Value *ReturnedOp = CS.getReturnedArgOperand(); + if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI)) + NoopInput = ReturnedOp; } else if (const InsertValueInst *IVI = dyn_cast(V)) { // Value may come from either the aggregate or the scalar ArrayRef InsertLoc = IVI->getIndices(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index de0a4f0befa..5f15ac1d503 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -37,6 +37,8 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include 
"llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" @@ -2610,6 +2612,61 @@ AsmPrinterHandler::~AsmPrinterHandler() {} void AsmPrinterHandler::markFunctionEnd() {} +// In the binary's "xray_instr_map" section, an array of these function entries +// describes each instrumentation point. When XRay patches your code, the index +// into this table will be given to your handler as a patch point identifier. +void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, + const MCSymbol *CurrentFnSym) const { + Out->EmitSymbolValue(Sled, Bytes); + Out->EmitSymbolValue(CurrentFnSym, Bytes); + auto Kind8 = static_cast(Kind); + Out->EmitBytes(StringRef(reinterpret_cast(&Kind8), 1)); + Out->EmitBytes( + StringRef(reinterpret_cast(&AlwaysInstrument), 1)); + Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries +} + +void AsmPrinter::emitXRayTable() { + if (Sleds.empty()) + return; + + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + auto Fn = MF->getFunction(); + MCSection *Section = nullptr; + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + if (Fn->hasComdat()) { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); + } else { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + } + } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { + Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + SectionKind::getReadOnlyWithRel()); + } else { + llvm_unreachable("Unsupported target"); + } + + // Before we switch over, we force a reference to a label inside the + // xray_instr_map section. Since this function is always called just + // before the function's end, we assume that this is happening after + // the last return instruction. 
+ + auto WordSizeBytes = TM.getPointerSize(); + MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); + OutStreamer->SwitchSection(Section); + OutStreamer->EmitLabel(Tmp); + for (const auto &Sled : Sleds) + Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + + OutStreamer->SwitchSection(PrevSection); + Sleds.clear(); +} + void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind) { auto Fn = MI.getParent()->getParent()->getFunction(); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 422f2dc2f2f..3d81184f774 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -1124,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills( // earlier spill with smaller SlotIndex. for (const auto CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.DT->getNode(Block); + MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; if (PrevSpill) { SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); @@ -1132,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills( MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill; SpillsToRm.push_back(SpillToRm); - SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep; + SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; } else { - SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill; + SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } for (const auto SpillToRm : SpillsToRm) @@ -1209,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders( // Sort the nodes in WorkSet in top-down order and save the nodes // in Orders. Orders will be used for hoisting in runHoistSpills. 
unsigned idx = 0; - Orders.push_back(MDT.DT->getNode(Root)); + Orders.push_back(MDT.getBase().getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; const std::vector &Children = Node->getChildren(); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b4b41c3d001..4632484055d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4277,7 +4277,8 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, + int64_t PartialOffset = 0) { bool IsIndexSignExt = false; // Split up a folded GlobalAddress+Offset into its component parts. @@ -4286,7 +4287,7 @@ struct BaseIndexOffset { return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), SDLoc(GA), GA->getValueType(0), - /*Offset=*/0, + /*Offset=*/PartialOffset, /*isTargetGA=*/false, GA->getTargetFlags()), SDValue(), @@ -4298,14 +4299,13 @@ struct BaseIndexOffset { // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // We know that we have at least an ADD instruction. Try to pattern match // the simple case of BASE + OFFSET. 
if (isa(Ptr->getOperand(1))) { int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); - return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, - IsIndexSignExt); + return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); } // Inside a loop the current BASE pointer is calculated using an ADD and a @@ -4314,7 +4314,7 @@ struct BaseIndexOffset { // (i64 mul (i64 %induction_var) // (i64 %element_size))) if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); @@ -4328,14 +4328,14 @@ struct BaseIndexOffset { // Either the case of Base + Index (no offset) or something else. if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); // Now we have the case of Base + Index + offset. SDValue Index = IndexOffset->getOperand(0); SDValue Offset = IndexOffset->getOperand(1); if (!isa(Offset)) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Ignore signextends. 
if (Index->getOpcode() == ISD::SIGN_EXTEND) { @@ -4344,7 +4344,7 @@ struct BaseIndexOffset { } else IsIndexSignExt = false; int64_t Off = cast(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); } }; } // namespace diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 324d0711870..57b5d85bb55 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -88,15 +88,15 @@ void OProfileJITEventListener::NotifyObjectEmitted( // Use symbol info to iterate functions in the object. for (const std::pair &P : computeSymbolSizes(DebugObj)) { SymbolRef Sym = P.first; - if (Sym.getType() != SymbolRef::ST_Function) + if (!Sym.getType() || *Sym.getType() != SymbolRef::ST_Function) continue; - ErrorOr NameOrErr = Sym.getName(); - if (NameOrErr.getError()) + Expected NameOrErr = Sym.getName(); + if (!NameOrErr) continue; StringRef Name = *NameOrErr; - ErrorOr AddrOrErr = Sym.getAddress(); - if (AddrOrErr.getError()) + Expected AddrOrErr = Sym.getAddress(); + if (!AddrOrErr) continue; uint64_t Addr = *AddrOrErr; uint64_t Size = P.second; @@ -128,9 +128,9 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { for (symbol_iterator I = DebugObj.symbol_begin(), E = DebugObj.symbol_end(); I != E; ++I) { - if (I->getType() == SymbolRef::ST_Function) { - ErrorOr AddrOrErr = I->getAddress(); - if (AddrOrErr.getError()) + if (I->getType() && *I->getType() == SymbolRef::ST_Function) { + Expected AddrOrErr = I->getAddress(); + if (!AddrOrErr) continue; uint64_t Addr = *AddrOrErr; diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp index 01c0b8c2ddb..39d6e602621 100644 --- a/lib/Fuzzer/FuzzerTracePC.cpp +++ b/lib/Fuzzer/FuzzerTracePC.cpp @@ -80,6 +80,7 @@ static bool 
IsInterestingCoverageFile(std::string &File) { } void TracePC::InitializePrintNewPCs() { + if (!DoPrintNewPCs) return; assert(!PrintedPCs); PrintedPCs = new std::set; for (size_t i = 1; i < GetNumPCs(); i++) @@ -88,6 +89,7 @@ void TracePC::InitializePrintNewPCs() { } void TracePC::PrintNewPCs() { + if (!DoPrintNewPCs) return; assert(PrintedPCs); for (size_t i = 1; i < GetNumPCs(); i++) if (PCs[i] && PrintedPCs->insert(PCs[i]).second) diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 2d9d0f95efa..a87b9bec1ed 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -342,8 +342,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 Name.startswith("avx.vinsertf128.") || // Added in 3.7 Name == "avx2.vinserti128" || // Added in 3.7 + Name.startswith("avx512.mask.insert") || // Added in 4.0 Name.startswith("avx.vextractf128.") || // Added in 3.7 Name == "avx2.vextracti128" || // Added in 3.7 + Name.startswith("avx512.mask.vextract") || // Added in 4.0 Name.startswith("sse4a.movnt.") || // Added in 3.9 Name.startswith("avx.movnt.") || // Added in 3.2 Name.startswith("avx512.storent.") || // Added in 3.9 @@ -1150,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); } else if (IsX86 && (Name.startswith("avx.vinsertf128.") || - Name == "avx2.vinserti128")) { + Name == "avx2.vinserti128" || + Name.startswith("avx512.mask.insert"))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); - VectorType *VecTy = cast(CI->getType()); - unsigned NumElts = VecTy->getNumElements(); + unsigned DstNumElts = CI->getType()->getVectorNumElements(); + unsigned SrcNumElts = Op1->getType()->getVectorNumElements(); + unsigned Scale = DstNumElts / SrcNumElts; // Mask off the high bits of the immediate value; hardware ignores those. 
- Imm = Imm & 1; + Imm = Imm % Scale; - // Extend the second operand into a vector that is twice as big. + // Extend the second operand into a vector the size of the destination. Value *UndefV = UndefValue::get(Op1->getType()); - SmallVector Idxs(NumElts); - for (unsigned i = 0; i != NumElts; ++i) + SmallVector Idxs(DstNumElts); + for (unsigned i = 0; i != SrcNumElts; ++i) Idxs[i] = i; + for (unsigned i = SrcNumElts; i != DstNumElts; ++i) + Idxs[i] = SrcNumElts; Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); // Insert the second operand into the first operand. @@ -1178,33 +1184,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Imm = 1 // Imm = 0 - // The low half of the result is either the low half of the 1st operand - // or the low half of the 2nd operand (the inserted vector). - for (unsigned i = 0; i != NumElts / 2; ++i) - Idxs[i] = Imm ? i : (i + NumElts); - // The high half of the result is either the low half of the 2nd operand - // (the inserted vector) or the high half of the 1st operand. - for (unsigned i = NumElts / 2; i != NumElts; ++i) - Idxs[i] = Imm ? (i + NumElts / 2) : i; + // First fill with identify mask. + for (unsigned i = 0; i != DstNumElts; ++i) + Idxs[i] = i; + // Then replace the elements where we need to insert. + for (unsigned i = 0; i != SrcNumElts; ++i) + Idxs[i + Imm * SrcNumElts] = i + DstNumElts; Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); + + // If the intrinsic has a mask operand, handle that. 
+ if (CI->getNumArgOperands() == 5) + Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, + CI->getArgOperand(3)); } else if (IsX86 && (Name.startswith("avx.vextractf128.") || - Name == "avx2.vextracti128")) { + Name == "avx2.vextracti128" || + Name.startswith("avx512.mask.vextract"))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); - VectorType *VecTy = cast(CI->getType()); - unsigned NumElts = VecTy->getNumElements(); + unsigned DstNumElts = CI->getType()->getVectorNumElements(); + unsigned SrcNumElts = Op0->getType()->getVectorNumElements(); + unsigned Scale = SrcNumElts / DstNumElts; // Mask off the high bits of the immediate value; hardware ignores those. - Imm = Imm & 1; + Imm = Imm % Scale; - // Get indexes for either the high half or low half of the input vector. - SmallVector Idxs(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - Idxs[i] = Imm ? (i + NumElts) : i; + // Get indexes for the subvector of the input vector. + SmallVector Idxs(DstNumElts); + for (unsigned i = 0; i != DstNumElts; ++i) { + Idxs[i] = i + (Imm * DstNumElts); } + Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); - Value *UndefV = UndefValue::get(Op0->getType()); - Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs); + // If the intrinsic has a mask operand, handle that. + if (CI->getNumArgOperands() == 4) + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); } else if (!IsX86 && Name == "stackprotectorcheck") { Rep = nullptr; } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 7364f0e0cd3..42b3a344352 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -891,23 +891,17 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); - // Partition numbers for ThinLTO jobs start at 1 (see comments for - // GlobalResolution in LTO.h). 
Task numbers, however, start at - // ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0 - // through ParallelCodeGenParallelismLevel-1 are reserved for parallel code - // generation partitions. + // Task numbers start at ParallelCodeGenParallelismLevel if an LTO + // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1 + // are reserved for parallel code generation partitions. unsigned Task = HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0; - unsigned Partition = 1; - for (auto &Mod : ThinLTO.ModuleMap) { if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], ExportLists[Mod.first], ResolvedODR[Mod.first], ThinLTO.ModuleMap)) return E; - ++Task; - ++Partition; } return BackendProc->wait(); diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 30f0deab90a..4cfbbf8645e 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -76,8 +76,12 @@ namespace llvm { compile-time arithmetic on PPC double-double numbers, it is not able to represent all possible values held by a PPC double-double number, for example: (long double) 1.0 + (long double) 0x1p-106 - Should this be replaced by a full emulation of PPC double-double? */ - static const fltSemantics semPPCDoubleDouble = {0, 0, 0, 0}; + Should this be replaced by a full emulation of PPC double-double? + + Note: we need to make the value different from semBogus as otherwise + an unsafe optimization may collapse both values to a single address, + and we heavily rely on them having distinct addresses. */ + static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0}; /* There are temporary semantics for the real PPCDoubleDouble implementation. 
Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index dd19eee15f6..49d0ed55a71 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -1069,6 +1069,7 @@ StringRef sys::getHostCPUName() { .Case("POWER7", "pwr7") .Case("POWER8", "pwr8") .Case("POWER8E", "pwr8") + .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") .Default(generic); } diff --git a/lib/Support/NativeFormatting.cpp b/lib/Support/NativeFormatting.cpp index bb868914109..b951a88a38d 100644 --- a/lib/Support/NativeFormatting.cpp +++ b/lib/Support/NativeFormatting.cpp @@ -239,10 +239,7 @@ void llvm::write_double(raw_ostream &S, double N, FloatStyle Style, N *= 100.0; char Buf[32]; - unsigned Len; - Len = format(Spec.c_str(), N).snprint(Buf, sizeof(Buf)); - if (Style == FloatStyle::Percent) - ++Len; + format(Spec.c_str(), N).snprint(Buf, sizeof(Buf)); S << Buf; if (Style == FloatStyle::Percent) S << '%'; diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 99d2070cb6e..9849b3aa1ce 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -118,6 +118,18 @@ void Input::beginMapping() { } } +std::vector Input::keys() { + MapHNode *MN = dyn_cast(CurrentNode); + std::vector Ret; + if (!MN) { + setError(CurrentNode, "not a mapping"); + return Ret; + } + for (auto &P : MN->Mapping) + Ret.push_back(P.first()); + return Ret; +} + bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault, void *&SaveInfo) { UseDefault = false; @@ -163,7 +175,7 @@ void Input::endMapping() { if (!MN) return; for (const auto &NN : MN->Mapping) { - if (!MN->isValidKey(NN.first())) { + if (!is_contained(MN->ValidKeys, NN.first())) { setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'"); break; } @@ -373,14 +385,6 @@ std::unique_ptr Input::createHNodes(Node *N) { } } -bool Input::MapHNode::isValidKey(StringRef Key) { - for (const char *K : ValidKeys) { - if (Key.equals(K)) - 
return true; - } - return false; -} - void Input::setError(const Twine &Message) { this->setError(CurrentNode, Message); } @@ -451,6 +455,10 @@ void Output::endMapping() { StateStack.pop_back(); } +std::vector Output::keys() { + report_fatal_error("invalid call"); +} + bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault, bool &UseDefault, void *&) { UseDefault = false; diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp index 16681702d1d..0c83da65e19 100644 --- a/lib/TableGen/StringMatcher.cpp +++ b/lib/TableGen/StringMatcher.cpp @@ -11,9 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/StringMatcher.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/StringMatcher.h" +#include #include +#include +#include +#include + using namespace llvm; /// FindFirstNonCommonLetter - Find the first character in the keys of the @@ -67,7 +73,7 @@ EmitStringMatcherForChar(const std::vector &Matches, } // Bucket the matches by the character we are comparing. - std::map > MatchesByLetter; + std::map> MatchesByLetter; for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); @@ -91,7 +97,7 @@ EmitStringMatcherForChar(const std::vector &Matches, // FIXME: Need to escape general strings. 
OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", " - << NumChars << "))\n"; + << NumChars << ") != 0)\n"; OS << Indent << " break;\n"; } @@ -103,7 +109,7 @@ EmitStringMatcherForChar(const std::vector &Matches, OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; OS << Indent << "default: break;\n"; - for (std::map >::iterator LI = + for (std::map>::iterator LI = MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { // TODO: escape hard stuff (like \n) if we ever care about it. OS << Indent << "case '" << LI->first << "':\t // " @@ -118,7 +124,6 @@ EmitStringMatcherForChar(const std::vector &Matches, return true; } - /// Emit - Top level entry point. /// void StringMatcher::Emit(unsigned Indent) const { @@ -126,7 +131,7 @@ void StringMatcher::Emit(unsigned Indent) const { if (Matches.empty()) return; // First level categorization: group strings by length. - std::map > MatchesByLength; + std::map> MatchesByLength; for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); @@ -136,7 +141,7 @@ void StringMatcher::Emit(unsigned Indent) const { OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; OS.indent(Indent*2+2) << "default: break;\n"; - for (std::map >::iterator LI = + for (std::map>::iterator LI = MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " << LI->second.size() diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index c40391d5ad9..740766b151b 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -264,9 +264,13 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", "Qualcomm Falkor processors", [ FeatureCRC, FeatureCrypto, + FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, FeatureNEON, - FeaturePerfMon + 
FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing ]>; def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan", diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index b2d96a32fd3..efc22189378 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -76,7 +76,6 @@ public: void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - void EmitXRayTable(); void EmitSled(const MachineInstr &MI, SledKind Kind); /// \brief tblgen'erated driver function for lowering simple MI->MC @@ -95,7 +94,7 @@ public: AArch64FI = F.getInfo(); STI = static_cast(&F.getSubtarget()); bool Result = AsmPrinter::runOnMachineFunction(F); - EmitXRayTable(); + emitXRayTable(); return Result; } @@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) EmitSled(MI, SledKind::TAIL_CALL); } -void AArch64AsmPrinter::EmitXRayTable() -{ - //TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid - // code duplication as it is now for x86_64, ARM32 and AArch64. - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section; - - if (STI->isTargetELF()) { - if (Fn->hasComdat()) - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - else - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } else if (STI->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. 
Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. - // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. - MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { static const int8_t NoopsInSledCount = 7; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index dcb05601e5f..8a76c42b589 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { bool IsUnscaled = TII->isUnscaledLdSt(MI); int Offset = getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(MI) : 1; + // Allow one more for offset. 
+ if (Offset > 0) + Offset -= OffsetStride; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return false; diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a87204d46ea..0b0a0e7d083 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) + NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) NODE_NAME_CASE(INTERP_P1) NODE_NAME_CASE(INTERP_P2) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 5cc5efb331e..745c9923de2 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -313,6 +313,7 @@ enum NodeType : unsigned { /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, SENDMSG, + SENDMSGHALT, INTERP_MOV, INTERP_P1, INTERP_P2, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e7b40016e27..f079c8d0c70 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; +def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPInGlue]>; + def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", SDTypeProfile<1, 3, [SDTCisFP<0>]>, [SDNPInGlue]>; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index fa53831cbe1..c78e97dfd46 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue(); switch 
(IntrinsicID) { - case AMDGPUIntrinsic::SI_sendmsg: { + case AMDGPUIntrinsic::SI_sendmsg: + case Intrinsic::amdgcn_s_sendmsg: { Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); SDValue Glue = Chain.getValue(1); return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain, Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_s_sendmsghalt: { + Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); + SDValue Glue = Chain.getValue(1); + return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain, + Op.getOperand(2), Glue); + } case AMDGPUIntrinsic::SI_tbuffer_store: { SDValue Ops[] = { Chain, diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 202a1e9ed8a..fceabd7a8fd 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, return; // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. - if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) { + if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) { BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); LastInstWritesM0 = false; return; @@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) || - I->getOpcode() == AMDGPU::S_SENDMSG) + I->getOpcode() == AMDGPU::S_SENDMSG || + I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; else Required = handleOperands(*I); diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index 0aeb1297d3a..73cd5774128 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in { def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", [(AMDGPUsendmsg (i32 
imm:$simm16))] >; + +def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", + [(AMDGPUsendmsghalt (i32 imm:$simm16))] +>; } // End Uses = [EXEC, M0] -def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">; def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">; def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { let simm16 = 0; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index f20768ab77a..8ec9cb02813 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); - // Emit the XRay table for this function. - EmitXRayTable(); - // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index ce0b04d56d9..93fed10eb2d 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -113,9 +113,6 @@ public: void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - // Helper function that emits the XRay sleds we've collected for a particular - // function. 
- void EmitXRayTable(); private: void EmitSled(const MachineInstr &MI, SledKind Kind); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 293a527b09e..07044b9697b 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -22,9 +22,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; @@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { EmitSled(MI, SledKind::TAIL_CALL); } - -void ARMAsmPrinter::EmitXRayTable() -{ - if (Sleds.empty()) - return; - - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP | - ELF::SHF_MERGE, - 0, CurrentFnSym->getName()); - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - OutStreamer->SwitchSection(Section); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 4); - OutStreamer->EmitSymbolValue(CurrentFnSym, 4); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(6); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index c0591c332de..963fb99ce09 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -53,28 +53,36 @@ // // The code below is intended 
to be fully target-independent. +#include "BitTracker.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - -#include "BitTracker.h" +#include +#include +#include using namespace llvm; typedef BitTracker BT; namespace { + // Local trickery to pretty print a register (without the whole "%vreg" // business). struct printv { printv(unsigned r) : R(r) {} + unsigned R; }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); @@ -82,9 +90,11 @@ namespace { OS << 's'; return OS; } -} + +} // end anonymous namespace namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) { switch (BV.Type) { case BT::BitValue::Top: @@ -167,14 +177,14 @@ namespace llvm { return OS; } -} + +} // end namespace llvm void BitTracker::print_cells(raw_ostream &OS) const { for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; } - BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {} @@ -182,7 +192,6 @@ BitTracker::~BitTracker() { delete &Map; } - // If we were allowed to update a cell for a part of a register, the meet // operation would need to be parametrized by the register number and the // exact part of the register, so that the computer BitRefs correspond to @@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { return Changed; } - // Insert the entire cell RC into the current cell at position given by M. 
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, const BitMask &M) { @@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, return *this; } - BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { uint16_t B = M.first(), E = M.last(), W = width(); assert(B < W && E < W); @@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { return RC; } - BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { // Rotate left (i.e. towards increasing bit indices). // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] @@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { return *this; } - BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, const BitValue &V) { assert(B <= E); @@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, return *this; } - BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { // Append the cell given as the argument to the "this" cell. // Bit 0 of RC becomes bit W of the result, where W is this->width(). @@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { return *this; } - uint16_t BT::RegisterCell::ct(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const { return C; } - uint16_t BT::RegisterCell::cl(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const { return C; } - bool BT::RegisterCell::operator== (const RegisterCell &RC) const { uint16_t W = Bits.size(); if (RC.Bits.size() != W) @@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const { return true; } - uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { // The general problem is with finding a register class that corresponds // to a given reference reg:sub. 
There can be several such classes, and @@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { return BW; } - BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, const CellMapType &M) const { uint16_t BW = getRegBitWidth(RR); @@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, return RegisterCell::top(BW); } - void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const { // While updating the cell map can be done in a meaningful way for @@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, M[RR.Reg] = RC; } - // Check if the cell represents a compile-time integer value. bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { uint16_t W = A.width(); @@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { return true; } - // Convert a cell to the integer value. The result must fit in uint64_t. uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { assert(isInt(A)); @@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { return Val; } - // Evaluator helper functions. These implement some common operation on // register cells that can be used to implement target-specific instructions // in a target-specific evaluator. 
@@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { const APInt &A = CI->getValue(); uint16_t BW = A.getBitWidth(); @@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, uint16_t Sh) const { assert(Sh <= A1.width()); @@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W 
= A1.width(); @@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { uint16_t W = A1.width(); RegisterCell Res(W); @@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.cl(B), AW = A1.width(); @@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.ct(B), AW = A1.width(); @@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, return 
RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const { uint16_t W = A1.width(); @@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, const RegisterCell &A2, uint16_t AtN) const { uint16_t W1 = A1.width(), W2 = A2.width(); @@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, return Res; } - BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); uint16_t W = getRegBitWidth(Reg); @@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI, return true; } - // Main W-Z implementation. void BT::visitPHI(const MachineInstr &PI) { @@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { } } - void BT::visitUsesOf(unsigned Reg) { if (Trace) dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; @@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) { } } - BT::RegisterCell BT::get(RegisterRef RR) const { return ME.getCell(RR, Map); } - void BT::put(RegisterRef RR, const RegisterCell &RC) { ME.putCell(RR, RC, Map); } - // Replace all references to bits from OldRR with the corresponding bits // in NewRR. 
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { @@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { } } - // Check if the block has been "executed" during propagation. (If not, the // block is dead, but it may still appear to be reachable.) bool BT::reached(const MachineBasicBlock *B) const { @@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const { return false; } - // Visit an individual instruction. This could be a newly added instruction, // or one that has been modified by an optimization. void BT::visit(const MachineInstr &MI) { @@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) { FlowQ.pop(); } - void BT::reset() { EdgeExec.clear(); InstrExec.clear(); Map.clear(); } - void BT::run() { reset(); assert(FlowQ.empty()); @@ -1141,4 +1106,3 @@ void BT::run() { if (Trace) print_cells(dbgs() << "Cells after propagation:\n"); } - diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index 74cafcd00b6..48c5f2266ac 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -1,4 +1,4 @@ -//===--- BitTracker.h -----------------------------------------------------===// +//===--- BitTracker.h -------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,24 +7,27 @@ // //===----------------------------------------------------------------------===// -#ifndef BITTRACKER_H -#define BITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" - +#include "llvm/CodeGen/MachineOperand.h" +#include +#include #include #include #include +#include namespace llvm { - class ConstantInt; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineInstr; - class MachineOperand; - class raw_ostream; + +class ConstantInt; +class MachineRegisterInfo; +class MachineBasicBlock; 
+class MachineInstr; +class raw_ostream; struct BitTracker { struct BitRef; @@ -76,19 +79,19 @@ private: CellMapType &Map; }; - // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} + bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); } + unsigned Reg; uint16_t Pos; }; - // Abstraction of a register reference in MachineOperand. It contains the // register number and the subregister index. struct BitTracker::RegisterRef { @@ -96,10 +99,10 @@ struct BitTracker::RegisterRef { : Reg(R), Sub(S) {} RegisterRef(const MachineOperand &MO) : Reg(MO.getReg()), Sub(MO.getSubReg()) {} + unsigned Reg, Sub; }; - // Value that a single bit can take. This is outside of the context of // any register, it is more of an abstraction of the two-element set of // possible bit values. One extension here is the "Ref" type, which @@ -158,6 +161,7 @@ struct BitTracker::BitValue { bool operator!= (const BitValue &V) const { return !operator==(V); } + bool is(unsigned T) const { assert(T == 0 || T == 1); return T == 0 ? Type == Zero @@ -209,6 +213,7 @@ struct BitTracker::BitValue { bool num() const { return Type == Zero || Type == One; } + operator bool() const { assert(Type == Zero || Type == One); return Type == One; @@ -217,7 +222,6 @@ struct BitTracker::BitValue { friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); }; - // This operation must be idempotent, i.e. ref(ref(V)) == ref(V). inline BitTracker::BitValue BitTracker::BitValue::ref(const BitValue &V) { @@ -228,25 +232,25 @@ BitTracker::BitValue::ref(const BitValue &V) { return self(); } - inline BitTracker::BitValue BitTracker::BitValue::self(const BitRef &Self) { return BitValue(Self.Reg, Self.Pos); } - // A sequence of bits starting from index B up to and including index E. 
// If E < B, the mask represents two sections: [0..E] and [B..W) where // W is the width of the register. struct BitTracker::BitMask { - BitMask() : B(0), E(0) {} + BitMask() = default; BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } uint16_t last() const { return E; } -private: - uint16_t B, E; -}; +private: + uint16_t B = 0; + uint16_t E = 0; +}; // Representation of a register: a list of BitValues. struct BitTracker::RegisterCell { @@ -255,6 +259,7 @@ struct BitTracker::RegisterCell { uint16_t width() const { return Bits.size(); } + const BitValue &operator[](uint16_t BitN) const { assert(BitN < Bits.size()); return Bits[BitN]; @@ -297,12 +302,10 @@ private: friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); }; - inline bool BitTracker::has(unsigned Reg) const { return Map.find(Reg) != Map.end(); } - inline const BitTracker::RegisterCell& BitTracker::lookup(unsigned Reg) const { CellMapType::const_iterator F = Map.find(Reg); @@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const { return F->second; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { RegisterCell RC(Width); @@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::top(uint16_t Width) { RegisterCell RC(Width); @@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::ref(const RegisterCell &C) { uint16_t W = C.width(); @@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { struct BitTracker::MachineEvaluator { MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) : TRI(T), MRI(M) {} - virtual ~MachineEvaluator() {} + virtual ~MachineEvaluator() = default; uint16_t getRegBitWidth(const RegisterRef &RR) const; RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) 
const; void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not // the cells directly. This function is a convenience wrapper to quickly // generate a ref for a cell corresponding to a register reference. @@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index b78c4126e0b..436f88dcd45 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,16 +7,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - #include "Hexagon.h" +#include "HexagonBitTracker.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" -#include "HexagonBitTracker.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, } } - BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + using namespace Hexagon; + if (Sub == 0) return MachineEvaluator::mask(Reg, 0); - using namespace Hexagon; const TargetRegisterClass *RC = MRI.getRegClass(Reg); 
unsigned ID = RC->getID(); uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); @@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { } namespace { + class RegisterRefs { std::vector Vector; @@ -117,17 +132,21 @@ public: } size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { // The main purpose of this operator is to assert with bad argument. assert(n < Vector.size()); return Vector[n]; } }; -} + +} // end anonymous namespace bool HexagonEvaluator::evaluate(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. @@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, if (NumDefs == 0) return false; - using namespace Hexagon; unsigned Opc = MI.getOpcode(); if (MI.mayLoad()) { @@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case S2_cl0: case S2_cl0p: // Always produce a 32-bit result. 
- return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs); case S2_cl1: case S2_cl1p: - return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs); case S2_clb: case S2_clbp: { uint16_t W1 = getRegBitWidth(Reg[1]); @@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } case S2_ct0: case S2_ct0p: - return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs); case S2_ct1: case S2_ct1p: - return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs); case S5_popcountp: // TODO break; @@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + if (TII.isPredicated(MI)) return false; assert(MI.mayLoad() && "A load that mayn't?"); @@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, uint16_t BitNum; bool SignEx; - using namespace Hexagon; switch (Opc) { default: @@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, return true; } - unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); @@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { return (Idx64+1 < Num64) ? 
Phys64[Idx64+1] : 0; } - unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { typedef MachineRegisterInfo::livein_iterator iterator; for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h index 9e7b1dbe298..2cbf65e66ca 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.h +++ b/lib/Target/Hexagon/HexagonBitTracker.h @@ -1,4 +1,4 @@ -//===--- HexagonBitTracker.h ----------------------------------------------===// +//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef HEXAGONBITTRACKER_H -#define HEXAGONBITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H #include "BitTracker.h" #include "llvm/ADT/DenseMap.h" +#include namespace llvm { - class HexagonInstrInfo; - class HexagonRegisterInfo; + +class HexagonInstrInfo; +class HexagonRegisterInfo; struct HexagonEvaluator : public BitTracker::MachineEvaluator { typedef BitTracker::CellMapType CellMapType; @@ -49,10 +51,12 @@ private: // Type of formal parameter extension. struct ExtType { enum { SExt, ZExt }; - char Type; - uint16_t Width; - ExtType() : Type(0), Width(0) {} + + ExtType() = default; ExtType(char t, uint16_t w) : Type(t), Width(w) {} + + char Type = 0; + uint16_t Width = 0; }; // Map VR -> extension type. 
typedef DenseMap RegExtMap; @@ -61,4 +65,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 34ce3e65299..0a7dc6b49d0 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,26 +11,45 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "HexagonHazardRecognizer.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include #include +#include +#include +#include using namespace llvm; @@ -108,19 +127,16 @@ 
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), RI() {} - static bool isIntRegForSubInst(unsigned Reg) { return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); } - static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) && isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi)); } - /// Calculate number of instructions excluding the debug instructions. static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, MachineBasicBlock::const_instr_iterator MIE) { @@ -132,7 +148,6 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, return Count; } - /// Find the hardware loop instruction used to set-up the specified loop. /// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions @@ -164,17 +179,16 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, return &*I; // We've reached a different loop, which means the loop0 has been removed. if (Opc == EndLoopOp) - return 0; + return nullptr; } // Check the predecessors for the LOOP instruction. MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); if (loop) return loop; } - return 0; + return nullptr; } - /// Gather register def/uses from MI. /// This treats possible (predicated) defs as actually happening ones /// (conservatively). @@ -201,7 +215,6 @@ static inline void parseOperands(const MachineInstr &MI, } } - // Position dependent, so check twice for swap. 
static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { @@ -228,8 +241,6 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { return false; } - - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If @@ -280,7 +291,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } - /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -337,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, return 0; } - /// This function can analyze one/two way branching only and should (mostly) be /// called by target independent side. /// First entry is always the opcode of the branching instruction, except when @@ -401,7 +410,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Delete the J2_jump if it's equivalent to a fall-through. if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + DEBUG(dbgs() << "\nErasing the jump to successor block\n";); I->eraseFromParent(); I = MBB.instr_end(); if (I == MBB.instr_begin()) @@ -415,7 +424,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineInstr *LastInst = &*I; MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. 
- for (;;) { + while (true) { if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) { if (!SecondLastInst) SecondLastInst = &*I; @@ -524,7 +533,6 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } - unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); @@ -730,7 +738,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, return nonDbgBBSize(&MBB) <= 3; } - bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) @@ -738,7 +745,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; } - bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs, BranchProbability Probability) const { return NumInstrs <= 4; @@ -853,7 +859,6 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Unimplemented"); } - void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { @@ -976,7 +981,6 @@ void HexagonInstrInfo::loadRegFromStackSlot( } } - static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) { const MachineBasicBlock &B = *MI.getParent(); Regs.addLiveOuts(B); @@ -1307,7 +1311,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; } - // We indicate that we want to reverse the branch by // inserting the reversed branching opcode. 
bool HexagonInstrInfo::reverseBranchCondition( @@ -1325,19 +1328,16 @@ bool HexagonInstrInfo::reverseBranchCondition( return false; } - void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); } - bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const { return getAddrMode(MI) == HexagonII::PostInc; } - // Returns true if an instruction is predicated irrespective of the predicate // sense. For example, all of the following will return true. // if (p0) R1 = add(R2, R3) @@ -1351,7 +1351,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const { return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::PredicateInstruction( MachineInstr &MI, ArrayRef Cond) const { if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || @@ -1403,14 +1402,12 @@ bool HexagonInstrInfo::PredicateInstruction( return true; } - bool HexagonInstrInfo::SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const { // TODO: Fix this return false; } - bool HexagonInstrInfo::DefinesPredicate( MachineInstr &MI, std::vector &Pred) const { auto &HRI = getRegisterInfo(); @@ -1427,7 +1424,6 @@ bool HexagonInstrInfo::DefinesPredicate( return false; } - bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const { return MI.getDesc().isPredicable(); } @@ -1466,7 +1462,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, return false; } - /// Measure the specified inline asm to determine an approximation of its /// length. 
/// Comments (which run till the next SeparatorString or newline) do not @@ -1502,7 +1497,6 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, return Length; } - ScheduleHazardRecognizer* HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { @@ -1513,7 +1507,6 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } - /// \brief For a comparison instruction, return the source registers in /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -1609,14 +1602,12 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return getInstrTimingClassLatency(ItinData, MI); } - DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); return static_cast(STI).createDFAPacketizer(II); } - // Inspired by this pair: // %R13 = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] // S2_storeri_io %R29, 132, %R1; flags: mem:ST4[FixedStack1] @@ -1661,7 +1652,6 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( return false; } - /// If the instruction is an increment of a constant value, return the amount. 
bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, int &Value) const { @@ -1677,7 +1667,6 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, return false; } - unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; @@ -1695,18 +1684,15 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { return NewReg; } - bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const { return (getAddrMode(MI) == HexagonII::AbsoluteSet); } - bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); } - bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1727,13 +1713,11 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { return false; } - // Return true if the instruction is a compund branch instruction. bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const { return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch()); } - bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { return (MI.isBranch() && isPredicated(MI)) || isConditionalTransfer(MI) || @@ -1744,7 +1728,6 @@ bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { !isPredicatedNew(MI)); } - bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_paddf: @@ -1802,7 +1785,6 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { return false; } - // FIXME - Function name and it's functionality don't match. 
// It should be renamed to hasPredNewOpcode() bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { @@ -1814,7 +1796,6 @@ bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { return PNewOpcode >= 0; } - // Returns true if an instruction is a conditional store. // // Note: It doesn't include conditional new-value stores as they can't be @@ -1872,7 +1853,6 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_tfrt: @@ -1893,7 +1873,6 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { return false; } - // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle // isFPImm and later getFPImm as well. bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { @@ -1942,7 +1921,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } - bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -1957,7 +1935,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { return false; } - // Return true when ConsMI uses a register defined by ProdMI. bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { @@ -1994,7 +1971,6 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, return false; } - // Returns true if the instruction is alread a .cur. bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -2007,7 +1983,6 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { return false; } - // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. 
bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { @@ -2017,7 +1992,6 @@ bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { return false; } - /// Symmetrical. See if these two instructions are fit for duplex pair. bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, const MachineInstr &MIb) const { @@ -2026,7 +2000,6 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); } - bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { if (MI.mayLoad() || MI.mayStore() || MI.isCompare()) return true; @@ -2038,13 +2011,11 @@ bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { return (Opcode == Hexagon::ENDLOOP0 || Opcode == Hexagon::ENDLOOP1); } - bool HexagonInstrInfo::isExpr(unsigned OpType) const { switch(OpType) { case MachineOperand::MO_MachineBasicBlock: @@ -2059,7 +2030,6 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const { } } - bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { const MCInstrDesc &MID = MI.getDesc(); const uint64_t F = MID.TSFlags; @@ -2079,7 +2049,6 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { return false; } - // This returns true in two cases: // - The OP code itself indicates that this is an extended instruction. // - One of MOs has been marked with HMOTF_ConstExtended flag. @@ -2098,14 +2067,12 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::FPPos) & HexagonII::FPMask; } - // No V60 HVX VMEM with A_INDIRECT. 
bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, const MachineInstr &J) const { @@ -2116,7 +2083,6 @@ bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J); } - bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_callr : @@ -2128,7 +2094,6 @@ bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -2143,7 +2108,6 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_jumpr : @@ -2158,7 +2122,6 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { return false; } - // Return true if a given MI can accommodate given offset. // Use abs estimate as oppose to the exact number. // TODO: This will need to be changed to use MC level @@ -2203,7 +2166,6 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, } } - bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, const MachineInstr &ESMI) const { bool isLate = isLateResultInstr(LRMI); @@ -2222,7 +2184,6 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, return false; } - bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { switch (MI.getOpcode()) { case TargetOpcode::EXTRACT_SUBREG: @@ -2259,14 +2220,12 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { return true; } - bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const { // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply // resource, but all operands can be received late like an ALU instruction. 
return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; } - bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); return Opcode == Hexagon::J2_loop0i || @@ -2279,7 +2238,6 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { Opcode == Hexagon::J2_loop1rext; } - bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return false; @@ -2312,46 +2270,38 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const { return isNewValueJump(MI) || isNewValueStore(MI); } - bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const { return isNewValue(MI) && MI.isBranch(); } - bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); } - bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - // Returns true if a particular operand is extendable for an instruction. 
bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, unsigned OperandNum) const { @@ -2360,28 +2310,24 @@ bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, == OperandNum; } - bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; assert(isPredicated(MI)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(isPredicated(Opcode)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; // Make sure that the instruction is predicated. @@ -2390,19 +2336,16 @@ bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; } - bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(get(Opcode).isBranch() && @@ -2410,7 +2353,6 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; } - bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const { return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT || @@ -2496,13 +2438,11 @@ bool 
HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; } - bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::STriw_pred : @@ -2513,7 +2453,6 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { if (!MI.isBranch()) return false; @@ -2524,7 +2463,6 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { return false; } - // Returns true when SU has a timing class TC1. bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); @@ -2544,7 +2482,6 @@ bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2561,7 +2498,6 @@ bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2582,13 +2518,11 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; } - // Schedule this ASAP. 
bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2608,13 +2542,11 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, return false; } - bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const { const uint64_t V = getType(MI); return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; } - // Check if the Offset is a valid auto-inc imm by Load/Store Type. // bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { @@ -2653,7 +2585,6 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { llvm_unreachable("Not an auto-inc opc!"); } - bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, bool Extend) const { // This function is to check whether the "Offset" is in the correct range of @@ -2808,12 +2739,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, "Please define it in the above switch statement!"); } - bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const { return isV60VectorInstruction(MI) && isAccumulator(MI); } - bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { const uint64_t F = get(MI.getOpcode()).TSFlags; const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); @@ -2822,7 +2751,6 @@ bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { V == HexagonII::TypeCVI_VA_DV; } - bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) @@ -2915,7 +2843,6 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { } } - // Add latency to instruction. 
bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2925,7 +2852,6 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, return false; } - /// \brief Get the base register and byte offset of a load/store instr. bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) @@ -2937,7 +2863,6 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, return BaseReg != 0; } - /// \brief Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { @@ -2959,13 +2884,11 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, return false; } - bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const { unsigned Opc = CallMI.getOpcode(); return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr; } - bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { for (auto &I : *B) if (I.isEHLabel()) @@ -2973,7 +2896,6 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { return false; } - // Returns true if an instruction can be converted into a non-extended // equivalent instruction. bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { @@ -3011,13 +2933,11 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo) >= 0; } - bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) const { MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); @@ -3029,7 +2949,6 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) return false; } - // Returns true, if a LD insn can be promoted to a cur load. 
bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { auto &HST = MI.getParent()->getParent()->getSubtarget(); @@ -3038,14 +2957,12 @@ bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { HST.hasV60TOps(); } - // Returns true, if a ST insn can be promoted to a new-value store. bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; } - bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { // There is no stall when ProdMI is not a V60 vector. @@ -3064,7 +2981,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, return true; } - bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator BII) const { // There is no stall when I is not a V60 vector. @@ -3091,7 +3007,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, return false; } - bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const { for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) { @@ -3106,7 +3021,6 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, return MI.getOpcode() != Hexagon::A4_tlbmatch; } - bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { return (Opcode == Hexagon::J2_jumpt) || (Opcode == Hexagon::J2_jumpf) || @@ -3116,25 +3030,21 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { (Opcode == Hexagon::J2_jumpfnewpt); } - bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { if (Cond.empty() || !isPredicated(Cond[0].getImm())) return false; return !isPredicatedTrue(Cond[0].getImm()); } - short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const { return Hexagon::getAbsoluteForm(MI.getOpcode()); } - unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; 
return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; } - // Returns the base register in a memory access (load/store). The offset is // returned in Offset and the access size is returned in AccessSize. unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, @@ -3171,7 +3081,6 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, return MI.getOperand(basePos).getReg(); } - /// Return the position of the base and offset operands for this instruction. bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const { @@ -3203,7 +3112,6 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, return true; } - // Inserts branching instructions in reverse order of their occurrence. // e.g. jump_t t1 (i1) // jump t2 (i2) @@ -3265,24 +3173,20 @@ SmallVector HexagonInstrInfo::getBranchingInstrs( return Jumpers; } - short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const { if (Opcode < 0) return -1; return Hexagon::getBaseWithLongOffset(Opcode); } - short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithLongOffset(MI.getOpcode()); } - short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithRegOffset(MI.getOpcode()); } - // Returns Operand Index for the constant extended instruction. unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -3379,7 +3283,6 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( return HexagonII::HCG_None; } - // Returns -1 when there is no opcode found. 
unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, const MachineInstr &GB) const { @@ -3398,7 +3301,6 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, return -1; } - int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : @@ -3410,7 +3312,6 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { llvm_unreachable("Unexpected predicable instruction"); } - // Return the cur value instruction for a given store. int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -3428,8 +3329,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return 0; } - - // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // @@ -3509,7 +3408,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { // promoted. Therefore, in case of dependence check failure (due to R5) during // next iteration, it should be converted back to its most basic form. - // Return the new value instruction for a given store. int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode()); @@ -3552,7 +3450,6 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { return 0; } - // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. @@ -3579,7 +3476,6 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, } } - // Return .new predicate version for an instruction. 
int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { @@ -3599,7 +3495,6 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, return 0; } - int HexagonInstrInfo::getDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form @@ -3615,7 +3510,6 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const { return NewOp; } - // See if instruction could potentially be a duplex candidate. // If so, return its group. Zero otherwise. HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( @@ -3960,12 +3854,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_None; } - short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } - // Return first non-debug instruction in the basic block. MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) const { @@ -3978,7 +3870,6 @@ MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) return nullptr; } - unsigned HexagonInstrInfo::getInstrTimingClassLatency( const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. However, an "empty" itinerary may @@ -4000,7 +3891,6 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency( return Latency; } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4013,7 +3903,6 @@ bool HexagonInstrInfo::getInvertedPredSense( return true; } - unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { int InvPredOpcode; InvPredOpcode = isPredicatedTrue(Opc) ? 
Hexagon::getFalsePredOpcode(Opc) @@ -4024,7 +3913,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { llvm_unreachable("Unexpected predicated instruction"); } - // Returns the max value that doesn't need to be extended. int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4039,13 +3927,11 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { return ~(-1U << bits); } - unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; } - // Returns the min value that doesn't need to be extended. int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4060,7 +3946,6 @@ int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { return 0; } - // Returns opcode of the non-extended equivalent instruction. short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { // Check if the instruction has a register form that uses register in place @@ -4086,7 +3971,6 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { return -1; } - bool HexagonInstrInfo::getPredReg(ArrayRef Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) @@ -4107,17 +3991,14 @@ bool HexagonInstrInfo::getPredReg(ArrayRef Cond, return true; } - short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo); } - short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const { return Hexagon::getRegForm(MI.getOpcode()); } - // Return the number of bytes required to encode the instruction. // Hexagon instructions are fixed length, 4 bytes, unless they // use a constant extender, which requires another 4 bytes. 
@@ -4156,13 +4037,11 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const { return Size; } - uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::TypePos) & HexagonII::TypeMask; } - unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget(); const InstrItineraryData &II = *ST.getInstrItineraryData(); @@ -4171,19 +4050,16 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { return IS.getUnits(); } - unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; } - // Calculate size of the basic block without debug instructions. unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); } - unsigned HexagonInstrInfo::nonDbgBundleSize( MachineBasicBlock::const_iterator BundleHead) const { assert(BundleHead->isBundle() && "Not a bundle header"); @@ -4192,7 +4068,6 @@ unsigned HexagonInstrInfo::nonDbgBundleSize( return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator())); } - /// immediateExtend - Changes the instruction in place to one using an immediate /// extender. 
void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { @@ -4208,7 +4083,6 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); } - bool HexagonInstrInfo::invertAndChangeJumpTarget( MachineInstr &MI, MachineBasicBlock *NewTarget) const { DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" @@ -4229,7 +4103,6 @@ bool HexagonInstrInfo::invertAndChangeJumpTarget( return true; } - void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ MachineFunction::iterator A = MF.begin(); @@ -4248,7 +4121,6 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* --- The code above is used to generate complete set of Hexagon Insn --- */ } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4258,7 +4130,6 @@ bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const { return true; } - // Reverse the branch prediction. unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { int PredRevOpcode = -1; @@ -4270,14 +4141,12 @@ unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { return PredRevOpcode; } - // TODO: Add more rigorous validation. 
bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) const { return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } - short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const { return Hexagon::xformRegToImmOffset(MI.getOpcode()); } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 2d184d1484e..2358d4b7e4c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -16,9 +16,14 @@ #include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/Target/TargetInstrInfo.h" +#include +#include #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -29,9 +34,10 @@ struct EVT; class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { - virtual void anchor(); const HexagonRegisterInfo RI; + virtual void anchor(); + public: explicit HexagonInstrInfo(HexagonSubtarget &ST); @@ -260,7 +266,7 @@ public: /// PredCost. unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, - unsigned *PredCost = 0) const override; + unsigned *PredCost = nullptr) const override; /// Create machine specific model for scheduling. 
DFAPacketizer * @@ -378,7 +384,6 @@ public: bool PredOpcodeHasJMP_c(unsigned Opcode) const; bool predOpcodeHasNot(ArrayRef Cond) const; - short getAbsoluteForm(const MachineInstr &MI) const; unsigned getAddrMode(const MachineInstr &MI) const; unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset, @@ -421,13 +426,11 @@ public: unsigned getUnits(const MachineInstr &MI) const; unsigned getValidSubTargets(const unsigned Opcode) const; - /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; - void immediateExtend(MachineInstr &MI) const; bool invertAndChangeJumpTarget(MachineInstr &MI, MachineBasicBlock* NewTarget) const; @@ -438,6 +441,6 @@ public: short xformRegToImmOffset(const MachineInstr &MI) const; }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 371b52108b9..d83bcbc4155 100644 --- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -15,33 +15,31 @@ namespace llvm { - namespace Hexagon { +namespace Hexagon { + const unsigned int StartPacket = 0x1; const unsigned int EndPacket = 0x2; - } +} // end namespace Hexagon /// Hexagon target-specific information for each MachineFunction. class HexagonMachineFunctionInfo : public MachineFunctionInfo { // SRetReturnReg - Some subtargets require that sret lowering includes // returning the value of the returned struct in a register. This field // holds the virtual register into which the sret argument is passed. 
- unsigned SRetReturnReg; - unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual) - unsigned StackAlignBasePhysReg; // (physical) + unsigned SRetReturnReg = 0; + unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual) + unsigned StackAlignBasePhysReg = 0; // (physical) int VarArgsFrameIndex; - bool HasClobberLR; - bool HasEHReturn; + bool HasClobberLR = false; + bool HasEHReturn = false; std::map PacketInfo; virtual void anchor(); public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0), - StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {} + HexagonMachineFunctionInfo() = default; - HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0), - HasEHReturn(false) {} + HexagonMachineFunctionInfo(MachineFunction &MF) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } @@ -75,6 +73,7 @@ public: void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; } unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index e902f600e88..c9c4f95dbaa 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -10,17 +10,27 @@ // This file contains the declarations of the HexagonTargetAsmInfo properties. 
// //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "hexagon-sdata" -#include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -44,13 +54,21 @@ static cl::opt TraceGVPlacement("trace-gv-placement", // (e.g. -debug and -debug-only=globallayout) #define TRACE_TO(s, X) s << X #ifdef NDEBUG -#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0) +#define TRACE(X) \ + do { \ + if (TraceGVPlacement) { \ + TRACE_TO(errs(), X); \ + } \ + } while (false) #else -#define TRACE(X) \ - do { \ - if (TraceGVPlacement) { TRACE_TO(errs(), X); } \ - else { DEBUG( TRACE_TO(dbgs(), X) ); } \ - } while (0) +#define TRACE(X) \ + do { \ + if (TraceGVPlacement) { \ + TRACE_TO(errs(), X); \ + } else { \ + DEBUG(TRACE_TO(dbgs(), X)); \ + } \ + } while (false) #endif // Returns true if the section name is such that the symbol will be put @@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) { Sec.find(".scommon.") != StringRef::npos; } - static const char *getSectionSuffixForSize(unsigned Size) { switch (Size) { default: @@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal( return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM); } - /// Return true if this global value should be placed into small data/bss /// section. 
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, @@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, return true; } - bool HexagonTargetObjectFile::isSmallDataEnabled() const { return SmallDataThreshold > 0; } - unsigned HexagonTargetObjectFile::getSmallDataSize() const { return SmallDataThreshold; } - /// Descends any type down to "elementary" components, /// discovering the smallest addressable one. /// If zero is returned, declaration will not be modified. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index 5feaffe6efb..9a09a17767a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -1,5 +1,4 @@ - -//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===// +//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===// // // The LLVM Compiler Infrastructure // @@ -11,18 +10,17 @@ // This file is looks at a packet and tries to form compound insns // //===----------------------------------------------------------------------===// + #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCShuffler.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCAssembler.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; using namespace Hexagon; @@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = { }; // enum HexagonII::CompoundGroup -namespace { -unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { +static unsigned getCompoundCandidateGroup(MCInst 
const &MI, bool IsExtended) { unsigned DstReg, SrcReg, Src1Reg, Src2Reg; switch (MI.getOpcode()) { @@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { return HexagonII::HCG_None; } -} /// getCompoundOp - Return the index from 0-7 into the above opcode lists. -namespace { -unsigned getCompoundOp(MCInst const &HMCI) { +static unsigned getCompoundOp(MCInst const &HMCI) { const MCOperand &Predicate = HMCI.getOperand(0); unsigned PredReg = Predicate.getReg(); @@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) { return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t; } } -} -namespace { -MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { - MCInst *CompoundInsn = 0; +static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, + MCInst const &R) { + MCInst *CompoundInsn = nullptr; unsigned compoundOpcode; MCOperand Rs, Rt; int64_t Value; @@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { return CompoundInsn; } -} /// Non-Symmetrical. See if these two instructions are fit for compound pair. 
-namespace { -bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, - MCInst const &MIb, bool IsExtendedB) { +static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, + MCInst const &MIb, bool IsExtendedB) { unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA); unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB); // We have two candidates - check that this is the same register @@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); } -} -namespace { -bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { +static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, + MCInst &MCI) { assert(HexagonMCInstrInfo::isBundle(MCI)); bool JExtended = false; for (MCInst::iterator J = @@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { JExtended = true; continue; } - if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) == - HexagonII::TypeJ) { + if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) { // Try to pair with another insn (B)undled with jump. bool BExtended = false; for (MCInst::iterator B = @@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { } return false; } -} /// tryCompound - Given a bundle check for compound insns when one /// is found update the contents fo the bundle with the compound insn. @@ -420,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, // a compound is found. 
while (lookForCompound(MCII, Context, MCI)) ; - - return; } diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h index 517f17cc9c6..5ece11bd5ce 100644 --- a/lib/Target/Hexagon/RDFCopy.h +++ b/lib/Target/Hexagon/RDFCopy.h @@ -1,4 +1,4 @@ -//===--- RDFCopy.h --------------------------------------------------------===// +//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,23 +7,26 @@ // //===----------------------------------------------------------------------===// -#ifndef RDF_COPY_H -#define RDF_COPY_H +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H +#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #include "RDFGraph.h" #include #include namespace llvm { + class MachineBasicBlock; class MachineDominatorTree; class MachineInstr; namespace rdf { + struct CopyPropagation { CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), Trace(false) {} - virtual ~CopyPropagation() {} + + virtual ~CopyPropagation() = default; bool run(); void trace(bool On) { Trace = On; } @@ -49,7 +52,9 @@ namespace rdf { void updateMap(NodeAddr IA); bool scanBlock(MachineBasicBlock *B); }; -} // namespace rdf -} // namespace llvm -#endif +} // end namespace rdf + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 33c3f03790f..fa272ea1a76 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -10,16 +10,31 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). 
// #include "RDFGraph.h" - #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace rdf; @@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print &P) { return OS; } -namespace { - void printRefHeader(raw_ostream &OS, const NodeAddr RA, - const DataFlowGraph &G) { - OS << Print(RA.Id, G) << '<' - << Print(RA.Addr->getRegRef(G), G) << '>'; - if (RA.Addr->getFlags() & NodeAttrs::Fixed) - OS << '!'; - } +static void printRefHeader(raw_ostream &OS, const NodeAddr RA, + const DataFlowGraph &G) { + OS << Print(RA.Id, G) << '<' + << Print(RA.Addr->getRegRef(G), G) << '>'; + if (RA.Addr->getFlags() & NodeAttrs::Fixed) + OS << '!'; } template<> @@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print &P) { } namespace { + template struct PrintListV { PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} + typedef T Type; const NodeList &List; const DataFlowGraph &G; @@ -201,7 +216,8 @@ namespace { } return OS; } -} + +} // end anonymous namespace template<> raw_ostream &operator<< (raw_ostream &OS, const Print> &P) { @@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS, // Print the target for calls and branches (for readability). 
if (MI.isCall() || MI.isBranch()) { MachineInstr::const_mop_iterator T = - find_if(MI.operands(), - [] (const MachineOperand &Op) -> bool { - return Op.isMBB() || Op.isGlobal() || Op.isSymbol(); - }); + llvm::find_if(MI.operands(), + [] (const MachineOperand &Op) -> bool { + return Op.isMBB() || Op.isGlobal() || Op.isSymbol(); + }); if (T != MI.operands_end()) { OS << ' '; if (T->isMBB()) @@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS, return OS; } -} // namespace rdf -} // namespace llvm +} // end namespace rdf +} // end namespace llvm // Node allocation functions. // @@ -390,7 +406,6 @@ void NodeAllocator::clear() { ActiveEnd = nullptr; } - // Insert node NA after "this" in the circular chain. void NodeBase::append(NodeAddr NA) { NodeId Nx = Next; @@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr NA) { } } - // Fundamental node manipulator functions. // Obtain the register reference from a reference node. @@ -590,7 +604,6 @@ NodeAddr FuncNode::getEntryBlock(const DataFlowGraph &G) { return findBlock(EntryB, G); } - // Target operand information. // @@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) return false; } - RegisterRef RegisterAggr::normalize(RegisterRef RR) const { RegisterId SuperReg = RR.Reg; while (true) { @@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const { OS << " }"; } - // // The data flow graph construction. // @@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const { DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi) - : LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) { + : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) { } - // The implementation of the definition stack. // Each register reference has its own definition stack. 
In particular, // for a register references "Reg" and "Reg:subreg" will each have their @@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const { return P; } - // Register information. // Get the list of references aliased to RR. Lane masks are ignored. @@ -915,7 +924,6 @@ NodeAddr DataFlowGraph::cloneNode(const NodeAddr B) { return NA; } - // Allocation routines for specific node types/kinds. NodeAddr DataFlowGraph::newUse(NodeAddr Owner, @@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const { return false; } - // Clear all information in the graph. void DataFlowGraph::reset() { Memory.clear(); @@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() { Func = NodeAddr(); } - // Return the next reference node in the instruction node IA that is related // to RA. Conceptually, two reference nodes are related if they refer to the // same instance of a register access, but differ in flags or other minor diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h index 871062ff2b0..49d78a8b22b 100644 --- a/lib/Target/Hexagon/RDFGraph.h +++ b/lib/Target/Hexagon/RDFGraph.h @@ -1,4 +1,4 @@ -//===--- RDFGraph.h -------------------------------------------------------===// +//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -221,20 +221,25 @@ // The statement s5 has two use nodes for t0: u7" and u9". The quotation // mark " indicates that the node is a shadow. 
// -#ifndef RDF_GRAPH_H -#define RDF_GRAPH_H + +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H +#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Timer.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include +#include +#include #include #include #include #include +#include #include // RDF uses uint32_t to refer to registers. This is to ensure that the type @@ -243,6 +248,7 @@ static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal"); namespace llvm { + class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -252,6 +258,7 @@ namespace llvm { class TargetInstrInfo; namespace rdf { + typedef uint32_t NodeId; typedef uint32_t RegisterId; @@ -293,9 +300,11 @@ namespace rdf { static uint16_t set_type(uint16_t A, uint16_t T) { return (A & ~TypeMask) | T; } + static uint16_t set_kind(uint16_t A, uint16_t K) { return (A & ~KindMask) | K; } + static uint16_t set_flags(uint16_t A, uint16_t F) { return (A & ~FlagMask) | F; } @@ -326,9 +335,14 @@ namespace rdf { }; template struct NodeAddr { - NodeAddr() : Addr(nullptr), Id(0) {} + NodeAddr() : Addr(nullptr) {} NodeAddr(T A, NodeId I) : Addr(A), Id(I) {} + // Type cast (casting constructor). The reason for having this class + // instead of std::pair. + template NodeAddr(const NodeAddr &NA) + : Addr(static_cast(NA.Addr)), Id(NA.Id) {} + bool operator== (const NodeAddr &NA) const { assert((Addr == NA.Addr) == (Id == NA.Id)); return Addr == NA.Addr; @@ -336,13 +350,9 @@ namespace rdf { bool operator!= (const NodeAddr &NA) const { return !operator==(NA); } - // Type cast (casting constructor). The reason for having this class - // instead of std::pair. 
- template NodeAddr(const NodeAddr &NA) - : Addr(static_cast(NA.Addr)), Id(NA.Id) {} T Addr; - NodeId Id; + NodeId Id = 0; }; struct NodeBase; @@ -366,17 +376,20 @@ namespace rdf { struct NodeAllocator { // Amount of storage for a single node. enum { NodeMemSize = 32 }; + NodeAllocator(uint32_t NPB = 4096) : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), - IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) { + IndexMask((1 << BitsPerIndex)-1) { assert(isPowerOf2_32(NPB)); } + NodeBase *ptr(NodeId N) const { uint32_t N1 = N-1; uint32_t BlockN = N1 >> BitsPerIndex; uint32_t Offset = (N1 & IndexMask) * NodeMemSize; return reinterpret_cast(Blocks[BlockN]+Offset); } + NodeId id(const NodeBase *P) const; NodeAddr New(); void clear(); @@ -384,6 +397,7 @@ namespace rdf { private: void startNewBlock(); bool needNewBlock(); + uint32_t makeId(uint32_t Block, uint32_t Index) const { // Add 1 to the id, to avoid the id of 0, which is treated as "null". return ((Block << BitsPerIndex) | Index) + 1; @@ -392,7 +406,7 @@ namespace rdf { const uint32_t NodesPerBlock; const uint32_t BitsPerIndex; const uint32_t IndexMask; - char *ActiveEnd; + char *ActiveEnd = nullptr; std::vector Blocks; typedef BumpPtrAllocatorImpl AllocatorTy; AllocatorTy MemPool; @@ -405,6 +419,7 @@ namespace rdf { RegisterRef() : RegisterRef(0) {} explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll()) : Reg(R), Mask(R != 0 ? 
M : LaneBitmask::getNone()) {} + operator bool() const { return Reg != 0 && Mask.any(); } bool operator== (const RegisterRef &RR) const { return Reg == RR.Reg && Mask == RR.Mask; @@ -420,7 +435,8 @@ namespace rdf { struct TargetOperandInfo { TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} - virtual ~TargetOperandInfo() {} + virtual ~TargetOperandInfo() = default; + virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; @@ -428,7 +444,6 @@ namespace rdf { const TargetInstrInfo &TII; }; - // Packed register reference. Only used for storage. struct PackedRegisterRef { RegisterId Reg; @@ -442,11 +457,13 @@ namespace rdf { template struct IndexedSet { IndexedSet() : Map() { Map.reserve(N); } + T get(uint32_t Idx) const { // Index Idx corresponds to Map[Idx-1]. assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size()); return Map[Idx-1]; } + uint32_t insert(T Val) { // Linear search. auto F = llvm::find(Map, Val); @@ -455,11 +472,13 @@ namespace rdf { Map.push_back(Val); return Map.size(); // Return actual_index + 1. } + uint32_t find(T Val) const { auto F = llvm::find(Map, Val); assert(F != Map.end()); return F - Map.begin(); } + private: std::vector Map; }; @@ -478,12 +497,14 @@ namespace rdf { assert(LM.any()); return LM.all() ? 
0 : find(LM); } + PackedRegisterRef pack(RegisterRef RR) { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } PackedRegisterRef pack(RegisterRef RR) const { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } + RegisterRef unpack(PackedRegisterRef PR) const { return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId)); } @@ -491,11 +512,8 @@ namespace rdf { struct RegisterAggr { RegisterAggr(const TargetRegisterInfo &tri) - : Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), - TRI(tri) {} - RegisterAggr(const RegisterAggr &RG) - : Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits), - CheckUnits(RG.CheckUnits), TRI(RG.TRI) {} + : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {} + RegisterAggr(const RegisterAggr &RG) = default; bool empty() const { return Masks.empty(); } bool hasAliasOf(RegisterRef RR) const; @@ -530,11 +548,11 @@ namespace rdf { const TargetRegisterInfo &TRI; }; - struct NodeBase { public: // Make sure this is a POD. NodeBase() = default; + uint16_t getType() const { return NodeAttrs::type(Attrs); } uint16_t getKind() const { return NodeAttrs::kind(Attrs); } uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } @@ -596,29 +614,36 @@ namespace rdf { struct RefNode : public NodeBase { RefNode() = default; + RegisterRef getRegRef(const DataFlowGraph &G) const; + MachineOperand &getOp() { assert(!(getFlags() & NodeAttrs::PhiRef)); return *Ref.Op; } + void setRegRef(RegisterRef RR, DataFlowGraph &G); void setRegRef(MachineOperand *Op, DataFlowGraph &G); + NodeId getReachingDef() const { return Ref.RD; } void setReachingDef(NodeId RD) { Ref.RD = RD; } + NodeId getSibling() const { return Ref.Sib; } void setSibling(NodeId Sib) { Ref.Sib = Sib; } + bool isUse() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Use; } + bool isDef() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Def; @@ -702,6 +727,7 @@ namespace rdf { MachineBasicBlock *getCode() const { return 
CodeNode::getCode(); } + void addPhi(NodeAddr PA, const DataFlowGraph &G); }; @@ -709,6 +735,7 @@ namespace rdf { MachineFunction *getCode() const { return CodeNode::getCode(); } + NodeAddr findBlock(const MachineBasicBlock *BB, const DataFlowGraph &G) const; NodeAddr getEntryBlock(const DataFlowGraph &G); @@ -723,6 +750,7 @@ namespace rdf { template T ptr(NodeId N) const { return static_cast(ptr(N)); } + NodeId id(const NodeBase *P) const; template NodeAddr addr(NodeId N) const { @@ -738,13 +766,17 @@ namespace rdf { struct DefStack { DefStack() = default; + bool empty() const { return Stack.empty() || top() == bottom(); } + private: typedef NodeAddr value_type; struct Iterator { typedef DefStack::value_type value_type; + Iterator &up() { Pos = DS.nextUp(Pos); return *this; } Iterator &down() { Pos = DS.nextDown(Pos); return *this; } + value_type operator*() const { assert(Pos >= 1); return DS.Stack[Pos-1]; @@ -755,14 +787,17 @@ namespace rdf { } bool operator==(const Iterator &It) const { return Pos == It.Pos; } bool operator!=(const Iterator &It) const { return Pos != It.Pos; } + private: Iterator(const DefStack &S, bool Top); + // Pos-1 is the index in the StorageType object that corresponds to // the top of the DefStack. 
const DefStack &DS; unsigned Pos; friend struct DefStack; }; + public: typedef Iterator iterator; iterator top() const { return Iterator(*this, true); } @@ -773,14 +808,18 @@ namespace rdf { void pop(); void start_block(NodeId N); void clear_block(NodeId N); + private: friend struct Iterator; typedef std::vector StorageType; + bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { return (P.Addr == nullptr) && (N == 0 || P.Id == N); } + unsigned nextUp(unsigned P) const; unsigned nextDown(unsigned P) const; + StorageType Stack; }; @@ -819,6 +858,7 @@ namespace rdf { if (RemoveFromOwner) removeFromOwner(UA); } + void unlinkDef(NodeAddr DA, bool RemoveFromOwner) { unlinkDefDF(DA); if (RemoveFromOwner) @@ -831,23 +871,28 @@ namespace rdf { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == Kind; } + template static bool IsCode(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == Kind; } + static bool IsDef(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Def; } + static bool IsUse(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Use; } + static bool IsPhi(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == NodeAttrs::Phi; } + static bool IsPreservingDef(const NodeAddr DA) { uint16_t Flags = DA.Addr->getFlags(); return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef); @@ -902,6 +947,7 @@ namespace rdf { void unlinkUseDF(NodeAddr UA); void unlinkDefDF(NodeAddr DA); + void removeFromOwner(NodeAddr RA) { NodeAddr IA = RA.Addr->getOwner(*this); IA.Addr->removeMember(RA, *this); @@ -967,7 +1013,6 @@ namespace rdf { return MM; } - // Optionally print the lane mask, if it is not ~0. 
struct PrintLaneMaskOpt { PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {} @@ -991,7 +1036,9 @@ namespace rdf { PrintNode(const NodeAddr &x, const DataFlowGraph &g) : Print>(x, g) {} }; -} // namespace rdf -} // namespace llvm -#endif // RDF_GRAPH_H +} // end namespace rdf + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 6f0fdddd7d5..92d3c001df9 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Dominators.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { return MipsDAGToDAGISel::runOnMachineFunction(MF); } +void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + SelectionDAGISel::getAnalysisUsage(AU); +} + void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MachineFunction &MF) { MachineInstrBuilder MIB(MF, &MI); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 2a8e5877e84..f89a350cab0 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -28,6 +28,8 @@ private: bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MachineFunction &MF); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index aa3ffde24b9..2b9195b095e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, static bool 
isFunctionGlobalAddress(SDValue Callee); static bool -resideInSameModule(SDValue Callee, Reloc::Model RelMod) { +resideInSameSection(const Function *Caller, SDValue Callee, + const TargetMachine &TM) { // If !G, Callee can be an external symbol. GlobalAddressSDNode *G = dyn_cast(Callee); - if (!G) return false; - - const GlobalValue *GV = G->getGlobal(); - - if (GV->isDeclaration()) return false; - - switch(GV->getLinkage()) { - default: llvm_unreachable("unknow linkage type"); - case GlobalValue::AvailableExternallyLinkage: - case GlobalValue::ExternalWeakLinkage: + if (!G) return false; - // Callee with weak linkage is allowed if it has hidden or protected - // visibility - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation - if (GV->hasDefaultVisibility()) - return false; + const GlobalValue *GV = G->getGlobal(); + if (!GV->isStrongDefinitionForLinker()) + return false; - case GlobalValue::ExternalLinkage: - case GlobalValue::InternalLinkage: - case GlobalValue::PrivateLinkage: - break; + // Any explicitly-specified sections and section prefixes must also match. + // Also, if we're using -ffunction-sections, then each function is always in + // a different section (the same is true for COMDAT functions). + if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() || + GV->getSection() != Caller->getSection()) + return false; + if (const auto *F = dyn_cast(GV)) { + if (F->getSectionPrefix() != Caller->getSectionPrefix()) + return false; } - // With '-fPIC', calling default visiblity function need insert 'nop' after - // function call, no matter that function resides in same module or not, so - // we treat it as in different module. 
- if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility()) + // If the callee might be interposed, then we can't assume the ultimate call + // target will be in the same section. Even in cases where we can assume that + // interposition won't happen, in any case where the linker might insert a + // stub to allow for interposition, we must generate code as though + // interposition might occur. To understand why this matters, consider a + // situation where: a -> b -> c where the arrows indicate calls. b and c are + // in the same section, but a is in a different module (i.e. has a different + // TOC base pointer). If the linker allows for interposition between b and c, + // then it will generate a stub for the call edge between b and c which will + // save the TOC pointer into the designated stack slot allocated by b. If we + // return true here, and therefore allow a tail call between b and c, that + // stack slot won't exist and the b -> c stub will end up saving b's TOC base + // pointer into the stack slot allocated by a (where the a -> b stub saved + // a's TOC base pointer). If we're not considering a tail call, but rather, + // whether a nop is needed after the call instruction in b, because the linker + // will insert a stub, it might complain about a missing nop if we omit it + // (although many don't complain in this case). + if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV)) return false; return true; @@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( !isa(Callee)) return false; - // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI - // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another - // module. + // Check if Callee resides in the same section, because for now, PPC64 SVR4 + // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in another + // section. 
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 - if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel())) + if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. @@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, return CallOpc; } -static -bool isLocalCall(const SDValue &Callee) -{ - if (GlobalAddressSDNode *G = dyn_cast(Callee)) - return G->getGlobal()->isStrongDefinitionForLinker(); - return false; -} - SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, @@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall( // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. + MachineFunction &MF = DAG.getMachineFunction(); if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && !isPatchPoint) { if (CallOpc == PPCISD::BCTRL) { @@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall( // The address needs to go after the chain input but before the flag (or // any other variadic arguments). Ops.insert(std::next(Ops.begin()), AddTOC); - } else if ((CallOpc == PPCISD::CALL) && - (!isLocalCall(Callee) || - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) + } else if (CallOpc == PPCISD::CALL && + !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) { // Otherwise insert NOP for non-local calls. 
CallOpc = PPCISD::CALL_NOP; + } } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index d42e1187ce6..e1825ca1eda 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { EmitFunctionBody(); // Emit the XRay table for this function. - EmitXRayTable(); + emitXRayTable(); // We didn't modify anything. return false; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 1deefe1231c..cd690442bb9 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr : std::next(MBBI); + PI = skipDebugInstructionsBackward(PI, MBB.begin()); + if (NI != nullptr) + NI = skipDebugInstructionsForward(NI, MBB.end()); + unsigned Opc = PI->getOpcode(); int Offset = 0; @@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; uint64_t InternalAmt = (isDestroy || Amount) ? 
I->getOperand(1).getImm() : 0; I = MBB.erase(I); + auto InsertPos = skipDebugInstructionsForward(I, MBB.end()); if (!reserveCallFrame) { // If the stack pointer can be changed after prologue, turn the @@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, if (HasDwarfEHHandlers && !isDestroy && MF.getInfo()->getHasPushSequences()) - BuildCFI(MBB, I, DL, + BuildCFI(MBB, InsertPos, DL, MCCFIInstruction::createGnuArgsSize(nullptr, Amount)); if (Amount == 0) @@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // If this is a callee-pop calling convention, emit a CFA adjust for // the amount the callee popped. if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) - BuildCFI(MBB, I, DL, + BuildCFI(MBB, InsertPos, DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); // Add Amount to SP to destroy a frame, or subtract to setup. @@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // Merge with any previous or following adjustment instruction. Note: the // instructions merged with here do not have CFI, so their stack // adjustments do not feed into CfaAdjustment. - StackAdjustment += mergeSPUpdates(MBB, I, true); - StackAdjustment += mergeSPUpdates(MBB, I, false); + StackAdjustment += mergeSPUpdates(MBB, InsertPos, true); + StackAdjustment += mergeSPUpdates(MBB, InsertPos, false); if (StackAdjustment) { if (!(Fn->optForMinSize() && - adjustStackWithPops(MBB, I, DL, StackAdjustment))) - BuildStackAdjustment(MBB, I, DL, StackAdjustment, + adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment))) + BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment, /*InEpilogue=*/false); } } @@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // TODO: When not using precise CFA, we also need to adjust for the // InternalAmt here. 
if (CfaAdjustment) { - BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset( - nullptr, CfaAdjustment)); + BuildCFI(MBB, InsertPos, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, + CfaAdjustment)); } } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b293dfa98f8..fd218939727 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + SmallVector WidenedMask; + if (!canWidenShuffleElements(Mask, WidenedMask)) + return SDValue(); + // TODO: If minimizing size and one of the inputs is a zero vector and the // the zero vector has only one use, we could use a VPERM2X128 to save the // instruction bytes needed to explicitly generate the zero vector. @@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // [6] - ignore // [7] - zero high half of destination - int MaskLO = Mask[0]; - if (MaskLO == SM_SentinelUndef) - MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1]; + int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0]; + int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1]; - int MaskHI = Mask[2]; - if (MaskHI == SM_SentinelUndef) - MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3]; - - unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4; + unsigned PermMask = MaskLO | (MaskHI << 4); // If either input is a zero vector, replace it with an undef input. // Shuffle mask values < 4 are selecting elements of V1. @@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // selecting the zero vector and setting the zero mask bit. 
if (IsV1Zero) { V1 = DAG.getUNDEF(VT); - if (MaskLO < 4) + if (MaskLO < 2) PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI < 4) + if (MaskHI < 2) PermMask = (PermMask & 0x0f) | 0x80; } if (IsV2Zero) { V2 = DAG.getUNDEF(VT); - if (MaskLO >= 4) + if (MaskLO >= 2) PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI >= 4) + if (MaskHI >= 2) PermMask = (PermMask & 0x0f) | 0x80; } @@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); - SmallVector WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) - return V; + if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return V; if (V2.isUndef()) { // Check for being able to broadcast a single element. @@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!"); - SmallVector WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) - return V; + if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, if (!canWidenShuffleElements(Mask, WidenedMask)) return SDValue(); - SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; - // Insure elements came from the same Op. 
- int MaxOp1Index = VT.getVectorNumElements()/2 - 1; - for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) { - if (WidenedMask[i] == SM_SentinelZero) - return SDValue(); - if (WidenedMask[i] == SM_SentinelUndef) + // Check for patterns which can be matched with a single insert of a 256-bit + // subvector. + bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, + {0, 1, 2, 3, 0, 1, 2, 3}); + if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, + {0, 1, 2, 3, 8, 9, 10, 11})) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); + SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, + DAG.getIntPtrConstant(0, DL)); + SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, + OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + } + + assert(WidenedMask.size() == 4); + + // See if this is an insertion of the lower 128-bits of V2 into V1. + bool IsInsert = true; + int V2Index = -1; + for (int i = 0; i < 4; ++i) { + assert(WidenedMask[i] >= -1); + if (WidenedMask[i] < 0) continue; - SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1; - unsigned OpIndex = (i < Size/2) ? 0 : 1; + // Make sure all V1 subvectors are in place. + if (WidenedMask[i] < 4) { + if (WidenedMask[i] != i) { + IsInsert = false; + break; + } + } else { + // Make sure we only have a single V2 index and it's the lowest 128-bits. + if (V2Index >= 0 || WidenedMask[i] != 4) { + IsInsert = false; + break; + } + V2Index = i; + } + } + if (IsInsert && V2Index >= 0) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); + SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2, + DAG.getIntPtrConstant(0, DL)); + return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL); + } + + // Try to lower to vshuf64x2/vshuf32x4. + SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; + unsigned PermMask = 0; + // Ensure elements came from the same Op. 
+ for (int i = 0; i < 4; ++i) { + assert(WidenedMask[i] >= -1); + if (WidenedMask[i] < 0) + continue; + + SDValue Op = WidenedMask[i] >= 4 ? V2 : V1; + unsigned OpIndex = i / 2; if (Ops[OpIndex].isUndef()) Ops[OpIndex] = Op; else if (Ops[OpIndex] != Op) return SDValue(); - } - // Form a 128-bit permutation. - // Convert the 64-bit shuffle mask selection values into 128-bit selection - // bits defined by a vshuf64x2 instruction's immediate control byte. - unsigned PermMask = 0, Imm = 0; - unsigned ControlBitsNum = WidenedMask.size() / 2; - - for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) { - // Use first element in place of undef mask. - Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i]; - PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum); + // Convert the 128-bit shuffle mask selection values into 128-bit selection + // bits defined by a vshuf64x2 instruction's immediate control byte. + PermMask |= (WidenedMask[i] % 4) << (i * 2); } return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], @@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, static bool canonicalizeShuffleMaskWithCommute(ArrayRef Mask) { int NumElements = Mask.size(); - int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0; + int NumV1Elements = 0, NumV2Elements = 0; for (int M : Mask) if (M < 0) - ++NumSentinelElements; + continue; else if (M < NumElements) ++NumV1Elements; else @@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_IMM8_MASK: - case INTR_TYPE_3OP_MASK: - case INSERT_SUBVEC: { + case INTR_TYPE_3OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); @@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK) Src3 = DAG.getNode(ISD::TRUNCATE, 
dl, MVT::i8, Src3); - else if (IntrData->Type == INSERT_SUBVEC) { - // imm should be adapted to ISD::INSERT_SUBVECTOR behavior - assert(isa(Src3) && "Expected a ConstantSDNode here!"); - unsigned Imm = cast(Src3)->getZExtValue(); - Imm *= Src2.getSimpleValueType().getVectorNumElements(); - Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32); - } // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, @@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG, return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } + case ISD::INSERT_SUBVECTOR: { + unsigned EltSize = EltVT.getSizeInBits(); + if (EltSize != 32 && EltSize != 64) + return false; + MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); + // Only change element size, not type. + if (VT.isInteger() != OpEltVT.isInteger()) + return false; + uint64_t Imm = cast(Op.getOperand(2))->getZExtValue(); + Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize; + SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0)); + DCI.AddToWorklist(Op0.getNode()); + // Op1 needs to be bitcasted to a smaller vector with the same element type. + SDValue Op1 = Op.getOperand(1); + MVT Op1VT = MVT::getVectorVT(EltVT, + Op1.getSimpleValueType().getSizeInBits() / EltSize); + Op1 = DAG.getBitcast(Op1VT, Op1); + DCI.AddToWorklist(Op1.getNode()); + DCI.CombineTo(OrigOp.getNode(), + DAG.getNode(Opcode, DL, VT, Op0, Op1, + DAG.getConstant(Imm, DL, MVT::i8))); + return true; + } } return false; @@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify +/// the codegen. +/// e.g. 
TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) ) +static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget, + SDLoc &DL) { + assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); + SDValue Src = N->getOperand(0); + unsigned Opcode = Src.getOpcode(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + EVT VT = N->getValueType(0); + EVT SrcVT = Src.getValueType(); + + auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) { + // TODO: Add extra cases where we can truncate both inputs for the + // cost of one (or none). + // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y ) + if (Op0 == Op1) + return true; + + SDValue BC0 = peekThroughOneUseBitcasts(Op0); + SDValue BC1 = peekThroughOneUseBitcasts(Op1); + return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) || + ISD::isBuildVectorOfConstantSDNodes(BC1.getNode()); + }; + + auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { + SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); + SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1); + }; + + // Don't combine if the operation has other uses. + if (!N->isOnlyUserOf(Src.getNode())) + return SDValue(); + + // Only support vector truncation for now. + // TODO: i64 scalar math would benefit as well. + if (!VT.isVector()) + return SDValue(); + + // In most cases its only worth pre-truncating if we're only facing the cost + // of one truncation. + // i.e. if one of the inputs will constant fold or the input is repeated. 
+ switch (Opcode) { + case ISD::AND: + case ISD::XOR: + case ISD::OR: { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + if (TLI.isOperationLegalOrPromote(Opcode, VT) && + IsRepeatedOpOrOneUseConstant(Op0, Op1)) + return TruncateArithmetic(Op0, Op1); + break; + } + + case ISD::MUL: + // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its + // better to truncate if we have the chance. + if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && + !TLI.isOperationLegal(Opcode, SrcVT)) + return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); + LLVM_FALLTHROUGH; + case ISD::ADD: { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + if (TLI.isOperationLegal(Opcode, VT) && + IsRepeatedOpOrOneUseConstant(Op0, Op1)) + return TruncateArithmetic(Op0, Op1); + break; + } + } + + return SDValue(); +} + /// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS. static SDValue combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, @@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, SDValue Src = N->getOperand(0); SDLoc DL(N); + // Attempt to pre-truncate inputs to arithmetic ops instead. + if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL)) + return V; + // Try to detect AVG pattern first. if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index da7437ea0cc..908053e1342 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -650,33 +650,6 @@ multiclass vextract_for_size; - - // Intrinsic call with masking. 
- def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x" # To.NumElts # "_" # From.Size) - From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask), - (!cast(NAME # To.EltSize # "x" # To.NumElts # - From.ZSuffix # "rrk") - To.RC:$src0, - (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM), - From.RC:$src1, imm:$idx)>; - - // Intrinsic call with zero-masking. - def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x" # To.NumElts # "_" # From.Size) - From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask), - (!cast(NAME # To.EltSize # "x" # To.NumElts # - From.ZSuffix # "rrkz") - (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM), - From.RC:$src1, imm:$idx)>; - - // Intrinsic call without masking. - def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x" # To.NumElts # "_" # From.Size) - From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), - (!cast(NAME # To.EltSize # "x" # To.NumElts # - From.ZSuffix # "rr") - From.RC:$src1, imm:$idx)>; } // Codegen pattern for the alternative types @@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { - defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, - load, "ucomiss">, PS, EVEX, VEX_LIG, + defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, + sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, - load, "ucomisd">, PD, EVEX, + defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, + sse_load_f64, "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, - load, "comiss">, PS, EVEX, VEX_LIG, + defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, + sse_load_f32, "comiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm Int_VCOMISDZ : 
sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, - load, "comisd">, PD, EVEX, + defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, + sse_load_f64, "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9d6a8936304..4cd6ae563f0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, Sched<[WriteFAddLd, ReadAfterLd]>; } +// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp +multiclass sse12_ord_cmp_int opc, RegisterClass RC, SDNode OpNode, + ValueType vt, Operand memop, + ComplexPattern mem_cpat, string OpcodeStr> { + def rr: SI, + Sched<[WriteFAdd]>; + def rm: SI, + Sched<[WriteFAddLd, ReadAfterLd]>; +} + let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS, VEX, VEX_LIG; @@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in { } let isCodeGenOnly = 1 in { - defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss">, PS, VEX; - defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, PD, VEX; + defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, + sse_load_f32, "ucomiss">, PS, VEX; + defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, + sse_load_f64, "ucomisd">, PD, VEX; - defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss">, PS, VEX; - defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd">, PD, VEX; + defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, + sse_load_f32, "comiss">, PS, VEX; + defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, + sse_load_f64, "comisd">, PD, VEX; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS; @@ 
-2409,15 +2426,15 @@ let Defs = [EFLAGS] in { } let isCodeGenOnly = 1 in { - defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss">, PS; - defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, PD; + defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, + sse_load_f32, "ucomiss">, PS; + defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, + sse_load_f64, "ucomisd">, PD; - defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, - "comiss">, PS; - defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, - "comisd">, PD; + defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, + sse_load_f32, "comiss">, PS; + defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, + sse_load_f64, "comisd">, PD; } } // Defs = [EFLAGS] diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h index 5d2af829028..415a891bfd9 100755 --- a/lib/Target/X86/X86InstrTablesInfo.h +++ b/lib/Target/X86/X86InstrTablesInfo.h @@ -1,4 +1,4 @@ -//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===// +//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry { // X86 EVEX encoded instructions that have a VEX 128 encoding // (table format: ). -static const X86EvexToVexCompressTableEntry - X86EvexToVex128CompressTable[] = { +static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = { // EVEX scalar with corresponding VEX. 
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm }, { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr }, @@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry { X86::VUCOMISDZrr , X86::VUCOMISDrr }, { X86::VUCOMISSZrm , X86::VUCOMISSrm }, { X86::VUCOMISSZrr , X86::VUCOMISSrr }, - + { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr }, { X86::VMOV64toSDZrr , X86::VMOV64toSDrr }, { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm }, { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr }, { X86::VMOVLHPSZrr , X86::VMOVLHPSrr }, - { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, + { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr }, { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr }, { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr }, { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr }, { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm }, { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr }, - + { X86::VPEXTRBZmr , X86::VPEXTRBmr }, { X86::VPEXTRBZrr , X86::VPEXTRBrr }, { X86::VPEXTRDZmr , X86::VPEXTRDmr }, @@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPEXTRQZrr , X86::VPEXTRQrr }, { X86::VPEXTRWZmr , X86::VPEXTRWmr }, { X86::VPEXTRWZrr , X86::VPEXTRWri }, - + { X86::VPINSRBZrm , X86::VPINSRBrm }, { X86::VPINSRBZrr , X86::VPINSRBrr }, { X86::VPINSRDZrm , X86::VPINSRDrm }, @@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ128rm , X86::VANDPDrm }, { X86::VANDPDZ128rr , X86::VANDPDrr }, { X86::VANDPSZ128rm , X86::VANDPSrm }, - { X86::VANDPSZ128rr , X86::VANDPSrr }, + { X86::VANDPSZ128rr , X86::VANDPSrr }, { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm }, { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr }, { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr }, @@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ128rm , X86::VMOVAPDrm }, { X86::VMOVAPDZ128rr , X86::VMOVAPDrr }, { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV }, - { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, - { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, + { X86::VMOVAPSZ128mr , 
X86::VMOVAPSmr }, + { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, { X86::VMOVAPSZ128rr , X86::VMOVAPSrr }, { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV }, { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm }, @@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVUPDZ128rm , X86::VMOVUPDrm }, { X86::VMOVUPDZ128rr , X86::VMOVUPDrr }, { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV }, - { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, - { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, + { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, + { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, { X86::VMOVUPSZ128rr , X86::VMOVUPSrr }, { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV }, { X86::VMULPDZ128rm , X86::VMULPDrm }, @@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr }, { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm }, { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr }, - { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, - { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, - { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, + { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, + { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, + { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr }, { X86::VPERMILPDZ128mi , X86::VPERMILPDmi }, { X86::VPERMILPDZ128ri , X86::VPERMILPDri }, @@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm }, { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr }, { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm }, - { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, + { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, { X86::VPMULDQZ128rm , X86::VPMULDQrm }, { X86::VPMULDQZ128rr , X86::VPMULDQrr }, { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm }, @@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri }, { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi }, { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri }, - { X86::VPSLLDQZ128rr , 
X86::VPSLLDQri }, + { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, { X86::VPSLLDZ128ri , X86::VPSLLDri }, { X86::VPSLLDZ128rm , X86::VPSLLDrm }, - { X86::VPSLLDZ128rr , X86::VPSLLDrr }, + { X86::VPSLLDZ128rr , X86::VPSLLDrr }, { X86::VPSLLQZ128ri , X86::VPSLLQri }, { X86::VPSLLQZ128rm , X86::VPSLLQrm }, { X86::VPSLLQZ128rr , X86::VPSLLQrr }, @@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry // X86 EVEX encoded instructions that have a VEX 256 encoding // (table format: ). - static const X86EvexToVexCompressTableEntry - X86EvexToVex256CompressTable[] = { + static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = { { X86::VADDPDZ256rm , X86::VADDPDYrm }, { X86::VADDPDZ256rr , X86::VADDPDYrr }, { X86::VADDPSZ256rm , X86::VADDPSYrm }, @@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ256rr , X86::VANDPDYrr }, { X86::VANDPSZ256rm , X86::VANDPSYrm }, { X86::VANDPSZ256rr , X86::VANDPSYrr }, - { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, - { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, - { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, + { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm }, - { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, + { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr }, { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm }, { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr }, @@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry { X86::VDIVPDZ256rr , X86::VDIVPDYrr }, { X86::VDIVPSZ256rm , X86::VDIVPSYrm }, { X86::VDIVPSZ256rr , X86::VDIVPSYrr }, + { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr }, + { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr }, + { 
X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr }, + { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr }, { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm }, { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr }, { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm }, @@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr }, { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm }, { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr }, + { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr }, + { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr }, { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm }, { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr }, { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm }, @@ -849,8 +863,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm }, { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr }, { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV }, - { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, - { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, + { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, + { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr }, { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV }, { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm }, @@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry { X86::VPAVGBZ256rr , X86::VPAVGBYrr }, { X86::VPAVGWZ256rm , X86::VPAVGWYrm }, { X86::VPAVGWZ256rr , X86::VPAVGWYrr }, - { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, - { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, - { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, - { X86::VPBROADCASTDZ256r , 
X86::VPBROADCASTDYrr }, - { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, - { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, - { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, - { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, + { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, + { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, + { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, + { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, + { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, + { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, + { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, + { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, { X86::VPERMDZ256rm , X86::VPERMDYrm }, { X86::VPERMDZ256rr , X86::VPERMDYrr }, { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi }, @@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLDQZ256rr , X86::VPSLLDQYri }, { X86::VPSLLDZ256ri , X86::VPSLLDYri }, { X86::VPSLLDZ256rm , X86::VPSLLDYrm }, - { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, + { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, { X86::VPSLLQZ256ri , X86::VPSLLQYri }, { X86::VPSLLQZ256rm , X86::VPSLLQYrm }, { X86::VPSLLQZ256rr , X86::VPSLLQYrr }, @@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr }, { X86::VPSLLWZ256ri , X86::VPSLLWYri }, { X86::VPSLLWZ256rm , X86::VPSLLWYrm }, - { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, + { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, { X86::VPSRADZ256ri , X86::VPSRADYri }, { X86::VPSRADZ256rm , X86::VPSRADYrm }, { X86::VPSRADZ256rr , X86::VPSRADYrr }, @@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSRLDQZ256rr , X86::VPSRLDQYri }, { X86::VPSRLDZ256ri , X86::VPSRLDYri }, { X86::VPSRLDZ256rm , X86::VPSRLDYrm }, - { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, + { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, { X86::VPSRLQZ256ri , X86::VPSRLQYri }, { X86::VPSRLQZ256rm , X86::VPSRLQYrm }, { X86::VPSRLQZ256rr , X86::VPSRLQYrr }, @@ -1145,4 +1159,4 @@ static const 
X86EvexToVexCompressTableEntry { X86::VXORPSZ256rr , X86::VXORPSYrr }, }; -#endif \ No newline at end of file +#endif diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index df47b4ad583..63a02af02fa 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, INSERT_SUBVEC, + EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VGETMANTS, 0), X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM, X86ISD::VGETMANTS, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 
0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK, ISD::CTLZ, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK, diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 2f69df064e7..a38a4b30b77 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo OutStreamer->EmitInstruction(TC, getSubtargetInfo()); } -void X86AsmPrinter::EmitXRayTable() { - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - if (Fn->hasComdat()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - } else { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. - // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. 
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - // Returns instruction preceding MBBI in MachineFunction. // If MBBI is the first instruction of the first basic block, returns null. static MachineBasicBlock::const_iterator diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 2b0e672d56f..d7792e296a5 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -598,197 +598,135 @@ int X86TTIImpl::getArithmeticInstrCost( int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - - if (Kind == TTI::SK_Reverse) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - - static const CostTblEntry AVX512VBMIShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb - }; - - if (ST->hasVBMI()) - if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX512BWShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 - // + 2*pshufb + vinserti64x4 - }; - - if (ST->hasBWI()) - if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl, - ISD::VECTOR_SHUFFLE, 
LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX512ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd - }; - - if (ST->hasAVX512()) - if (const auto *Entry = - CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb - }; - - if (ST->hasAVX2()) - if (const auto *Entry = - CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb - // + vinsertf128 - }; - - if (ST->hasAVX()) - if (const auto *Entry = - CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSSE3ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb - }; - - if (ST->hasSSSE3()) - if (const auto *Entry = - CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const 
CostTblEntry SSE2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw - // + 2*pshufd + 2*unpck + packus - }; - - if (ST->hasSSE2()) - if (const auto *Entry = - CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSE1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps - }; - - if (ST->hasSSE1()) - if (const auto *Entry = - CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - } else if (Kind == TTI::SK_Alternate) { + if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - // The backend knows how to generate a single VEX.256 version of - // instruction VPBLENDW if the target supports AVX2. 
- if (ST->hasAVX2() && LT.second == MVT::v16i16) - return LT.first; + static const CostTblEntry AVX512VBMIShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb + { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb + }; - static const CostTblEntry AVXAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd - {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd + if (ST->hasVBMI()) + if (const auto *Entry = + CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps - {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps + static const CostTblEntry AVX512BWShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 + // + 2*pshufb + vinserti64x4 + }; - // This shuffle is custom lowered into a sequence of: - // 2x vextractf128 , 2x vpblendw , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5}, + if (ST->hasBWI()) + if (const auto *Entry = + CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - // This shuffle is custom lowered into a long sequence of: - // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9} + static const CostTblEntry AVX512ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd + }; + + if (ST->hasAVX512()) + if (const auto *Entry = + CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry AVX2ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd + { TTI::SK_Reverse, MVT::v16i16, 2 }, // 
vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb + + { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw + { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb + }; + + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry AVX1ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + + { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor }; if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSE41AltShuffleTbl[] = { - // These are lowered into movsd. - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - - // packed float vectors with four elements are lowered into BLENDI dag - // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'. - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, - - // This shuffle generates a single pshufw. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, - - // There is no instruction that matches a v16i8 alternate shuffle. - // The backend will expand it into the sequence 'pshufb + pshufb + or'. 
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} + static const CostTblEntry SSE41ShuffleTbl[] = { + { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps + { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb }; if (ST->hasSSE41()) - if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, - LT.second)) + if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSSE3AltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + static const CostTblEntry SSSE3ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb + { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb - // SSE3 doesn't have 'blendps'. The following shuffles are expanded into - // the sequence 'shufps + pshufd' - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por }; if (ST->hasSSSE3()) - if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSEAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + static const CostTblEntry SSE2ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd + { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + 
pshufd + { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw + // + 2*pshufd + 2*unpck + packus - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd - - // This is expanded into a long sequence of four extract + four insert. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw. - - // 8 x (pinsrw + pextrw + and + movb + movzb + or) - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48} + { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por }; - // Fall-back (SSE3 and SSE2). - if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry SSE1ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps + { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps + }; + + if (ST->hasSSE1()) + if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; } else if (Kind == TTI::SK_PermuteTwoSrc) { // We assume that source and destination have the same vector type. 
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 3bbc70ab21c..55151c13b43 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // add(zext(xor i16 X, -32768), -32768) --> sext X return CastInst::Create(Instruction::SExt, X, LHS->getType()); } + + if (Val->isNegative() && + match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) && + Val->sge(-C->sext(Val->getBitWidth()))) { + // (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val)) + return CastInst::Create( + Instruction::ZExt, + Builder->CreateNUWAdd( + X, Constant::getIntegerValue(X->getType(), + *C + Val->trunc(C->getBitWidth()))), + I.getType()); + } } // FIXME: Use the match above instead of dyn_cast to allow these transforms diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 92369bd70b1..f863d192fc2 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return replaceInstUsesWith(*II, V); break; } + case Intrinsic::fma: + case Intrinsic::fmuladd: { + Value *Src0 = II->getArgOperand(0); + Value *Src1 = II->getArgOperand(1); + + // Canonicalize constants into the RHS. 
+ if (isa(Src0) && !isa(Src1)) { + II->setArgOperand(0, Src1); + II->setArgOperand(1, Src0); + std::swap(Src0, Src1); + } + + Value *LHS = nullptr; + Value *RHS = nullptr; + + // fma fneg(x), fneg(y), z -> fma x, y, z + if (match(Src0, m_FNeg(m_Value(LHS))) && + match(Src1, m_FNeg(m_Value(RHS)))) { + CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + {LHS, RHS, II->getArgOperand(2)}); + NewCall->takeName(II); + NewCall->copyFastMathFlags(II); + return replaceInstUsesWith(*II, NewCall); + } + + // fma fabs(x), fabs(x), z -> fma x, x, z + if (match(Src0, m_Intrinsic(m_Value(LHS))) && + match(Src1, m_Intrinsic(m_Value(RHS))) && LHS == RHS) { + CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + {LHS, LHS, II->getArgOperand(2)}); + NewCall->takeName(II); + NewCall->copyFastMathFlags(II); + return replaceInstUsesWith(*II, NewCall); + } + + // fma x, 1, z -> fadd x, z + if (match(Src1, m_FPOne())) { + Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2)); + RI->copyFastMathFlags(II); + return RI; + } + + break; + } + case Intrinsic::fabs: { + Value *Cond; + Constant *LHS, *RHS; + if (match(II->getArgOperand(0), + m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) { + CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS}); + CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS}); + return SelectInst::Create(Cond, Call0, Call1); + } + + break; + } case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. 
@@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // assume( (load addr) != null ) -> add 'nonnull' metadata to load // (if assume is valid at the load) - if (ICmpInst* ICmp = dyn_cast(IIOperand)) { - Value *LHS = ICmp->getOperand(0); - Value *RHS = ICmp->getOperand(1); - if (ICmpInst::ICMP_NE == ICmp->getPredicate() && - isa(LHS) && - isa(RHS) && - RHS->getType()->isPointerTy() && - cast(RHS)->isNullValue()) { - LoadInst* LI = cast(LHS); - if (isValidAssumeForContext(II, LI, &DT)) { - MDNode *MD = MDNode::get(II->getContext(), None); - LI->setMetadata(LLVMContext::MD_nonnull, MD); - return eraseInstFromFunction(*II); - } - } + CmpInst::Predicate Pred; + Instruction *LHS; + if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) && + Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && + LHS->getType()->isPointerTy() && + isValidAssumeForContext(II, LHS, &DT)) { + MDNode *MD = MDNode::get(II->getContext(), None); + LHS->setMetadata(LLVMContext::MD_nonnull, MD); + return eraseInstFromFunction(*II); + // TODO: apply nonnull return attributes to calls and invokes // TODO: apply range metadata for range check patterns? } + // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. APInt KnownZero(1, 0), KnownOne(1, 0); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5276bee4e0a..388c5e4e7fa 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // separated by a few arithmetic operations. 
BasicBlock::iterator BBI(LI); bool IsLoadCSE = false; - if (Value *AvailableVal = - FindAvailableLoadedValue(&LI, LI.getParent(), BBI, - DefMaxInstsToScan, AA, &IsLoadCSE)) { - if (IsLoadCSE) { - LoadInst *NLI = cast(AvailableVal); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_range, - LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null}; - combineMetadata(NLI, &LI, KnownIDs); - }; + if (Value *AvailableVal = FindAvailableLoadedValue( + &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) { + if (IsLoadCSE) + combineMetadataForCSE(cast(AvailableVal), &LI); return replaceInstUsesWith( LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index bc38c4aca34..5ad2a1c0e3e 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { if (ConstantInt *Op1C = dyn_cast(I.getOperand(1))) { unsigned ShAmt = Op1C->getZExtValue(); + // Turn: + // %zext = zext i32 %V to i64 + // %res = shl i64 %zext, 8 + // + // Into: + // %shl = shl i32 %V, 8 + // %res = zext i32 %shl to i64 + // + // This is only valid if %V would have zeros shifted out. + if (auto *ZI = dyn_cast(I.getOperand(0))) { + unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits(); + if (ShAmt < SrcBitWidth && + MaskedValueIsZero(ZI->getOperand(0), + APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) { + auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt); + return new ZExtInst(Shl, I.getType()); + } + } + // If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() && MaskedValueIsZero(I.getOperand(0), diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 9bf638dcbae..16e08ee58fb 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -481,9 +481,9 @@ private: bool processNode(DomTreeNode *Node); Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const { - if (LoadInst *LI = dyn_cast(Inst)) + if (auto *LI = dyn_cast(Inst)) return LI; - else if (StoreInst *SI = dyn_cast(Inst)) + if (auto *SI = dyn_cast(Inst)) return SI->getValueOperand(); assert(isa(Inst) && "Instruction not supported"); return TTI.getOrCreateResultFromMemIntrinsic(cast(Inst), diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index dee61b77412..8b8236390bf 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); +STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); //===----------------------------------------------------------------------===// // GVN Pass @@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast(I); - // If this store's memorydef stores the same value as the last store, the - // memory accesses are equivalent. - // Get the expression, if any, for the RHS of the MemoryDef. 
MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI); - MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( - cast(StoreAccess)->getDefiningAccess()); - const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); - // See if this store expression already has a value, and it's the same as our - // current store. FIXME: Right now, we only do this for simple stores. + // See if we are defined by a previous store expression, it already has a + // value, and it's the same value as our current store. FIXME: Right now, we + // only do this for simple stores, we should expand to cover memcpys, etc. if (SI->isSimple()) { + // Get the expression, if any, for the RHS of the MemoryDef. + MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( + cast(StoreAccess)->getDefiningAccess()); + const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); if (CC && CC->DefiningExpr && isa(CC->DefiningExpr) && CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B)) @@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { if (auto *I = dyn_cast(V)) { if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { // If this is a MemoryDef, we need to update the equivalence table. If - // we - // determined the expression is congruent to a different memory state, - // use that different memory state. If we determined it didn't, we - // update - // that as well. Note that currently, we do not guarantee the - // "different" memory state dominates us. The goal is to make things - // that are congruent look congruent, not ensure we can eliminate one in - // favor of the other. - // Right now, the only way they can be equivalent is for store - // expresions. - if (!isa(MA)) { - if (E && isa(E) && EClass->Members.size() != 1) { - auto *DefAccess = cast(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? 
DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } + // we determined the expression is congruent to a different memory + // state, use that different memory state. If we determined it didn't, + // we update that as well. Right now, we only support store + // expressions. + if (!isa(MA) && isa(E) && + EClass->Members.size() != 1) { + auto *DefAccess = cast(E)->getDefiningAccess(); + setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); + } else { + setMemoryAccessEquivTo(MA, nullptr); } markMemoryUsersTouched(MA); } @@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) { } else { // Handle terminators that return values. All of them produce values we // don't currently understand. - if (!I->getType()->isVoidTy()){ + if (!I->getType()->isVoidTy()) { auto *Symbolized = createUnknownExpression(I); performCongruenceFinding(I, Symbolized); } @@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() { continue; if (auto *FirstMUD = dyn_cast(KV.first)) { auto *SecondMUD = dyn_cast(KV.second); - if (FirstMUD && SecondMUD) { - auto *FirstInst = FirstMUD->getMemoryInst(); - auto *SecondInst = SecondMUD->getMemoryInst(); + if (FirstMUD && SecondMUD) assert( - ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) && + ValueToClass.lookup(FirstMUD->getMemoryInst()) == + ValueToClass.lookup(SecondMUD->getMemoryInst()) && "The instructions for these memory operations should have been in " "the same congruence class"); - } } else if (auto *FirstMP = dyn_cast(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have @@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, initializeCongruenceClasses(F); + unsigned int Iterations = 0; // We start out in the entry block. BasicBlock *LastBlock = &F.getEntryBlock(); while (TouchedInstructions.any()) { + ++Iterations; // Walk through all the instructions in all the blocks in RPO. 
for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; InstrNum = TouchedInstructions.find_next(InstrNum)) { @@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, TouchedInstructions.reset(InstrNum); } } - -// FIXME: Move this to expensive checks when we are satisfied with NewGVN + NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); #ifndef NDEBUG verifyMemoryCongruency(); #endif @@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // Cleanup the congruence class. SmallPtrSet MembersLeft; - for (Value * Member : CC->Members) { + for (Value *Member : CC->Members) { if (Member->getType()->isVoidTy()) { MembersLeft.insert(Member); continue; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index a2ceded106b..a40079ca8e7 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS, /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks -/// have different "unqiue scopes" at every call site. Were this not done, then +/// have different "unique scopes" at every call site. Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index dc526a20c90..842cf31f2e3 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 
0 : 1); uint64_t TrueWeight, FalseWeight; - uint64_t ExitWeight = 0, BackEdgeWeight = 0; + uint64_t ExitWeight = 0, CurHeaderWeight = 0; if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight; + // The # of times the loop body executes is the sum of the exit block + // weight and the # of times the backedges are taken. + CurHeaderWeight = TrueWeight + FalseWeight; } // For each peeled-off iteration, make a copy of the loop. @@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, SmallVector NewBlocks; ValueToValueMapTy VMap; - // The exit weight of the previous iteration is the header entry weight - // of the current iteration. So this is exactly how many dynamic iterations - // the current peeled-off static iteration uses up. + // Subtract the exit weight from the current header weight -- the exit + // weight is exactly the weight of the previous iteration's header. // FIXME: due to the way the distribution is constructed, we need a // guard here to make sure we don't end up with non-positive weights. - if (ExitWeight < BackEdgeWeight) - BackEdgeWeight -= ExitWeight; + if (ExitWeight < CurHeaderWeight) + CurHeaderWeight -= ExitWeight; else - BackEdgeWeight = 1; + CurHeaderWeight = 1; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, NewBlocks, LoopBlocks, VMap, LVMap, LI); @@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // Adjust the branch weights on the loop exit. if (ExitWeight) { + // The backedge count is the difference of current header weight and + // current loop exit weight. If the current header weight is smaller than + // the current loop exit weight, we mark the loop backedge weight as 1. 
+ uint64_t BackEdgeWeight = 0; + if (ExitWeight < CurHeaderWeight) + BackEdgeWeight = CurHeaderWeight - ExitWeight; + else + BackEdgeWeight = 1; MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3846b21c502..54390e77bb1 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef Blocks) { I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - // Update metadata and IR flags. + // The debug location for the "common" instruction is the merged locations of + // all the commoned instructions. We start with the original location of the + // "common" instruction and iteratively merge each location in the loop below. + DILocation *Loc = I0->getDebugLoc(); + + // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) if (I != I0) { + Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); combineMetadataForCSE(I0, I); I0->andIRFlags(I); } + if (!isa(I0)) + I0->setDebugLoc(Loc); if (!isa(I0)) { // canSinkLastInstruction checked that all instructions were used by diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index b1a47b55cfc..bf802a3b4ea 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -73,7 +73,13 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) # Setting a variable to let sub-projects detect which other projects # will be included under here. set(HAVE_${canon_name} On) + endforeach() + # We do this in two loops so that HAVE_* is set for each runtime before the + # other runtimes are added. 
+ foreach(entry ${runtimes}) + get_filename_component(projName ${entry} NAME) + # Between each sub-project we want to cache and clear the LIT properties set_property(GLOBAL PROPERTY LLVM_LIT_TESTSUITES) set_property(GLOBAL PROPERTY LLVM_LIT_PARAMS) diff --git a/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll b/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll index 2e162f0f000..9e706d62f8f 100644 --- a/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll +++ b/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -207,7 +207,7 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ret <8 x i16> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16': -; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector ; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector @@ -219,7 +219,7 @@ define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) { ret <8 x i16> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16_2': -; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector ; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector @@ -280,11 +280,11 @@ define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) { ret <16 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8': -; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; AVX: Cost Model: 
{{.*}} 3 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) { @@ -292,11 +292,11 @@ define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) { ret <16 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8_2': -; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { @@ -304,10 +304,10 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ret <16 x i16> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16': -; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector @@ -316,10 +316,10 @@ define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) { ret <16 x i16> %1 } ; CHECK: Printing analysis 'Cost Model 
Analysis' for function 'test_v16i16_2': -; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) { @@ -327,11 +327,11 @@ define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) { ret <32 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8': -; SSE2: Cost Model: {{.*}} 96 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) { @@ -339,9 +339,9 @@ define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) { ret <32 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8_2': -; SSE2: Cost Model: {{.*}} 96 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 
for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector diff --git a/test/Analysis/RegionInfo/bad_node_traversal.ll b/test/Analysis/RegionInfo/bad_node_traversal.ll new file mode 100644 index 00000000000..00dd1207af9 --- /dev/null +++ b/test/Analysis/RegionInfo/bad_node_traversal.ll @@ -0,0 +1,43 @@ +; REQUIRES: asserts +; RUN: opt -regions -analyze < %s | FileCheck %s + +; While working on improvements to the region info analysis, this test +; case caused an incorrect region 3 => 8 to be detected. + +define internal i8 @wibble() { +bb: + br i1 true, label %bb1, label %bb8 + +bb1: ; preds = %bb + switch i32 0, label %bb2 [ + i32 0, label %bb3 + i32 1, label %bb7 + ] + +bb2: ; preds = %bb1 + br label %bb4 + +bb3: ; preds = %bb1 + br label %bb5 + +bb4: ; preds = %bb2 + br label %bb6 + +bb5: ; preds = %bb3 + br label %bb6 + +bb6: ; preds = %bb5, %bb4 + br label %bb7 + +bb7: ; preds = %bb6, %bb1 + br label %bb8 + +bb8: ; preds = %bb7, %bb + ret i8 1 +} + +; CHECK: [0] bb => +; CHECK-NEXT: [1] bb => bb8 +; CHECK-NEXT: [2] bb1 => bb7 +; CHECK-NEXT: End region tree + diff --git a/test/Bitcode/DIGlobalVariableExpression.ll b/test/Bitcode/DIGlobalVariableExpression.ll index 0424a0e42a3..0bb0488b131 100644 --- a/test/Bitcode/DIGlobalVariableExpression.ll +++ b/test/Bitcode/DIGlobalVariableExpression.ll @@ -1,5 +1,8 @@ ; RUN: llvm-dis -o - %s.bc | FileCheck %s +; RUN: llvm-dis -o - %s.bc | llvm-as - | llvm-bcanalyzer -dump - | FileCheck %s --check-prefix=BC +; BC: GLOBAL_VAR_EXPR +; BC: GLOBAL_DECL_ATTACHMENT ; CHECK: @g = common global i32 0, align 4, !dbg ![[G:[0-9]+]] ; CHECK: @h = common global i32 0, align 4, !dbg ![[H:[0-9]+]] ; CHECK: ![[G]] = {{.*}}!DIGlobalVariableExpression(var: ![[GVAR:[0-9]+]], expr: ![[GEXPR:[0-9]+]]) diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll index 
ae77f7e099d..412651c5567 100644 --- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefix=CYCLONE --check-prefix=ALL ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefix=KRYO --check-prefix=ALL +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefix=FALKOR --check-prefix=ALL ; rdar://11481771 ; rdar://13713797 @@ -16,6 +17,10 @@ entry: ; KRYO: movi v1.2d, #0000000000000000 ; KRYO: movi v2.2d, #0000000000000000 ; KRYO: movi v3.2d, #0000000000000000 +; FALKOR: movi v0.2d, #0000000000000000 +; FALKOR: movi v1.2d, #0000000000000000 +; FALKOR: movi v2.2d, #0000000000000000 +; FALKOR: movi v3.2d, #0000000000000000 tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind ret void } @@ -47,6 +52,8 @@ define void @t4() nounwind ssp { ; CYCLONE: movi.2d v1, #0000000000000000 ; KRYO: movi v0.2d, #0000000000000000 ; KRYO: movi v1.2d, #0000000000000000 +; FALKOR: movi v0.2d, #0000000000000000 +; FALKOR: movi v1.2d, #0000000000000000 tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind ret void } diff --git a/test/CodeGen/AArch64/store_merge_pair_offset.ll b/test/CodeGen/AArch64/store_merge_pair_offset.ll new file mode 100644 index 00000000000..a091f0fd911 --- /dev/null +++ b/test/CodeGen/AArch64/store_merge_pair_offset.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -enable-misched=false -enable-post-misched=false -o - %s | FileCheck %s + +define i64 @test(i64* %a) nounwind { + ; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}} + ; CHECK-NOT: ldr + %p1 = getelementptr inbounds i64, i64* %a, i32 64 + %tmp1 = load i64, i64* %p1, align 2 + %p2 = getelementptr inbounds i64, i64* %a, i32 63 + %tmp2 = load i64, i64* %p2, align 2 + %tmp3 = 
add i64 %tmp1, %tmp2 + ret i64 %tmp3 +} diff --git a/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll b/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll new file mode 100644 index 00000000000..8d8885852af --- /dev/null +++ b/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +; GCN-LABEL: {{^}}main: +; GCN: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) +; GCN-NEXT: s_endpgm + +define amdgpu_gs void @main(i32 inreg %a) #0 { + call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %a) + ret void +} + +; GCN-LABEL: {{^}}main_halt: +; GCN: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; GCN-NEXT: s_sendmsghalt sendmsg(MSG_INTERRUPT) +; GCN-NEXT: s_endpgm + +define void @main_halt(i32 inreg %a) #0 { + call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %a) + ret void +} + +; GCN-LABEL: {{^}}legacy: +; GCN: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) +; GCN-NEXT: s_endpgm + +define amdgpu_gs void @legacy(i32 inreg %a) #0 { + call void @llvm.SI.sendmsg(i32 3, i32 %a) + ret void +} + +declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0 +declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0 +declare void @llvm.SI.sendmsg(i32, i32) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll b/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll new file mode 100644 index 00000000000..31f9cfca6de --- /dev/null +++ b/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll @@ -0,0 +1,161 @@ +;RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s + +; CHECK-LABEL: {{^}}test_interrupt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT) +define void @test_interrupt() { +body: + 
call void @llvm.amdgcn.s.sendmsg(i32 1, i32 0); + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) +define void @test_gs_emit() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 34, i32 0); + ret void +} + +; CHECK-LABEL: {{^}}test_gs_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1) +define void @test_gs_cut() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 274, i32 0); + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) +define void @test_gs_emit_cut() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 562, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_done: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP) +define void @test_gs_done() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) + ret void +} + + +; CHECK-LABEL: {{^}}test_interrupt_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_INTERRUPT) +define void @test_interrupt_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT, 0) +define void @test_gs_emit_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 34, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_cut_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_CUT, 1) +define void @test_gs_cut_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 274, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit_cut_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) +define void 
@test_gs_emit_cut_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 562, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_done_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS_DONE, GS_OP_NOP) +define void @test_gs_done_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 3, i32 0) + ret void +} + +; Legacy +; CHECK-LABEL: {{^}}test_legacy_interrupt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT) +define void @test_legacy_interrupt() { +body: + call void @llvm.SI.sendmsg(i32 1, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_emit: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) +define void @test_legacy_gs_emit() { +body: + call void @llvm.SI.sendmsg(i32 34, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1) +define void @test_legacy_gs_cut() { +body: + call void @llvm.SI.sendmsg(i32 274, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_emit_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) +define void @test_legacy_gs_emit_cut() { +body: + call void @llvm.SI.sendmsg(i32 562, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_done: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP) +define void @test_legacy_gs_done() { +body: + call void @llvm.SI.sendmsg(i32 3, i32 0) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0 +declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0 +declare void @llvm.SI.sendmsg(i32, i32) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll b/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll deleted file mode 100644 index 2d4987643a2..00000000000 
--- a/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s - -; GCN-LABEL: {{^}}main: -; GCN: s_mov_b32 m0, s0 -; VI-NEXT: s_nop 0 -; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) -; GCN-NEXT: s_endpgm - -define amdgpu_gs void @main(i32 inreg %a) #0 { - call void @llvm.SI.sendmsg(i32 3, i32 %a) - ret void -} - -declare void @llvm.SI.sendmsg(i32, i32) #0 - -attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll b/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll deleted file mode 100644 index c4bb27676e7..00000000000 --- a/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll +++ /dev/null @@ -1,24 +0,0 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s - -; CHECK-LABEL: {{^}}main: -; CHECK: s_mov_b32 m0, 0 -; CHECK-NOT: s_mov_b32 m0 -; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) -; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1) -; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) -; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP) - -define void @main() { -main_body: - call void @llvm.SI.sendmsg(i32 34, i32 0); - call void @llvm.SI.sendmsg(i32 274, i32 0); - call void @llvm.SI.sendmsg(i32 562, i32 0); - call void @llvm.SI.sendmsg(i32 3, i32 0); - ret void -} - -; Function Attrs: nounwind -declare void @llvm.SI.sendmsg(i32, i32) #0 - -attributes #0 = { nounwind } diff --git a/test/CodeGen/PowerPC/ppc64-blnop.ll b/test/CodeGen/PowerPC/ppc64-blnop.ll new file mode 100644 index 00000000000..2fe23f91c83 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-blnop.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -verify-machineinstrs 
-mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s +; RUN: llc < %s -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -function-sections -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-FS +; RUN: llc < %s -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -function-sections -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-FS + +%class.T = type { [2 x i8] } + +define void @e_callee(%class.T* %this, i8* %c) { ret void } +define void @e_caller(%class.T* %this, i8* %c) { + call void @e_callee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: e_caller: +; CHECK: bl e_callee +; CHECK-NEXT: nop + +; CHECK-FS-LABEL: e_caller: +; CHECK-FS: bl e_callee +; CHECK-FS-NEXT: nop +} + +define void @e_scallee(%class.T* %this, i8* %c) section "different" { ret void } +define void @e_scaller(%class.T* %this, i8* %c) { + call void @e_scallee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: e_scaller: +; CHECK: bl e_scallee +; CHECK-NEXT: nop +} + +define void @e_s2callee(%class.T* %this, i8* %c) { ret void } +define void @e_s2caller(%class.T* %this, i8* %c) section "different" { + call void @e_s2callee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: e_s2caller: +; CHECK: bl e_s2callee +; CHECK-NEXT: nop +} + +$cd1 = comdat any +$cd2 = comdat any + +define void @e_ccallee(%class.T* %this, i8* %c) comdat($cd1) { ret void } +define void @e_ccaller(%class.T* %this, i8* %c) comdat($cd2) { + call void @e_ccallee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: e_ccaller: +; CHECK: bl e_ccallee +; CHECK-NEXT: nop +} + +$cd = comdat any + +define void @e_c1callee(%class.T* %this, i8* %c) comdat($cd) { ret void } +define void 
@e_c1caller(%class.T* %this, i8* %c) comdat($cd) { + call void @e_c1callee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: e_c1caller: +; CHECK: bl e_c1callee +; CHECK-NEXT: nop +} + +define weak_odr hidden void @wo_hcallee(%class.T* %this, i8* %c) { ret void } +define void @wo_hcaller(%class.T* %this, i8* %c) { + call void @wo_hcallee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: wo_hcaller: +; CHECK: bl wo_hcallee +; CHECK-NEXT: nop +} + +define weak_odr protected void @wo_pcallee(%class.T* %this, i8* %c) { ret void } +define void @wo_pcaller(%class.T* %this, i8* %c) { + call void @wo_pcallee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: wo_pcaller: +; CHECK: bl wo_pcallee +; CHECK-NEXT: nop +} + +define weak_odr void @wo_callee(%class.T* %this, i8* %c) { ret void } +define void @wo_caller(%class.T* %this, i8* %c) { + call void @wo_callee(%class.T* %this, i8* %c) + ret void + +; CHECK-LABEL: wo_caller: +; CHECK: bl wo_callee +; CHECK-NEXT: nop +} + +define weak protected void @w_pcallee(i8* %ptr) { ret void } +define void @w_pcaller(i8* %ptr) { + call void @w_pcallee(i8* %ptr) + ret void + +; CHECK-LABEL: w_pcaller: +; CHECK: bl w_pcallee +; CHECK-NEXT: nop +} + +define weak hidden void @w_hcallee(i8* %ptr) { ret void } +define void @w_hcaller(i8* %ptr) { + call void @w_hcallee(i8* %ptr) + ret void + +; CHECK-LABEL: w_hcaller: +; CHECK: bl w_hcallee +; CHECK-NEXT: nop +} + +define weak void @w_callee(i8* %ptr) { ret void } +define void @w_caller(i8* %ptr) { + call void @w_callee(i8* %ptr) + ret void + +; CHECK-LABEL: w_caller: +; CHECK: bl w_callee +; CHECK-NEXT: nop +} + diff --git a/test/CodeGen/PowerPC/ppc64-sibcall.ll b/test/CodeGen/PowerPC/ppc64-sibcall.ll index 418b7828f1d..59e54560147 100644 --- a/test/CodeGen/PowerPC/ppc64-sibcall.ll +++ b/test/CodeGen/PowerPC/ppc64-sibcall.ll @@ -142,7 +142,7 @@ define void @wo_hcaller(%class.T* %this, i8* %c) { ret void ; CHECK-SCO-LABEL: wo_hcaller: -; CHECK-SCO: b wo_hcallee +; CHECK-SCO: bl 
wo_hcallee } define weak_odr protected void @wo_pcallee(%class.T* %this, i8* %c) { ret void } @@ -151,7 +151,7 @@ define void @wo_pcaller(%class.T* %this, i8* %c) { ret void ; CHECK-SCO-LABEL: wo_pcaller: -; CHECK-SCO: b wo_pcallee +; CHECK-SCO: bl wo_pcallee } define weak_odr void @wo_callee(%class.T* %this, i8* %c) { ret void } @@ -169,7 +169,7 @@ define void @w_pcaller(i8* %ptr) { ret void ; CHECK-SCO-LABEL: w_pcaller: -; CHECK-SCO: b w_pcallee +; CHECK-SCO: bl w_pcallee } define weak hidden void @w_hcallee(i8* %ptr) { ret void } @@ -178,7 +178,7 @@ define void @w_hcaller(i8* %ptr) { ret void ; CHECK-SCO-LABEL: w_hcaller: -; CHECK-SCO: b w_hcallee +; CHECK-SCO: bl w_hcallee } define weak void @w_callee(i8* %ptr) { ret void } diff --git a/test/CodeGen/SPARC/soft-float.ll b/test/CodeGen/SPARC/soft-float.ll index 53ca1974659..582804444f3 100644 --- a/test/CodeGen/SPARC/soft-float.ll +++ b/test/CodeGen/SPARC/soft-float.ll @@ -45,21 +45,21 @@ define fp128 @test_multf3(fp128 %a, fp128 %b) #0 { } define float @test_subsf3(float %a, float %b) #0 { - ; CHCEK-LABEL: test_subsf3: + ; CHECK-LABEL: test_subsf3: ; CHECK: call __subsf3 %sub = fsub float %a, %b ret float %sub } define double @test_subdf3(double %a, double %b) #0 { - ; CHCEK-LABEL: test_subdf3: + ; CHECK-LABEL: test_subdf3: ; CHECK: call __subdf3 %sub = fsub double %a, %b ret double %sub } define fp128 @test_subtf3(fp128 %a, fp128 %b) #0 { - ; CHCEK-LABEL: test_subtf3: + ; CHECK-LABEL: test_subtf3: ; CHECK: call __subtf3 %sub = fsub fp128 %a, %b ret fp128 %sub diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index b50253bf2b0..4d7cb765d7b 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -370,6 +370,40 @@ define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) { ret void } +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte 
copy for complicated address calculation. +; . +; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetComplicated: +; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]] +; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]] +; CHECK: movw %[[REG]], (%{{.*}}) +define void @MergeLoadStoreBaseIndexOffsetComplicated(i8* %a, i8* %b, i8* %c, i64 %n) { + br label %1 + +;