diff --git a/CMakeLists.txt b/CMakeLists.txt
index ac3b978ee6f..78fc78b1178 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,7 +60,7 @@ set(CMAKE_MODULE_PATH
 
 set(LLVM_VERSION_MAJOR 3)
 set(LLVM_VERSION_MINOR 7)
-set(LLVM_VERSION_PATCH 0)
+set(LLVM_VERSION_PATCH 1)
 set(LLVM_VERSION_SUFFIX "")
 
 if (NOT PACKAGE_VERSION)
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 7cdd97c309a..fd5119f0111 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -509,3 +509,10 @@ N: Michael Wong
 E: fraggamuffin@gmail.com 
 D: Clang OpenMP implementation
 
+N: Alexander Musman
+E: alexander.musman@intel.com 
+D: Clang OpenMP implementation
+
+N: Kevin O'Brien
+E: caomhin@us.ibm.com 
+D: Clang OpenMP implementation
\ No newline at end of file
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 74ebea2f5a7..af57712b57c 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -32,11 +32,11 @@ dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl address for reporting bugs.
 
-AC_INIT([LLVM],[3.7.0],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.7.1],[http://llvm.org/bugs/])
 
 LLVM_VERSION_MAJOR=3
 LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
 LLVM_VERSION_SUFFIX=
 
 AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go
index 80f7798ea06..76f5f06017c 100644
--- a/bindings/go/llvm/ir.go
+++ b/bindings/go/llvm/ir.go
@@ -1728,7 +1728,7 @@ func (b Builder) CreatePtrDiff(lhs, rhs Value, name string) (v Value) {
 func (b Builder) CreateLandingPad(t Type, personality Value, nclauses int, name string) (l Value) {
 	cname := C.CString(name)
 	defer C.free(unsafe.Pointer(cname))
-	l.C = C.LLVMBuildLandingPad(b.C, t.C, C.unsigned(nclauses), cname)
+	l.C = C.LLVMBuildLandingPad(b.C, t.C, personality.C, C.unsigned(nclauses), cname)
 	return l
 }
 
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index 26835d01559..3889f9276cc 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -1745,7 +1745,7 @@ CAMLprim LLVMValueRef llvm_build_invoke_bc(value Args[], int NumArgs) {
 CAMLprim LLVMValueRef llvm_build_landingpad(LLVMTypeRef Ty, LLVMValueRef PersFn,
                                             value NumClauses,  value Name,
                                             value B) {
-    return LLVMBuildLandingPad(Builder_val(B), Ty, Int_val(NumClauses),
+    return LLVMBuildLandingPad(Builder_val(B), Ty, PersFn, Int_val(NumClauses),
                                String_val(Name));
 }
 
diff --git a/configure b/configure
index c562f830b3a..c192415c24a 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.7.0.
+# Generated by GNU Autoconf 2.60 for LLVM 3.7.1.
 #
 # Report bugs to <http://llvm.org/bugs/>.
 #
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='LLVM'
 PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.7.0'
-PACKAGE_STRING='LLVM 3.7.0'
+PACKAGE_VERSION='3.7.1'
+PACKAGE_STRING='LLVM 3.7.1'
 PACKAGE_BUGREPORT='http://llvm.org/bugs/'
 
 ac_unique_file="lib/IR/Module.cpp"
@@ -1333,7 +1333,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures LLVM 3.7.0 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.7.1 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1399,7 +1399,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of LLVM 3.7.0:";;
+     short | recursive ) echo "Configuration of LLVM 3.7.1:";;
    esac
   cat <<\_ACEOF
 
@@ -1583,7 +1583,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-LLVM configure 3.7.0
+LLVM configure 3.7.1
 generated by GNU Autoconf 2.60
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1599,7 +1599,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by LLVM $as_me 3.7.0, which was
+It was created by LLVM $as_me 3.7.1, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
@@ -1955,7 +1955,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 LLVM_VERSION_MAJOR=3
 LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
 LLVM_VERSION_SUFFIX=
 
 
@@ -8643,87 +8643,6 @@ fi
 
 if test "$llvm_cv_os_type" = "MingW" ; then
 
-{ echo "$as_me:$LINENO: checking for main in -limagehlp" >&5
-echo $ECHO_N "checking for main in -limagehlp... $ECHO_C" >&6; }
-if test "${ac_cv_lib_imagehlp_main+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-limagehlp  $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h.  */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h.  */
-
-
-int
-main ()
-{
-return main ();
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_link") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } &&
-	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest$ac_exeext'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; }; then
-  ac_cv_lib_imagehlp_main=yes
-else
-  echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-	ac_cv_lib_imagehlp_main=no
-fi
-
-rm -f core conftest.err conftest.$ac_objext \
-      conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_lib_imagehlp_main" >&5
-echo "${ECHO_T}$ac_cv_lib_imagehlp_main" >&6; }
-if test $ac_cv_lib_imagehlp_main = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBIMAGEHLP 1
-_ACEOF
-
-  LIBS="-limagehlp $LIBS"
-
-fi
-
-
 { echo "$as_me:$LINENO: checking for main in -lole32" >&5
 echo $ECHO_N "checking for main in -lole32... $ECHO_C" >&6; }
 if test "${ac_cv_lib_ole32_main+set}" = set; then
@@ -18610,7 +18529,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by LLVM $as_me 3.7.0, which was
+This file was extended by LLVM $as_me 3.7.1, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -18663,7 +18582,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-LLVM config.status 3.7.0
+LLVM config.status 3.7.1
 configured by $0, generated by GNU Autoconf 2.60,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index fd149c97e44..b68f5ecd493 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -25,7 +25,35 @@ LLVM web page, this document applies to the *next* release, not the current
 one.  To see the release notes for a specific release, please see the `releases
 page <http://llvm.org/releases/>`_.
 
-Non-comprehensive list of changes in this release
+Major changes in 3.7.1
+======================
+
+* 3.7.0 was released with an inadvertent change to the signature of the C
+  API function ``LLVMBuildLandingPad``, which made the C API incompatible
+  with prior releases.  This has been corrected in LLVM 3.7.1.
+
+  As a result of this change, 3.7.0 is not ABI compatible with 3.7.1.
+
+  +----------------------------------------------------------------------------+
+  | History of the LLVMBuildLandingPad() function                              |
+  +===========================+================================================+
+  | 3.6.2 and prior releases  | LLVMBuildLandingPad(LLVMBuilderRef,            |
+  |                           |                     LLVMTypeRef,               |
+  |                           |                     LLVMValueRef,              |
+  |                           |                     unsigned, const char*)     |
+  +---------------------------+------------------------------------------------+
+  | 3.7.0                     | LLVMBuildLandingPad(LLVMBuilderRef,            |
+  |                           |                     LLVMTypeRef,               |
+  |                           |                     unsigned, const char*)     |
+  +---------------------------+------------------------------------------------+
+  | 3.7.1 and future releases | LLVMBuildLandingPad(LLVMBuilderRef,            |
+  |                           |                     LLVMTypeRef,               |
+  |                           |                     LLVMValueRef,              |
+  |                           |                     unsigned, const char*)     |
+  +---------------------------+------------------------------------------------+
+
+
+Non-comprehensive list of changes in 3.7.0
 =================================================
 
 .. NOTE
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 15290072abe..9dbcbfea387 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -2675,7 +2675,8 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn,
                              LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
                              const char *Name);
 LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
-                                 unsigned NumClauses, const char *Name);
+                                 LLVMValueRef PersFn, unsigned NumClauses,
+                                 const char *Name);
 LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn);
 LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef);
 
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index 4b2e0b06584..bedb7d5549e 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -21,7 +21,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
-#include "llvm//MC/SubtargetFeature.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 71c77815e28..a2b9316aa87 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -169,7 +169,7 @@ void WinException::endFunction(const MachineFunction *MF) {
     Asm->OutStreamer->PopSection();
   }
 
-  if (shouldEmitMoves)
+  if (shouldEmitMoves || shouldEmitPersonality)
     Asm->OutStreamer->EmitWinCFIEndProc();
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 21ab07234c8..fbc8f1e89f6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -439,7 +439,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
                              ISD::ANY_EXTEND, dl, VT, Result);
 
       ValResult = Result;
-      ChainResult = Chain;
+      ChainResult = newLoad.getValue(1);
       return;
     }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a7392fabf1e..54cfaf57061 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1010,6 +1010,8 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
 
   // Calculate the element offset and add it to the pointer.
   unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+  assert(EltSize * 8 == EltVT.getSizeInBits() &&
+         "Converting bits to bytes lost precision");
 
   Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
                       DAG.getConstant(EltSize, dl, Index.getValueType()));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4348ab79f7d..51cd6619f78 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1528,9 +1528,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   if (CustomLowerNode(N, N->getValueType(0), true))
     return SDValue();
 
-  // Store the vector to the stack.
-  EVT EltVT = VecVT.getVectorElementType();
+  // Make the vector elements byte-addressable if they aren't already.
   SDLoc dl(N);
+  EVT EltVT = VecVT.getVectorElementType();
+  if (EltVT.getSizeInBits() < 8) {
+    SmallVector<SDValue, 4> ElementOps;
+    for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
+      ElementOps.push_back(DAG.getAnyExtOrTrunc(
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
+                      DAG.getConstant(i, dl, MVT::i8)),
+          dl, MVT::i8));
+    }
+
+    EltVT = MVT::i8;
+    VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+                             VecVT.getVectorNumElements());
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps);
+  }
+
+  // Store the vector to the stack.
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
                                MachinePointerInfo(), false, false, 0);
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index adc620db897..b553f11018c 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -794,6 +794,10 @@ void SlotTracker::processFunction() {
   ST_DEBUG("begin processFunction!\n");
   fNext = 0;
 
+  // Process function metadata if it wasn't hit at the module-level.
+  if (!ShouldInitializeAllMetadata)
+    processFunctionMetadata(*TheFunction);
+
   // Add all the function arguments with no names.
   for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
       AE = TheFunction->arg_end(); AI != AE; ++AI)
@@ -807,8 +811,6 @@ void SlotTracker::processFunction() {
     if (!BB.hasName())
       CreateFunctionSlot(&BB);
 
-    processFunctionMetadata(*TheFunction);
-
     for (auto &I : BB) {
       if (!I.getType()->isVoidTy() && !I.hasName())
         CreateFunctionSlot(&I);
@@ -836,11 +838,11 @@ void SlotTracker::processFunction() {
 
 void SlotTracker::processFunctionMetadata(const Function &F) {
   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
-  for (auto &BB : F) {
-    F.getAllMetadata(MDs);
-    for (auto &MD : MDs)
-      CreateMetadataSlot(MD.second);
+  F.getAllMetadata(MDs);
+  for (auto &MD : MDs)
+    CreateMetadataSlot(MD.second);
 
+  for (auto &BB : F) {
     for (auto &I : BB)
       processInstructionMetadata(I);
   }
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index e0e729d534b..0eb88a96757 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -2257,7 +2257,14 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
 }
 
 LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
-                                 unsigned NumClauses, const char *Name) {
+                                 LLVMValueRef PersFn, unsigned NumClauses,
+                                 const char *Name) {
+  // The personality used to live on the landingpad instruction, but now it
+  // lives on the parent function. For compatibility, take the provided
+  // personality and put it on the parent function.
+  if (PersFn)
+    unwrap(B)->GetInsertBlock()->getParent()->setPersonalityFn(
+        cast<Function>(unwrap(PersFn)));
   return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name));
 }
 
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 149ec6a4f37..25ae4ac76e3 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -63,14 +63,21 @@ const char* LTOCodeGenerator::getVersionString() {
 #endif
 }
 
+static void handleLTODiagnostic(const DiagnosticInfo &DI) {
+  DiagnosticPrinterRawOStream DP(errs()); // Diagnostics are written to errs().
+  DI.print(DP);
+  errs() << "\n"; // Finish the line after the printed diagnostic.
+}
+
 LTOCodeGenerator::LTOCodeGenerator()
-    : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) {
+    : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context),
+                                            handleLTODiagnostic) {
   initializeLTOPasses();
 }
 
 LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context)
     : OwnedContext(std::move(Context)), Context(*OwnedContext),
-      IRLinker(new Module("ld-temp.o", *OwnedContext)) {
+      IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) {
   initializeLTOPasses();
 }
 
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index c601c56f395..a85796cfbad 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -82,6 +82,7 @@ void MCContext::reset() {
 
   UsedNames.clear();
   Symbols.clear();
+  SectionSymbols.clear();
   Allocator.Reset();
   Instances.clear();
   CompilationDir.clear();
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 709d7531d38..0a5309b16ee 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   for (const MachineBasicBlock &MBB : MF) {
     for (const MachineInstr &MI : MBB) {
       // TODO: CodeSize should account for multiple functions.
+
+      // TODO: Should we count size of debug info?
+      if (MI.isDebugValue())
+        continue;
+
+      // FIXME: This is reporting 0 for many instructions.
       CodeSize += MI.getDesc().Size;
 
       unsigned numOperands = MI.getNumOperands();
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 4a65bfc57f1..57b7a73bf56 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
 //
 // TODO: Check isTriviallyVectorizable for calls and handle other
 // instructions.
-static bool canVectorizeInst(Instruction *Inst) {
+static bool canVectorizeInst(Instruction *Inst, User *User) {
   switch (Inst->getOpcode()) {
   case Instruction::Load:
-  case Instruction::Store:
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
     return true;
+  case Instruction::Store: {
+    // Must be the stored pointer operand, not a stored value.
+    StoreInst *SI = cast<StoreInst>(Inst);
+    return SI->getPointerOperand() == User;
+  }
   default:
     return false;
   }
@@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
   for (User *AllocaUser : Alloca->users()) {
     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
     if (!GEP) {
-      if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
         return false;
 
       WorkList.push_back(AllocaUser);
@@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
 
     GEPVectorIdx[GEP] = Index;
     for (User *GEPUser : AllocaUser->users()) {
-      if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
         return false;
 
       WorkList.push_back(GEPUser);
@@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
   for (User *User : Val->users()) {
     if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
       continue;
-    if (isa<CallInst>(User)) {
+    if (CallInst *CI = dyn_cast<CallInst>(User)) {
+      // TODO: We might be able to handle some cases where the callee is a
+      // constantexpr bitcast of a function.
+      if (!CI->getCalledFunction())
+        return false;
+
       WorkList.push_back(User);
       continue;
     }
@@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
     if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
       return false;
 
+    if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) {
+      // Reject if Val is the stored value rather than the pointer operand.
+      if (SI->getPointerOperand() != Val)
+        return false;
+    }
+
     if (!User->getType()->isPointerTy())
       continue;
 
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
index 835a1464395..ba0490abee8 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -14,8 +14,7 @@
 let Namespace = "AMDGPU" in {
 
 foreach Index = 0-15 in {
-  // Indices are used in a variety of ways here, so don't set a size/offset.
-  def sub#Index : SubRegIndex<-1, -1>;
+  def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
 }
 
 def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 468563c4498..4434d9b119c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
   }
 }
 
+/// Return the number of bytes covered by the generic data fixup \p Kind.
+/// Only the FK_Data_1/2/4/8 kinds are handled; any other kind is treated as
+/// unreachable.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+  switch (Kind) {
+  case FK_Data_1: return 1;
+  case FK_Data_2: return 2;
+  case FK_Data_4: return 4;
+  case FK_Data_8: return 8;
+  default:
+    break;
+  }
+  llvm_unreachable("Unknown fixup kind!");
+}
+
 void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                   unsigned DataSize, uint64_t Value,
                                   bool IsPCRel) const {
 
   switch ((unsigned)Fixup.getKind()) {
-    default: llvm_unreachable("Unknown fixup kind");
     case AMDGPU::fixup_si_sopp_br: {
       uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
       *Dst = (Value - 4) / 4;
@@ -96,6 +110,24 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
       *Dst = Value + 4;
       break;
     }
+    default: {
+      // FIXME: Copied from AArch64
+      unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+      if (!Value)
+        return; // Doesn't change encoding.
+      MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+      // Shift the value into position.
+      Value <<= Info.TargetOffset;
+
+      unsigned Offset = Fixup.getOffset();
+      assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+      // For each byte of the fragment that the fixup touches, mask in the
+      // bits from the fixup value.
+      for (unsigned i = 0; i != NumBytes; ++i)
+        Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+    }
   }
 }
 
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 099b0b15942..c2db9ff537e 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
 
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
   setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
   setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
 
   setOperationAction(ISD::LOAD, MVT::i1, Custom);
@@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
                                                   SDValue Ptr) const {
   const SIInstrInfo *TII =
       static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff; // Size
 
-  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
+  return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
 }
 
 SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 18910615beb..cfd2c42d1ae 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
 
   return RsrcDataFormat;
 }
+
+uint64_t SIInstrInfo::getScratchRsrcWords23() const {
+  uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
+                    AMDGPU::RSRC_TID_ENABLE |
+                    0xffffffff; // Size: low 32 bits are all ones.
+
+  // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. On
+  // VOLCANIC_ISLANDS and newer, clear them unless we want a huge stride.
+  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
+
+  return Rsrc23;
+}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 015ea12d459..5053786a39f 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -353,7 +353,7 @@ public:
   }
 
   uint64_t getDefaultRsrcDataFormat() const;
-
+  uint64_t getScratchRsrcWords23() const;
 };
 
 namespace AMDGPU {
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index f78ffd72314..e0eeea9034b 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
 // These instructions only exist on SI and CI
 let SubtargetPredicate = isSICI in {
 
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32",
+  VOP_F32_F32_F32
+>;
+} // End isCommutable = 1
+
 defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
   VOP_F32_F32_F32, AMDGPUfmin_legacy
 >;
@@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
 } // End isCommutable = 1
 } // End let SubtargetPredicate = SICI
 
-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
-  VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
 defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
   VOP_I32_I32_I32
 >;
diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
index 0a7f684552f..2cd600df226 100644
--- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
@@ -135,8 +135,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
       unsigned ScratchRsrcReg =
           RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
 
-      uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                      0xffffffff; // Size
+      uint64_t Rsrc23 = TII->getScratchRsrcWords23();
 
       unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
       unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
@@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
               .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
-              .addImm(Rsrc & 0xffffffff)
+              .addImm(Rsrc23 & 0xffffffff)
               .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
-              .addImm(Rsrc >> 32)
+              .addImm(Rsrc23 >> 32)
               .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
       // Scratch Offset
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 54c4d549fac..e9e8412e263 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -26,23 +26,25 @@ using namespace llvm;
 
 SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
 
+// Mark Reg and every register that aliases it as reserved.
+void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
+                                           unsigned Reg) const {
+  for (MCRegAliasIterator Alias(Reg, this, true); Alias.isValid(); ++Alias)
+    Reserved.set(*Alias);
+}
+
 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
-  Reserved.set(AMDGPU::EXEC);
-
-  // EXEC_LO and EXEC_HI could be allocated and used as regular register,
-  // but this seems likely to result in bugs, so I'm marking them as reserved.
-  Reserved.set(AMDGPU::EXEC_LO);
-  Reserved.set(AMDGPU::EXEC_HI);
-
   Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
-  Reserved.set(AMDGPU::FLAT_SCR);
-  Reserved.set(AMDGPU::FLAT_SCR_LO);
-  Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+  // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
+  // this seems likely to result in bugs, so I'm marking them as reserved.
+  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
+  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
 
   // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
-  Reserved.set(AMDGPU::VGPR255);
-  Reserved.set(AMDGPU::VGPR254);
+  reserveRegisterTuples(Reserved, AMDGPU::VGPR254);
+  reserveRegisterTuples(Reserved, AMDGPU::VGPR255);
 
   // Tonga and Iceland can only allocate a fixed number of SGPRs due
   // to a hw bug.
@@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
     for (unsigned i = Limit; i < NumSGPRs; ++i) {
       unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
-      MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
-
-      for (; R.isValid(); ++R)
-        Reserved.set(*R);
+      reserveRegisterTuples(Reserved, Reg);
     }
   }
 
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index bfdb67c5e12..7da6de282c1 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -23,7 +23,10 @@
 namespace llvm {
 
 struct SIRegisterInfo : public AMDGPURegisterInfo {
+private:
+  void reserveRegisterTuples(BitVector &, unsigned Reg) const;
 
+public:
   SIRegisterInfo();
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f8f0eb2d4ba..cf6b8929f31 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
@@ -9104,6 +9105,10 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
     return false;
   }
 
+  Triple T;
+  STI.setDefaultFeatures(T.getARMCPUForArch(Arch));
+  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
   getTargetStreamer().emitArch(ID);
   return false;
 }
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index d9e654c7642..9d5f1d406d0 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -50,6 +50,7 @@ private:
 
   // Complex Pattern for address selection.
   bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
+  bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
 };
 }
 
@@ -67,7 +68,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
       Addr.getOpcode() == ISD::TargetGlobalAddress)
     return false;
 
-  // Addresses of the form FI+const or FI|const
+  // Addresses of the form Addr+const or Addr|const
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
     if (isInt<32>(CN->getSExtValue())) {
@@ -89,6 +90,31 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
   return true;
 }
 
+// ComplexPattern matcher used by the BPF FI_ri instruction. Matches only
+// (FrameIndex + const) or (FrameIndex | const) where const fits in 32 signed
+// bits; on success Base is the TargetFrameIndex and Offset the i64 constant.
+bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base,
+                                   SDValue &Offset) {
+  if (!CurDAG->isBaseWithConstantOffset(Addr))
+    return false;
+
+  // isBaseWithConstantOffset only succeeds when operand 1 is a constant, so
+  // use cast<> (asserts on mismatch) instead of an unchecked dyn_cast<>.
+  ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1));
+  if (!isInt<32>(CN->getSExtValue()))
+    return false;
+
+  // Only a frame-index base is accepted here; any other base is rejected.
+  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0));
+  if (!FIN)
+    return false;
+
+  SDLoc DL(Addr);
+  Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+  Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64);
+  return true;
+}
+
 SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {
   unsigned Opcode = Node->getOpcode();
 
@@ -104,13 +130,6 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {
   // tablegen selection should be handled here.
   switch (Opcode) {
   default: break;
-
-  case ISD::UNDEF: {
-    errs() << "BUG: "; Node->dump(CurDAG); errs() << '\n';
-    report_fatal_error("shouldn't see UNDEF during Select");
-    break;
-  }
-
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index 58498a1aec7..73418283d9b 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -102,6 +102,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
   setOperationAction(ISD::SETCC, MVT::i64, Expand);
   setOperationAction(ISD::SELECT, MVT::i64, Expand);
@@ -128,9 +129,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SUBC, MVT::i64, Expand);
   setOperationAction(ISD::SUBE, MVT::i64, Expand);
 
-  // no UNDEF allowed
-  setOperationAction(ISD::UNDEF, MVT::i64, Expand);
-
   setOperationAction(ISD::ROTR, MVT::i64, Expand);
   setOperationAction(ISD::ROTL, MVT::i64, Expand);
   setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index 26b2cfebdc8..6b73db87fa2 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -54,7 +54,8 @@ def i64immSExt32 : PatLeaf<(imm),
                 [{return isInt<32>(N->getSExtValue()); }]>;
 
 // Addressing modes.
-def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [frameindex], []>;
+def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>;
+def FIri : ComplexPattern<i64, 2, "SelectFIAddr", [add, or], []>;
 
 // Address operands
 def MEMri : Operand<i64> {
@@ -260,6 +261,15 @@ def MOV_rr : MOV_RR<"mov">;
 def MOV_ri : MOV_RI<"mov">;
 }
 
+def FI_ri
+    : InstBPF<(outs GPR:$dst), (ins MEMri:$addr),
+               "lea\t$dst, $addr",
+               [(set i64:$dst, FIri:$addr)]> {
+  // Placeholder for a frame-index address (matched via FIri); it is replaced
+  // by MOV_rr + ADD_ri in BPFRegisterInfo::eliminateFrameIndex (PEI phase).
+}
+
+
 def LD_pseudo
     : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm),
               "ld_pseudo\t$dst, $pseudo, $imm",
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 8f885c3ea61..952615bd1c2 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -58,14 +58,13 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   unsigned FrameReg = getFrameRegister(MF);
   int FrameIndex = MI.getOperand(i).getIndex();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  MachineBasicBlock &MBB = *MI.getParent();
 
   if (MI.getOpcode() == BPF::MOV_rr) {
-    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
     int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
 
     MI.getOperand(i).ChangeToRegister(FrameReg, false);
-
-    MachineBasicBlock &MBB = *MI.getParent();
     unsigned reg = MI.getOperand(i - 1).getReg();
     BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
         .addReg(reg)
@@ -79,8 +78,24 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (!isInt<32>(Offset))
     llvm_unreachable("bug in frame offset");
 
-  MI.getOperand(i).ChangeToRegister(FrameReg, false);
-  MI.getOperand(i + 1).ChangeToImmediate(Offset);
+  if (MI.getOpcode() == BPF::FI_ri) {
+    // architecture does not really support FI_ri, replace it with
+    //    MOV_rr <target_reg>, frame_reg
+    //    ADD_ri <target_reg>, imm
+    unsigned reg = MI.getOperand(i - 1).getReg();
+
+    BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg)
+        .addReg(FrameReg);
+    BuildMI(MBB, II, DL, TII.get(BPF::ADD_ri), reg)
+        .addReg(reg)
+        .addImm(Offset);
+
+    // Remove FI_ri instruction
+    MI.eraseFromParent();
+  } else {
+    MI.getOperand(i).ChangeToRegister(FrameReg, false);
+    MI.getOperand(i + 1).ChangeToImmediate(Offset);
+  }
 }
 
 unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 6fe8f830d35..b3d861d34da 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -269,6 +269,14 @@ namespace llvm {
     unsigned getRegisterByName(const char* RegName, EVT VT,
                                SelectionDAG &DAG) const override;
 
+    /// Returns true if a cast between SrcAS and DestAS is a noop.
+    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+      // Mips has no special hardware address spaces, so the first 256 are
+      // left to software (e.g. OpenCL) and casts among them are noops.
+      const unsigned FirstNonSoftwareAS = 256;
+      return SrcAS < FirstNonSoftwareAS && DestAS < FirstNonSoftwareAS;
+    }
+
   protected:
     SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
 
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index cb46d731da2..2ebfbd17d7d 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -115,6 +115,11 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
     if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
       continue;
 
+    // Also, we have to check that the register class of the operand
+    // contains the zero register.
+    if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg))
+      continue;
+
     MO.setReg(ZeroReg);
   }
 
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 444446692c5..8e118ec27e6 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -947,11 +947,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
   case PPC::ADDISdtprelHA:
-    // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
-    // Into:      %Xd = ADDIS8 %X3, sym@dtprel@ha
+    // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym>
+    // Into:      %Xd = ADDIS8 %Xs, sym@dtprel@ha
   case PPC::ADDISdtprelHA32: {
-    // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
-    // Into:      %Rd = ADDIS %R3, sym@dtprel@ha
+    // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym>
+    // Into:      %Rd = ADDIS %Rs, sym@dtprel@ha
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
@@ -962,7 +962,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
         *OutStreamer,
         MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
             .addReg(MI->getOperand(0).getReg())
-            .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3)
+            .addReg(MI->getOperand(1).getReg())
             .addExpr(SymDtprel));
     return;
   }
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index baadf081a64..fd150beeb5a 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -197,10 +197,18 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
 // Determining the address of a TLS variable results in a function call in
 // certain TLS models.
 static bool memAddrUsesCTR(const PPCTargetMachine *TM,
-                           const llvm::Value *MemAddr) {
+                           const Value *MemAddr) {
   const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-  if (!GV)
+  if (!GV) {
+    // Recurse to check for constants that refer to TLS global variables.
+    if (const auto *CV = dyn_cast<Constant>(MemAddr))
+      for (const auto &CO : CV->operands())
+        if (memAddrUsesCTR(TM, CO))
+          return true;
+
     return false;
+  }
+
   if (!GV->isThreadLocal())
     return false;
   if (!TM)
@@ -239,6 +247,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
         if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
           switch (F->getIntrinsicID()) {
           default: continue;
+          // A call to ppc_is_decremented_ctr_nonzero or ppc_mtctr means
+          // we're definitely using CTR.
+          case Intrinsic::ppc_is_decremented_ctr_nonzero:
+          case Intrinsic::ppc_mtctr:
+            return true;
 
 // VisualStudio defines setjmp as _setjmp
 #if defined(_MSC_VER) && defined(setjmp) && \
@@ -426,6 +439,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
   // Process nested loops first.
   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
     MadeChange |= convertToCTRLoop(*I);
+    DEBUG(dbgs() << "Nested loop converted\n");
   }
 
   // If a nested loop has been converted, then we can't convert this loop.
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b6025bf66ef..932226842bb 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2570,13 +2570,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       return nullptr;
     }
     // ISD::OR doesn't get all the bitfield insertion fun.
-    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
+    // bitfield insert.
     if (isInt32Immediate(N->getOperand(1), Imm) &&
         N->getOperand(0).getOpcode() == ISD::OR &&
         isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+      // The idea here is to check whether this is equivalent to:
+      //   (c1 & m) | (x & ~m)
+      // where m is a run-of-ones mask. The logic here is that, for each bit in
+      // c1 and c2:
+      //  - if both are 1, then the output will be 1.
+      //  - if both are 0, then the output will be 0.
+      //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
+      //    come from x.
+      //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
+      //    be 0.
+      //  If that last condition is never the case, then we can form m from the
+      //  bits that are the same between c1 and c2.
       unsigned MB, ME;
-      Imm = ~(Imm^Imm2);
-      if (isRunOfOnes(Imm, MB, ME)) {
+      if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
         SDValue Ops[] = { N->getOperand(0).getOperand(0),
                             N->getOperand(0).getOperand(1),
                             getI32Imm(0, dl), getI32Imm(MB, dl),
@@ -2787,6 +2799,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         SDValue Base, Offset;
 
         if (LD->isUnindexed() &&
+            (LD->getMemoryVT() == MVT::f64 ||
+             LD->getMemoryVT() == MVT::i64) &&
             SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
           SDValue Chain = LD->getChain();
           SDValue Ops[] = { Base, Offset, Chain };
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1e28913d1fc..1b8f8fb2f45 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -431,6 +431,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
       setOperationAction(ISD::SELECT, VT, Promote);
       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+      setOperationAction(ISD::SELECT_CC, VT, Promote);
+      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
       setOperationAction(ISD::STORE, VT, Promote);
       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
 
@@ -7175,7 +7177,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
         PPC::isSplatShuffleMask(SVOp, 4) ||
         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
-        PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
@@ -7183,8 +7184,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
-        PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG)   ||
-        PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
+        (Subtarget.hasP8Altivec() && (
+         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
+         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
+         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
       return Op;
     }
   }
@@ -7195,7 +7198,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
-      PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
@@ -7203,8 +7205,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
-      PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG)             ||
-      PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
+      (Subtarget.hasP8Altivec() && (
+       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
+       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
     return Op;
 
   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index bf6e4029640..d4e666cc1f3 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -309,6 +309,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   unsigned MB = MI->getOperand(4).getImm();
   unsigned ME = MI->getOperand(5).getImm();
 
+  // We can't commute a trivial mask (there is no way to represent an all-zero
+  // mask).
+  if (MB == 0 && ME == 31)
+    return nullptr;
+
   if (NewMI) {
     // Create a new instruction.
     unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index b50124db1ea..24fd9bd5c1f 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2835,24 +2835,84 @@ def : Pat<(i64 (anyext i1:$in)),
           (SELECT_I8 $in, (LI8 1), (LI8 0))>;
 
 // match setcc on i1 variables.
+// CRANDC is:
+//   1 1 : F
+//   1 0 : T
+//   0 1 : F
+//   0 0 : F
+//
+// LT is:
+//  -1 -1  : F
+//  -1  0  : T
+//   0 -1  : F
+//   0  0  : F
+//
+// ULT is:
+//   1 1 : F
+//   1 0 : F
+//   0 1 : T
+//   0 0 : F
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)),
-          (CRANDC $s2, $s1)>;
+          (CRANDC $s1, $s2)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),
           (CRANDC $s2, $s1)>;
+// CRORC is:
+//   1 1 : T
+//   1 0 : T
+//   0 1 : F
+//   0 0 : T
+//
+// LE is:
+//  -1 -1 : T
+//  -1  0 : T
+//   0 -1 : F
+//   0  0 : T
+//
+// ULE is:
+//   1 1 : T
+//   1 0 : F
+//   0 1 : T
+//   0 0 : T
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)),
-          (CRORC $s2, $s1)>;
+          (CRORC $s1, $s2)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),
           (CRORC $s2, $s1)>;
+
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),
           (CREQV $s1, $s2)>;
+
+// GE is:
+//  -1 -1 : T
+//  -1  0 : F
+//   0 -1 : T
+//   0  0 : T
+//
+// UGE is:
+//   1 1 : T
+//   1 0 : T
+//   0 1 : F
+//   0 0 : T
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)),
-          (CRORC $s1, $s2)>;
+          (CRORC $s2, $s1)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),
           (CRORC $s1, $s2)>;
+
+// GT is:
+//  -1 -1 : F
+//  -1  0 : F
+//   0 -1 : T
+//   0  0 : F
+//
+// UGT is:
+//  1 1 : F
+//  1 0 : T
+//  0 1 : F
+//  0 0 : F
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)),
-          (CRANDC $s1, $s2)>;
+          (CRANDC $s2, $s1)>;
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),
           (CRANDC $s1, $s2)>;
+
 def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),
           (CRXOR $s1, $s2)>;
 
@@ -3203,18 +3263,30 @@ def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)),
 //   select (lhs == rhs), tval, fval is:
 //   ((lhs == rhs) & tval) | (!(lhs == rhs) & fval)
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)),
+           (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
+                 (CRAND (CRORC  $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)),
            (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
                  (CRAND (CRORC  $lhs, $rhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)),
+           (CROR (CRAND (CRORC  $lhs, $rhs), $tval),
+                 (CRAND (CRANDC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)),
            (CROR (CRAND (CRORC  $rhs, $lhs), $tval),
                  (CRAND (CRANDC $lhs, $rhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)),
            (CROR (CRAND (CREQV $lhs, $rhs), $tval),
                  (CRAND (CRXOR $lhs, $rhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)),
+           (CROR (CRAND (CRORC  $rhs, $lhs), $tval),
+                 (CRAND (CRANDC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)),
            (CROR (CRAND (CRORC  $lhs, $rhs), $tval),
                  (CRAND (CRANDC $rhs, $lhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)),
+           (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
+                 (CRAND (CRORC  $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)),
            (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
                  (CRAND (CRORC  $rhs, $lhs), $fval))>;
 def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),
@@ -3223,66 +3295,106 @@ def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),
 
 // match selectcc on i1 variables with non-i1 output.
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)),
+          (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)),
           (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)),
+          (SELECT_I4 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)),
           (SELECT_I4 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)),
           (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)),
+          (SELECT_I4 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)),
           (SELECT_I4 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)),
+          (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)),
           (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)),
           (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>;
 
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)),
+          (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)),
           (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)),
+          (SELECT_I8 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)),
           (SELECT_I8 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)),
           (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)),
+          (SELECT_I8 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)),
           (SELECT_I8 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)),
+          (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)),
           (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)),
           (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>;
 
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+          (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
           (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+          (SELECT_F4 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
           (SELECT_F4 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
           (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+          (SELECT_F4 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
           (SELECT_F4 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+          (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
           (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
           (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>;
 
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+          (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
           (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+          (SELECT_F8 (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
           (SELECT_F8 (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
           (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+          (SELECT_F8 (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
           (SELECT_F8 (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+          (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
           (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
           (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>;
 
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)),
+          (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)),
           (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)),
+          (SELECT_VRRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)),
           (SELECT_VRRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)),
           (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)),
+          (SELECT_VRRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)),
           (SELECT_VRRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)),
+          (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
           (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
           (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td
index 5c66b42690c..0a044c5c6ea 100644
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@@ -1115,40 +1115,64 @@ def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
           (COPY_TO_REGCLASS $src, QFRC)>;
 
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+          (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)),
           (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+          (SELECT_QFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)),
           (SELECT_QFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
           (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+          (SELECT_QFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)),
           (SELECT_QFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+          (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)),
           (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
           (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+          (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)),
           (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+          (SELECT_QSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)),
           (SELECT_QSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
           (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+          (SELECT_QSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)),
           (SELECT_QSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+          (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)),
           (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
           (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+          (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)),
           (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+          (SELECT_QBRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)),
           (SELECT_QBRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
           (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+          (SELECT_QBRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)),
           (SELECT_QBRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+          (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)),
           (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
           (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 20c95fe888e..ce63c22992e 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -958,27 +958,43 @@ def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
 
 // Selects.
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+          (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
           (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+          (SELECT_VSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
           (SELECT_VSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
           (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+          (SELECT_VSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
           (SELECT_VSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+          (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
           (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
           (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+          (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
           (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+          (SELECT_VSFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
           (SELECT_VSFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
           (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+          (SELECT_VSFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
           (SELECT_VSFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+          (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
           (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
           (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
@@ -1060,18 +1076,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
             (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
   def : Pat<(f64 (fextend f32:$src)),
             (COPY_TO_REGCLASS $src, VSFRC)>;
+
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
             (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+            (SELECT_VSSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
             (SELECT_VSSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
             (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+            (SELECT_VSSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
             (SELECT_VSSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
             (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
-          (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+            (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
   // VSX Elementary Scalar FP arithmetic (SP)
   let isCommutable = 1 in {
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 58d3c3d3fa2..46b8d13e47b 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -103,6 +103,11 @@ protected:
 
         VNInfo *AddendValNo =
           LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+        if (!AddendValNo) {
+          // This can be null if the register is undef.
+          continue;
+        }
+
         MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
 
         // The addend and this instruction must be in the same block.
@@ -181,11 +186,14 @@ protected:
         if (!KilledProdOp)
           continue;
 
-        // For virtual registers, verify that the addend source register
-        // is live here (as should have been assured above).
-        assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
-                LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
-               "Addend source register is not live!");
+	// If the addend copy is used only by this MI, then the addend source
+	// register is likely not live here. This could be fixed (based on the
+	// legality checks above, the live range for the addend source register
+	// could be extended), but it seems likely that such a trivial copy can
+	// be coalesced away later, and thus is not worth the effort.
+	if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+            !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
+          continue;
 
         // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
 
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 3fb1dcc3d4a..d7132d5272d 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -240,6 +240,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
   for (MachineBasicBlock &MBB : *MF) {
     for (MachineInstr &MI : MBB) {
 
+      if (MI.isDebugValue())
+        continue;
+
       bool RelevantInstr = false;
       bool Partial = false;
 
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 4a33f7fc346..1c4e486da41 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -77,7 +77,7 @@ class SparcAsmParser : public MCTargetAsmParser {
   bool parseDirectiveWord(unsigned Size, SMLoc L);
 
   bool is64Bit() const {
-    return STI.getTargetTriple().getArchName().startswith("sparcv9");
+    return STI.getTargetTriple().getArch() == Triple::sparcv9;
   }
 
   void expandSET(MCInst &Inst, SMLoc IDLoc,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 71ccb1ab1e5..0f29b514146 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13573,6 +13573,35 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
                        DAG.getConstant(SSECC, dl, MVT::i8));
   }
 
+  MVT VTOp0 = Op0.getSimpleValueType();
+  assert(VTOp0 == Op1.getSimpleValueType() &&
+         "Expected operands with same type!");
+  assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
+         "Invalid number of packed elements for source and destination!");
+
+  if (VT.is128BitVector() && VTOp0.is256BitVector()) {
+    // On non-AVX512 targets, a vector of MVT::i1 is promoted by the type
+    // legalizer to a wider vector type.  In the case of 'vsetcc' nodes, the
+    // legalizer first checks whether the first operand of the setcc has
+    // a legal type. If so, then it promotes the return type to that same type.
+    // Otherwise, the return type is promoted to the 'next legal type' which,
+    // for a vector of MVT::i1 is always a 128-bit integer vector type.
+    //
+    // We reach this code only if the following two conditions are met:
+    // 1. Both return type and operand type have been promoted to wider types
+    //    by the type legalizer.
+    // 2. The original operand type has been promoted to a 256-bit vector.
+    //
+    // Note that condition 2. only applies for AVX targets.
+    SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode);
+    return DAG.getZExtOrTrunc(NewOp, dl, VT);
+  }
+
+  // The non-AVX512 code below works under the assumption that source and
+  // destination types are the same.
+  assert((Subtarget->hasAVX512() || (VT == VTOp0)) &&
+         "Value types for source and destination must be the same!");
+
   // Break 256-bit integer vector compare into smaller ones.
   if (VT.is256BitVector() && !Subtarget->hasInt256())
     return Lower256IntVSETCC(Op, DAG);
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 88e5e479136..909baae9254 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -228,7 +228,7 @@ void PassManagerBuilder::populateModulePassManager(
   // Start of function pass.
   // Break up aggregate allocas, using SSAUpdater.
   if (UseNewSROA)
-    MPM.add(createSROAPass(/*RequiresDomTree*/ false));
+    MPM.add(createSROAPass());
   else
     MPM.add(createScalarReplAggregatesPass(-1, false));
   MPM.add(createEarlyCSEPass());              // Catch trivial redundancies
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index d1eba6e70e5..89a0d0af93b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1761,7 +1761,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     if (isa<PHINode>(V))
       V->takeName(LI);
     if (Instruction *I = dyn_cast<Instruction>(V))
-      I->setDebugLoc(LI->getDebugLoc());
+      if (LI->getDebugLoc())
+        I->setDebugLoc(LI->getDebugLoc());
     if (V->getType()->getScalarType()->isPointerTy())
       MD->invalidateCachedPointerInfo(V);
     markInstructionForDeletion(LI);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 50ca6234d0b..ba8af47b54e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -869,6 +869,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
       PN->replaceAllUsesWith(*Inserted.first);
       PN->eraseFromParent();
       Changed = true;
+
+      // The RAUW can change PHIs that we already visited. Start over from the
+      // beginning.
+      PHISet.clear();
+      I = BB->begin();
     }
   }
 
diff --git a/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/test/CodeGen/AMDGPU/llvm.dbg.value.ll
new file mode 100644
index 00000000000..d001bcb4db1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@@ -0,0 +1,37 @@
+; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}test_debug_value:
+; CHECK: s_load_dwordx2
+; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- SGPR0_SGPR1
+; CHECK: buffer_store_dword
+; CHECK: s_endpgm
+define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, i64 0, metadata !10, metadata !13), !dbg !14
+  store i32 123, i32 addrspace(1)* %globalptr_arg, align 4
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244715) (llvm/trunk 244718)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug")
+!2 = !{}
+!3 = !{!4}
+!4 = !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: void (i32 addrspace(1)*)* @test_debug_value, variables: !9)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 32)
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !DIExpression()
+!14 = !DILocation(line: 1, column: 42, scope: !4)
diff --git a/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll b/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
new file mode 100644
index 00000000000..10739df0837
--- /dev/null
+++ b/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
@@ -0,0 +1,22 @@
+; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck %s
+
+; Make sure that AMDGPUPromoteAlloca doesn't crash if the called
+; function is a constantexpr cast of a function.
+
+declare void @foo(float*) #0
+declare void @foo.varargs(...) #0
+
+; CHECK: error: unsupported call to function foo in crash_call_constexpr_cast
+define void @crash_call_constexpr_cast() #0 {
+  %alloca = alloca i32
+  call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0
+  ret void
+}
+
+define void @crash_call_constexpr_cast_varargs() #0 {
+  %alloca = alloca i32
+  call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll b/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
new file mode 100644
index 00000000000..2ee98cc3d2d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
+
+; Pointer value is stored in a candidate for LDS usage.
+
+; GCN-LABEL: {{^}}stored_lds_pointer_value:
+; GCN: buffer_store_dword v
+define void @stored_lds_pointer_value(float* addrspace(1)* %ptr) #0 {
+  %tmp = alloca float
+  store float 0.0, float *%tmp
+  store float* %tmp, float* addrspace(1)* %ptr
+  ret void
+}
+
+; GCN-LABEL: {{^}}stored_lds_pointer_value_gep:
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @stored_lds_pointer_value_gep(float* addrspace(1)* %ptr, i32 %idx) #0 {
+bb:
+  %tmp = alloca float, i32 16
+  store float 0.0, float* %tmp
+  %tmp2 = getelementptr inbounds float, float* %tmp, i32 %idx
+  store float* %tmp2, float* addrspace(1)* %ptr
+  ret void
+}
+
+; Pointer value is stored in a candidate for vector usage
+; GCN-LABEL: {{^}}stored_vector_pointer_value:
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+define void @stored_vector_pointer_value(i32* addrspace(1)* %out, i32 %index) {
+entry:
+  %tmp0 = alloca [4 x i32]
+  %x = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 0
+  %y = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 1
+  %z = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 2
+  %w = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 3
+  store i32 0, i32* %x
+  store i32 1, i32* %y
+  store i32 2, i32* %z
+  store i32 3, i32* %w
+  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp0, i32 0, i32 %index
+  store i32* %tmp1, i32* addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/trunc-store.ll b/test/CodeGen/AMDGPU/trunc-store.ll
new file mode 100644
index 00000000000..4ba815f2669
--- /dev/null
+++ b/test/CodeGen/AMDGPU/trunc-store.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}truncstore_arg_v16i32_to_v16i8:
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @truncstore_arg_v16i32_to_v16i8(<16 x i8> addrspace(1)* %out, <16 x i32> %in) {
+  %trunc = trunc <16 x i32> %in to <16 x i8>
+  store <16 x i8> %trunc, <16 x i8> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncstore_arg_v16i64_to_v16i8:
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @truncstore_arg_v16i64_to_v16i8(<16 x i8> addrspace(1)* %out, <16 x i64> %in) {
+  %trunc = trunc <16 x i64> %in to <16 x i8>
+  store <16 x i8> %trunc, <16 x i8> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/BPF/fi_ri.ll b/test/CodeGen/BPF/fi_ri.ll
new file mode 100644
index 00000000000..64773b429fb
--- /dev/null
+++ b/test/CodeGen/BPF/fi_ri.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+%struct.key_t = type { i32, [16 x i8] }
+
+; Function Attrs: nounwind uwtable
+define i32 @test() #0 {
+  %key = alloca %struct.key_t, align 4
+  %1 = bitcast %struct.key_t* %key to i8*
+; CHECK: mov	r1, 0
+; CHECK: stw	-8(r10), r1
+; CHECK: std	-16(r10), r1
+; CHECK: std	-24(r10), r1
+  call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 20, i32 4, i1 false)
+; CHECK: mov	r1, r10
+; CHECK: addi	r1, -20
+  %2 = getelementptr inbounds %struct.key_t, %struct.key_t* %key, i64 0, i32 1, i64 0
+; CHECK: call	test1
+  call void @test1(i8* %2) #3
+  ret i32 0
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+declare void @test1(i8*) #2
diff --git a/test/CodeGen/BPF/sockex2.ll b/test/CodeGen/BPF/sockex2.ll
index d372a5982f6..5de2787d5b0 100644
--- a/test/CodeGen/BPF/sockex2.ll
+++ b/test/CodeGen/BPF/sockex2.ll
@@ -311,7 +311,7 @@ flow_dissector.exit.thread:                       ; preds = %86, %12, %196, %199
 ; CHECK-LABEL: bpf_prog2:
 ; CHECK: ldabs_h r0, r6.data + 12 # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
 ; CHECK: ldabs_h r0, r6.data + 16 # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
-; CHECK-NOT: implicit
+; CHECK: implicit-def: R
 ; CHECK: ld_64   r1
 ; CHECK-NOT: ori
 ; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
diff --git a/test/CodeGen/BPF/undef.ll b/test/CodeGen/BPF/undef.ll
new file mode 100644
index 00000000000..ef712c4a595
--- /dev/null
+++ b/test/CodeGen/BPF/undef.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+%struct.bpf_map_def = type { i32, i32, i32, i32 }
+%struct.__sk_buff = type opaque
+%struct.routing_key_2 = type { [6 x i8] }
+
+@routing = global %struct.bpf_map_def { i32 1, i32 6, i32 12, i32 1024 }, section "maps", align 4
+@routing_miss_0 = global %struct.bpf_map_def { i32 1, i32 1, i32 12, i32 1 }, section "maps", align 4
+@test1 = global %struct.bpf_map_def { i32 2, i32 4, i32 8, i32 1024 }, section "maps", align 4
+@test1_miss_4 = global %struct.bpf_map_def { i32 2, i32 1, i32 8, i32 1 }, section "maps", align 4
+@_license = global [4 x i8] c"GPL\00", section "license", align 1
+@llvm.used = appending global [6 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @_license, i32 0, i32 0), i8* bitcast (i32 (%struct.__sk_buff*)* @ebpf_filter to i8*), i8* bitcast (%struct.bpf_map_def* @routing to i8*), i8* bitcast (%struct.bpf_map_def* @routing_miss_0 to i8*), i8* bitcast (%struct.bpf_map_def* @test1 to i8*), i8* bitcast (%struct.bpf_map_def* @test1_miss_4 to i8*)], section "llvm.metadata"
+
+; Function Attrs: nounwind uwtable
+define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
+  %key = alloca %struct.routing_key_2, align 1
+  %1 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 0
+; CHECK: mov	r1, 5
+; CHECK: stb	-8(r10), r1
+  store i8 5, i8* %1, align 1
+  %2 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 1
+; CHECK: mov	r1, 6
+; CHECK: stb	-7(r10), r1
+  store i8 6, i8* %2, align 1
+  %3 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 2
+; CHECK: mov	r1, 7
+; CHECK: stb	-6(r10), r1
+  store i8 7, i8* %3, align 1
+  %4 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 3
+; CHECK: mov	r1, 8
+; CHECK: stb	-5(r10), r1
+  store i8 8, i8* %4, align 1
+  %5 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 4
+; CHECK: mov	r1, 9
+; CHECK: stb	-4(r10), r1
+  store i8 9, i8* %5, align 1
+  %6 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 5
+; CHECK: mov	r1, 10
+; CHECK: stb	-3(r10), r1
+  store i8 10, i8* %6, align 1
+  %7 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 1, i32 0, i64 0
+; CHECK: mov	r1, r10
+; CHECK: addi	r1, -2
+; CHECK: mov	r2, 0
+; CHECK: sth	6(r1), r2
+; CHECK: sth	4(r1), r2
+; CHECK: sth	2(r1), r2
+; CHECK: sth	24(r10), r2
+; CHECK: sth	22(r10), r2
+; CHECK: sth	20(r10), r2
+; CHECK: sth	18(r10), r2
+; CHECK: sth	16(r10), r2
+; CHECK: sth	14(r10), r2
+; CHECK: sth	12(r10), r2
+; CHECK: sth	10(r10), r2
+; CHECK: sth	8(r10), r2
+; CHECK: sth	6(r10), r2
+; CHECK: sth	-2(r10), r2
+; CHECK: sth	26(r10), r2
+  call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 30, i32 1, i1 false)
+  %8 = call i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...) bitcast (i32 (...)* @bpf_map_lookup_elem to i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...)*)(%struct.bpf_map_def* nonnull @routing, %struct.routing_key_2* nonnull %key) #3
+  ret i32 undef
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+declare i32 @bpf_map_lookup_elem(...) #2
diff --git a/test/CodeGen/Mips/llvm-ir/addrspacecast.ll b/test/CodeGen/Mips/llvm-ir/addrspacecast.ll
new file mode 100644
index 00000000000..060fa4ce7bb
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/addrspacecast.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL
+
+; Address spaces 1-255 are software defined.
+define i32* @cast(i32 *%arg) {
+  %1 = addrspacecast i32* %arg to i32 addrspace(1)*
+  %2 = addrspacecast i32 addrspace(1)* %1 to i32 addrspace(2)*
+  %3 = addrspacecast i32 addrspace(2)* %2 to i32 addrspace(0)*
+  ret i32* %3
+}
+
+; ALL-LABEL: cast:
+; ALL:           move   $2, $4
diff --git a/test/CodeGen/Mips/llvm-ir/extractelement.ll b/test/CodeGen/Mips/llvm-ir/extractelement.ll
new file mode 100644
index 00000000000..1e1b02df99a
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/extractelement.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL
+
+; This test triggered a bug in the vector splitting where the type legalizer
+; attempted to extract the element by storing the vector, then reading
+; an element back. However, the address calculation was:
+;   Base + Index * (EltSizeInBits / 8)
+; and EltSizeInBits was 1. This caused the index to be forgotten.
+define i1 @via_stack_bug(i8 signext %idx) {
+  %1 = extractelement <2 x i1> <i1 false, i1 true>, i8 %idx
+  ret i1 %1
+}
+
+; ALL-LABEL: via_stack_bug:
+; ALL-DAG:       addiu  [[ONE:\$[0-9]+]], $zero, 1
+; ALL-DAG:       sb     [[ONE]], 7($sp)
+; ALL-DAG:       sb     $zero, 6($sp)
+; ALL-DAG:       addiu  [[VPTR:\$[0-9]+]], $sp, 6
+; ALL-DAG:       addu   [[EPTR:\$[0-9]+]], $4, [[VPTR]]
+; ALL:           lbu    $2, 0([[EPTR]])
diff --git a/test/CodeGen/Mips/micromips-zero-mat-uses.ll b/test/CodeGen/Mips/micromips-zero-mat-uses.ll
new file mode 100644
index 00000000000..b38747a2d2c
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-zero-mat-uses.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=+micromips,+nooddspreg -O0 < %s | FileCheck %s
+
+; CHECK: addiu    $[[R0:[0-9]+]], $zero, 0
+; CHECK: subu16   $2, $[[R0]], ${{[0-9]+}}
+define i32 @foo() {
+  %1 = sub i32 0, undef
+  ret i32 %1
+}
diff --git a/test/CodeGen/PowerPC/ctr-loop-tls-const.ll b/test/CodeGen/PowerPC/ctr-loop-tls-const.ll
new file mode 100644
index 00000000000..01f837cb993
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctr-loop-tls-const.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mcpu=pwr7 -relocation-model=pic < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@x = thread_local global [1600 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind
+define void @foo(i32 signext %v) #0 {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %induction5 = or i64 %index, 1
+  %0 = getelementptr inbounds [1600 x i32], [1600 x i32]* @x, i64 0, i64 %index
+  %1 = getelementptr inbounds [1600 x i32], [1600 x i32]* @x, i64 0, i64 %induction5
+  %2 = load i32, i32* %0, align 4
+  %3 = load i32, i32* %1, align 4
+  %4 = add nsw i32 %2, %v
+  %5 = add nsw i32 %3, %v
+  store i32 %4, i32* %0, align 4
+  store i32 %5, i32* %1, align 4
+  %index.next = add i64 %index, 2
+  %6 = icmp eq i64 %index.next, 1600
+  br i1 %6, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: mtctr
+; CHECK: __tls_get_addr
+
+attributes #0 = { nounwind }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+
diff --git a/test/CodeGen/PowerPC/ctrloop-intrin.ll b/test/CodeGen/PowerPC/ctrloop-intrin.ll
new file mode 100644
index 00000000000..7c781cd15e4
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-intrin.ll
@@ -0,0 +1,349 @@
+; RUN: llc < %s
+; ModuleID = 'new.bc'
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le--linux-gnu"
+
+@.str.87 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str.1.88 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str.2.89 = external hidden unnamed_addr constant [5 x i8], align 1
+@.str.3.90 = external hidden unnamed_addr constant [4 x i8], align 1
+@.str.4.91 = external hidden unnamed_addr constant [14 x i8], align 1
+@.str.5.92 = external hidden unnamed_addr constant [13 x i8], align 1
+@.str.6.93 = external hidden unnamed_addr constant [10 x i8], align 1
+@.str.7.94 = external hidden unnamed_addr constant [9 x i8], align 1
+@.str.8.95 = external hidden unnamed_addr constant [2 x i8], align 1
+@.str.9.96 = external hidden unnamed_addr constant [2 x i8], align 1
+@.str.10.97 = external hidden unnamed_addr constant [3 x i8], align 1
+@.str.11.98 = external hidden unnamed_addr constant [3 x i8], align 1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare i8* @halide_string_to_string(i8*, i8*, i8*) #1
+
+; Function Attrs: nounwind
+declare i8* @halide_int64_to_string(i8*, i8*, i64, i32) #1
+
+; halide_double_to_string(dst, end, arg, scientific): picks a formatting path from the top bit, the 11-bit field and the low 52 bits of %arg's bit pattern, and returns the i8* produced by the last helper call on that path. Function Attrs: nounwind
+define weak i8* @halide_double_to_string(i8* %dst, i8* %end, double %arg, i32 %scientific) #1 {
+entry:
+  %arg.addr = alloca double, align 8
+  %bits = alloca i64, align 8
+  %buf = alloca [512 x i8], align 1
+  store double %arg, double* %arg.addr, align 8, !tbaa !4
+  %0 = bitcast i64* %bits to i8*
+  call void @llvm.lifetime.start(i64 8, i8* %0) #0
+  store i64 0, i64* %bits, align 8, !tbaa !8
+  %1 = bitcast double* %arg.addr to i8*
+  %call = call i8* @memcpy(i8* %0, i8* %1, i64 8) #2  ; copies the 8 bytes of %arg.addr into %bits so the double can be examined as an i64
+  %2 = load i64, i64* %bits, align 8, !tbaa !8
+  %and = and i64 %2, 4503599627370495  ; mask is 2^52 - 1: keeps the low 52 bits
+  %shr = lshr i64 %2, 52
+  %shr.tr = trunc i64 %shr to i32
+  %conv = and i32 %shr.tr, 2047  ; bits 52..62 as an 11-bit field
+  %shr2 = lshr i64 %2, 63  ; bit 63 only
+  %conv3 = trunc i64 %shr2 to i32
+  %cmp = icmp eq i32 %conv, 2047  ; true when all eleven bits of the field are set
+  br i1 %cmp, label %if.then, label %if.else.15
+
+if.then:                                          ; preds = %entry
+  %tobool = icmp eq i64 %and, 0
+  %tobool5 = icmp ne i32 %conv3, 0
+  br i1 %tobool, label %if.else.9, label %if.then.4
+
+if.then.4:                                        ; preds = %if.then
+  br i1 %tobool5, label %if.then.6, label %if.else
+
+if.then.6:                                        ; preds = %if.then.4
+  %call7 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.87, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else:                                          ; preds = %if.then.4
+  %call8 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1.88, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.9:                                        ; preds = %if.then
+  br i1 %tobool5, label %if.then.11, label %if.else.13
+
+if.then.11:                                       ; preds = %if.else.9
+  %call12 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2.89, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.13:                                       ; preds = %if.else.9
+  %call14 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3.90, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.15:                                       ; preds = %entry
+  %cmp16 = icmp eq i32 %conv, 0
+  %cmp17 = icmp eq i64 %and, 0
+  %or.cond = and i1 %cmp17, %cmp16
+  br i1 %or.cond, label %if.then.18, label %if.end.32
+
+if.then.18:                                       ; preds = %if.else.15
+  %tobool19 = icmp eq i32 %scientific, 0
+  %tobool21 = icmp ne i32 %conv3, 0
+  br i1 %tobool19, label %if.else.26, label %if.then.20
+
+if.then.20:                                       ; preds = %if.then.18
+  br i1 %tobool21, label %if.then.22, label %if.else.24
+
+if.then.22:                                       ; preds = %if.then.20
+  %call23 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.4.91, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.24:                                       ; preds = %if.then.20
+  %call25 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.5.92, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.26:                                       ; preds = %if.then.18
+  br i1 %tobool21, label %if.then.28, label %if.else.30
+
+if.then.28:                                       ; preds = %if.else.26
+  %call29 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.6.93, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.else.30:                                       ; preds = %if.else.26
+  %call31 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.7.94, i64 0, i64 0)) #3
+  br label %cleanup.148
+
+if.end.32:                                        ; preds = %if.else.15
+  %tobool33 = icmp eq i32 %conv3, 0
+  br i1 %tobool33, label %if.end.37, label %if.then.34
+
+if.then.34:                                       ; preds = %if.end.32
+  %call35 = call i8* @halide_string_to_string(i8* %dst, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.8.95, i64 0, i64 0)) #3
+  %sub36 = fsub double -0.000000e+00, %arg
+  store double %sub36, double* %arg.addr, align 8, !tbaa !4
+  br label %if.end.37
+
+if.end.37:                                        ; preds = %if.then.34, %if.end.32
+  %.pr = phi double [ %sub36, %if.then.34 ], [ %arg, %if.end.32 ]
+  %dst.addr.0 = phi i8* [ %call35, %if.then.34 ], [ %dst, %if.end.32 ]
+  %tobool38 = icmp eq i32 %scientific, 0
+  br i1 %tobool38, label %if.else.62, label %while.condthread-pre-split
+
+while.condthread-pre-split:                       ; preds = %if.end.37
+  %cmp40.261 = fcmp olt double %.pr, 1.000000e+00
+  br i1 %cmp40.261, label %while.body, label %while.cond.41thread-pre-split
+
+while.body:                                       ; preds = %while.body, %while.condthread-pre-split
+  %exponent_base_10.0262 = phi i32 [ %dec, %while.body ], [ 0, %while.condthread-pre-split ]
+  %3 = phi double [ %mul, %while.body ], [ %.pr, %while.condthread-pre-split ]
+  %mul = fmul double %3, 1.000000e+01
+  %dec = add nsw i32 %exponent_base_10.0262, -1
+  %cmp40 = fcmp olt double %mul, 1.000000e+00
+  br i1 %cmp40, label %while.body, label %while.cond.while.cond.41thread-pre-split_crit_edge
+
+while.cond.while.cond.41thread-pre-split_crit_edge: ; preds = %while.body
+  store double %mul, double* %arg.addr, align 8, !tbaa !4
+  br label %while.cond.41thread-pre-split
+
+while.cond.41thread-pre-split:                    ; preds = %while.cond.while.cond.41thread-pre-split_crit_edge, %while.condthread-pre-split
+  %.pr246 = phi double [ %mul, %while.cond.while.cond.41thread-pre-split_crit_edge ], [ %.pr, %while.condthread-pre-split ]
+  %exponent_base_10.0.lcssa = phi i32 [ %dec, %while.cond.while.cond.41thread-pre-split_crit_edge ], [ 0, %while.condthread-pre-split ]
+  %cmp42.257 = fcmp ult double %.pr246, 1.000000e+01
+  br i1 %cmp42.257, label %while.end.44, label %while.body.43
+
+while.body.43:                                    ; preds = %while.body.43, %while.cond.41thread-pre-split
+  %exponent_base_10.1258 = phi i32 [ %inc, %while.body.43 ], [ %exponent_base_10.0.lcssa, %while.cond.41thread-pre-split ]
+  %4 = phi double [ %div, %while.body.43 ], [ %.pr246, %while.cond.41thread-pre-split ]
+  %div = fdiv double %4, 1.000000e+01
+  %inc = add nsw i32 %exponent_base_10.1258, 1
+  %cmp42 = fcmp ult double %div, 1.000000e+01
+  br i1 %cmp42, label %while.cond.41.while.end.44_crit_edge, label %while.body.43
+
+while.cond.41.while.end.44_crit_edge:             ; preds = %while.body.43
+  store double %div, double* %arg.addr, align 8, !tbaa !4
+  br label %while.end.44
+
+while.end.44:                                     ; preds = %while.cond.41.while.end.44_crit_edge, %while.cond.41thread-pre-split
+  %exponent_base_10.1.lcssa = phi i32 [ %inc, %while.cond.41.while.end.44_crit_edge ], [ %exponent_base_10.0.lcssa, %while.cond.41thread-pre-split ]
+  %.lcssa = phi double [ %div, %while.cond.41.while.end.44_crit_edge ], [ %.pr246, %while.cond.41thread-pre-split ]
+  %mul45 = fmul double %.lcssa, 1.000000e+06
+  %add = fadd double %mul45, 5.000000e-01
+  %conv46 = fptoui double %add to i64
+  %div47 = udiv i64 %conv46, 1000000
+  %5 = mul i64 %div47, -1000000
+  %sub49 = add i64 %conv46, %5
+  %call50 = call i8* @halide_int64_to_string(i8* %dst.addr.0, i8* %end, i64 %div47, i32 1) #3
+  %call51 = call i8* @halide_string_to_string(i8* %call50, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.9.96, i64 0, i64 0)) #3
+  %call52 = call i8* @halide_int64_to_string(i8* %call51, i8* %end, i64 %sub49, i32 6) #3
+  %cmp53 = icmp sgt i32 %exponent_base_10.1.lcssa, -1
+  br i1 %cmp53, label %if.then.54, label %if.else.56
+
+if.then.54:                                       ; preds = %while.end.44
+  %call55 = call i8* @halide_string_to_string(i8* %call52, i8* %end, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.10.97, i64 0, i64 0)) #3
+  br label %if.end.59
+
+if.else.56:                                       ; preds = %while.end.44
+  %call57 = call i8* @halide_string_to_string(i8* %call52, i8* %end, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.11.98, i64 0, i64 0)) #3
+  %sub58 = sub nsw i32 0, %exponent_base_10.1.lcssa
+  br label %if.end.59
+
+if.end.59:                                        ; preds = %if.else.56, %if.then.54
+  %exponent_base_10.2 = phi i32 [ %exponent_base_10.1.lcssa, %if.then.54 ], [ %sub58, %if.else.56 ]
+  %dst.addr.1 = phi i8* [ %call55, %if.then.54 ], [ %call57, %if.else.56 ]
+  %conv60 = sext i32 %exponent_base_10.2 to i64
+  %call61 = call i8* @halide_int64_to_string(i8* %dst.addr.1, i8* %end, i64 %conv60, i32 2) #3
+  br label %cleanup.148
+
+if.else.62:                                       ; preds = %if.end.37
+  br i1 %cmp16, label %if.then.64, label %if.end.66
+
+if.then.64:                                       ; preds = %if.else.62
+  %call65 = call i8* @halide_double_to_string(i8* %dst.addr.0, i8* %end, double 0.000000e+00, i32 0) #3
+  br label %cleanup.148
+
+if.end.66:                                        ; preds = %if.else.62
+  %add68 = or i64 %and, 4503599627370496
+  %sub70 = add nsw i32 %conv, -1075
+  %cmp71 = icmp ult i32 %conv, 1075
+  br i1 %cmp71, label %if.then.72, label %if.end.105
+
+if.then.72:                                       ; preds = %if.end.66
+  %cmp73 = icmp slt i32 %sub70, -52
+  br i1 %cmp73, label %if.end.84, label %if.else.76
+
+if.else.76:                                       ; preds = %if.then.72
+  %sub77 = sub nsw i32 1075, %conv
+  %sh_prom = zext i32 %sub77 to i64
+  %shr78 = lshr i64 %add68, %sh_prom
+  %shl81 = shl i64 %shr78, %sh_prom
+  %sub82 = sub i64 %add68, %shl81
+  br label %if.end.84
+
+if.end.84:                                        ; preds = %if.else.76, %if.then.72
+  %integer_part.0 = phi i64 [ %shr78, %if.else.76 ], [ 0, %if.then.72 ]
+  %f.0.in = phi i64 [ %sub82, %if.else.76 ], [ %add68, %if.then.72 ]
+  %f.0 = uitofp i64 %f.0.in to double
+  %conv85.244 = zext i32 %sub70 to i64
+  %shl86 = shl i64 %conv85.244, 52
+  %add88 = add i64 %shl86, 4696837146684686336
+  %6 = bitcast i64 %add88 to double
+  %mul90 = fmul double %6, %f.0
+  %add91 = fadd double %mul90, 5.000000e-01
+  %conv92 = fptoui double %add91 to i64
+  %conv93 = uitofp i64 %conv92 to double
+  %and96 = and i64 %conv92, 1
+  %notlhs = fcmp oeq double %conv93, %add91
+  %notrhs = icmp ne i64 %and96, 0
+  %not.or.cond245 = and i1 %notrhs, %notlhs
+  %dec99 = sext i1 %not.or.cond245 to i64
+  %fractional_part.0 = add i64 %dec99, %conv92
+  %cmp101 = icmp eq i64 %fractional_part.0, 1000000
+  %inc103 = zext i1 %cmp101 to i64
+  %inc103.integer_part.0 = add i64 %inc103, %integer_part.0
+  %.fractional_part.0 = select i1 %cmp101, i64 0, i64 %fractional_part.0
+  br label %if.end.105
+
+if.end.105:                                       ; preds = %if.end.84, %if.end.66
+  %integer_part.2 = phi i64 [ %inc103.integer_part.0, %if.end.84 ], [ %add68, %if.end.66 ]
+  %integer_exponent.0 = phi i32 [ 0, %if.end.84 ], [ %sub70, %if.end.66 ]
+  %fractional_part.2 = phi i64 [ %.fractional_part.0, %if.end.84 ], [ 0, %if.end.66 ]
+  %7 = bitcast [512 x i8]* %buf to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %7) #0
+  %add.ptr = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i64 0, i64 512
+  %add.ptr106 = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i64 0, i64 480
+  %call109 = call i8* @halide_int64_to_string(i8* %add.ptr106, i8* %add.ptr, i64 %integer_part.2, i32 1) #3
+  %cmp110.252 = icmp sgt i32 %integer_exponent.0, 0
+  br i1 %cmp110.252, label %for.cond.112.preheader, label %for.cond.cleanup
+
+for.cond.112.preheader:                           ; preds = %if.end.138, %if.end.105
+  %i.0255 = phi i32 [ %inc140, %if.end.138 ], [ 0, %if.end.105 ]
+  %int_part_ptr.0253 = phi i8* [ %int_part_ptr.1, %if.end.138 ], [ %add.ptr106, %if.end.105 ]
+  %int_part_ptr.02534 = ptrtoint i8* %int_part_ptr.0253 to i64
+  %cmp114.249 = icmp eq i8* %call109, %int_part_ptr.0253
+  br i1 %cmp114.249, label %if.end.138, label %for.body.116.preheader
+
+for.body.116.preheader:                           ; preds = %for.cond.112.preheader
+  %8 = sub i64 0, %int_part_ptr.02534
+  %scevgep5 = getelementptr i8, i8* %call109, i64 %8
+  %scevgep56 = ptrtoint i8* %scevgep5 to i64
+  call void @llvm.ppc.mtctr.i64(i64 %scevgep56)  ; the value passed is %call109 minus %int_part_ptr.0253, i.e. the byte distance walked by the inner loop
+  br label %for.body.116
+
+for.cond.cleanup:                                 ; preds = %if.end.138, %if.end.105
+  %int_part_ptr.0.lcssa = phi i8* [ %add.ptr106, %if.end.105 ], [ %int_part_ptr.1, %if.end.138 ]
+  %9 = bitcast [512 x i8]* %buf to i8*
+  %call142 = call i8* @halide_string_to_string(i8* %dst.addr.0, i8* %end, i8* %int_part_ptr.0.lcssa) #3
+  %call143 = call i8* @halide_string_to_string(i8* %call142, i8* %end, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.9.96, i64 0, i64 0)) #3
+  %call144 = call i8* @halide_int64_to_string(i8* %call143, i8* %end, i64 %fractional_part.2, i32 6) #3
+  call void @llvm.lifetime.end(i64 512, i8* %9) #0
+  br label %cleanup.148
+
+for.cond.cleanup.115:                             ; preds = %for.body.116
+  br i1 %cmp125, label %if.then.136, label %if.end.138
+
+for.body.116:                                     ; preds = %for.body.116, %for.body.116.preheader
+  %call109.pn = phi i8* [ %p.0251, %for.body.116 ], [ %call109, %for.body.116.preheader ]
+  %carry.0250 = phi i32 [ %carry.1, %for.body.116 ], [ 0, %for.body.116.preheader ]
+  %call109.pn2 = ptrtoint i8* %call109.pn to i64
+  %p.0251 = getelementptr inbounds i8, i8* %call109.pn, i64 -1
+  %scevgep3 = getelementptr i8, i8* inttoptr (i64 -1 to i8*), i64 %call109.pn2
+  %10 = load i8, i8* %scevgep3, align 1, !tbaa !10
+  %sub118 = add i8 %10, -48
+  %conv120 = sext i8 %sub118 to i32
+  %mul121 = shl nsw i32 %conv120, 1
+  %add122 = or i32 %mul121, %carry.0250
+  %11 = trunc i32 %add122 to i8
+  %cmp125 = icmp sgt i8 %11, 9
+  %sub128 = add nsw i32 %add122, 246
+  %carry.1 = zext i1 %cmp125 to i32
+  %new_digit.0.in = select i1 %cmp125, i32 %sub128, i32 %add122
+  %add133 = add nsw i32 %new_digit.0.in, 48
+  %conv134 = trunc i32 %add133 to i8
+  %scevgep = getelementptr i8, i8* inttoptr (i64 -1 to i8*), i64 %call109.pn2
+  store i8 %conv134, i8* %scevgep, align 1, !tbaa !10
+  %12 = call i1 @llvm.ppc.is.decremented.ctr.nonzero()  ; the back-edge condition comes from this intrinsic rather than from an icmp
+  br i1 %12, label %for.body.116, label %for.cond.cleanup.115
+
+if.then.136:                                      ; preds = %for.cond.cleanup.115
+  %incdec.ptr137 = getelementptr inbounds i8, i8* %int_part_ptr.0253, i64 -1
+  store i8 49, i8* %incdec.ptr137, align 1, !tbaa !10
+  br label %if.end.138
+
+if.end.138:                                       ; preds = %if.then.136, %for.cond.cleanup.115, %for.cond.112.preheader
+  %int_part_ptr.1 = phi i8* [ %incdec.ptr137, %if.then.136 ], [ %call109, %for.cond.112.preheader ], [ %int_part_ptr.0253, %for.cond.cleanup.115 ]
+  %inc140 = add nuw nsw i32 %i.0255, 1
+  %exitcond = icmp eq i32 %inc140, %integer_exponent.0
+  br i1 %exitcond, label %for.cond.cleanup, label %for.cond.112.preheader
+
+cleanup.148:                                      ; preds = %for.cond.cleanup, %if.then.64, %if.end.59, %if.else.30, %if.then.28, %if.else.24, %if.then.22, %if.else.13, %if.then.11, %if.else, %if.then.6
+  %retval.1 = phi i8* [ %call7, %if.then.6 ], [ %call8, %if.else ], [ %call12, %if.then.11 ], [ %call14, %if.else.13 ], [ %call23, %if.then.22 ], [ %call25, %if.else.24 ], [ %call29, %if.then.28 ], [ %call31, %if.else.30 ], [ %call65, %if.then.64 ], [ %call61, %if.end.59 ], [ %call144, %for.cond.cleanup ]
+  %13 = bitcast i64* %bits to i8*
+  call void @llvm.lifetime.end(i64 8, i8* %13) #0
+  ret i8* %retval.1
+}
+
+; Function Attrs: nounwind
+declare i8* @memcpy(i8*, i8* nocapture readonly, i64) #1
+
+; Function Attrs: nounwind
+declare void @llvm.ppc.mtctr.i64(i64) #0
+
+; Function Attrs: nounwind
+declare i1 @llvm.ppc.is.decremented.ctr.nonzero() #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
+!llvm.module.flags = !{!1, !2, !3}
+
+!0 = !{!"clang version 3.7.0 (branches/release_37 246867) (llvm/branches/release_37 246866)"}
+!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
+!2 = !{i32 2, !"halide_mcpu", !"pwr8"}
+!3 = !{i32 2, !"halide_mattrs", !"+altivec,+vsx,+power8-altivec,+direct-move"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long long", !6, i64 0}
+!10 = !{!6, !6, i64 0}
diff --git a/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir b/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir
new file mode 100644
index 00000000000..5c998d09a3d
--- /dev/null
+++ b/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir
@@ -0,0 +1,92 @@
+# RUN: llc -start-after=dead-mi-elimination -stop-after=twoaddressinstruction -o /dev/null %s | FileCheck %s
+
+--- |
+  target datalayout = "E-m:e-i64:64-n32:64"
+  target triple = "powerpc64-unknown-linux-gnu"
+  
+  @d = global i32 15, align 4
+  @b = global i32* @d, align 8
+  @a = common global i32 0, align 4
+  
+  ; main: ORs a one-bit value derived from @a into the i32 that @b points to, twice, and returns the final OR result. Function Attrs: nounwind
+  define signext i32 @main() #0 {
+  entry:
+    %0 = load i32*, i32** @b, align 8
+    %1 = load i32, i32* @a, align 4
+    %lnot = icmp eq i32 %1, 0
+    %lnot.ext = zext i1 %lnot to i32
+    %shr.i = lshr i32 2072, %lnot.ext  ; shifts 2072 right by 1 when @a is zero, by 0 otherwise
+    %call.lobit = lshr i32 %shr.i, 7
+    %2 = and i32 %call.lobit, 1  ; keeps only bit 7 of the shifted constant
+    %3 = load i32, i32* %0, align 4
+    %or = or i32 %2, %3
+    store i32 %or, i32* %0, align 4
+    %4 = load i32, i32* @a, align 4
+    %lnot.1 = icmp eq i32 %4, 0
+    %lnot.ext.1 = zext i1 %lnot.1 to i32
+    %shr.i.1 = lshr i32 2072, %lnot.ext.1
+    %call.lobit.1 = lshr i32 %shr.i.1, 7
+    %5 = and i32 %call.lobit.1, 1
+    %or.1 = or i32 %5, %or  ; second round reuses the first OR result instead of reloading from %0
+    store i32 %or.1, i32* %0, align 4
+    ret i32 %or.1
+  }
+  
+  attributes #0 = { nounwind "target-cpu"="ppc64" }
+
+...
+---
+name:            main
+alignment:       2
+exposesReturnsTwice: false
+hasInlineAsm:    false
+isSSA:           true
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+registers:       # virtual registers 0 through 10, referenced as %0..%10 by the instructions in body
+  - { id: 0, class: g8rc_and_g8rc_nox0 }
+  - { id: 1, class: g8rc_and_g8rc_nox0 }
+  - { id: 2, class: gprc }
+  - { id: 3, class: gprc }
+  - { id: 4, class: gprc }
+  - { id: 5, class: g8rc_and_g8rc_nox0 }
+  - { id: 6, class: g8rc_and_g8rc_nox0 }
+  - { id: 7, class: gprc }
+  - { id: 8, class: gprc }
+  - { id: 9, class: gprc }
+  - { id: 10, class: g8rc }
+frameInfo:       # declares a zero-size stack frame and no calls for this function
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    liveins: %x2
+
+    %0 = ADDIStocHA %x2, @b
+    %1 = LD target-flags(ppc-toc-lo) @b, killed %0 :: (load 8 from @b)
+    %2 = LWZ 0, %1 :: (load 4 from %ir.0)
+    %3 = LI 0
+    %4 = RLWIMI %3, killed %2, 0, 0, 31
+    ; CHECK-LABEL: name: main
+    ; CHECK: %[[REG1:[0-9]+]] = LI 0
+    ; CHECK: %[[REG2:[0-9]+]] = COPY %[[REG1]]
+    ; CHECK: %[[REG2]] = RLWIMI %[[REG2]], killed %2, 0, 0, 31
+    %8 = RLWIMI %3, %4, 0, 0, 31
+    STW %4, 0, %1 :: (store 4 into %ir.0)
+    %10 = EXTSW_32_64 %8
+    STW %8, 0, %1 :: (store 4 into %ir.0)
+    %x3 = COPY %10
+    BLR8 implicit %x3, implicit %lr8, implicit %rm
+
+...
diff --git a/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
new file mode 100644
index 00000000000..052f55644fe
--- /dev/null
+++ b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; test1: returns lanes 0 and 2 of the <4 x i32> argument as a <2 x i32>. Function Attrs: nounwind
+define <2 x i32> @test1(<4 x i32> %wide.vec) #0 {
+entry:
+  %strided.vec = shufflevector <4 x i32> %wide.vec, <4 x i32> undef, <2 x i32> <i32 0, i32 2>  ; stride-2 selection from the first operand only
+  ret <2 x i32> %strided.vec
+
+; CHECK-LABEL: @test1
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: blr
+}
+
+; test2: byte shuffle whose mask places source bytes 0-3 in result lanes 4-7 and source bytes 8-11 in result lanes 12-15; the other eight lanes are undef. Function Attrs: nounwind
+define <16 x i8> @test2(<16 x i8> %wide.vec) #0 {
+entry:
+  %strided.vec = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 11>
+  ret <16 x i8> %strided.vec
+
+; CHECK-LABEL: @test2
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+
diff --git a/test/CodeGen/PowerPC/pr24546.ll b/test/CodeGen/PowerPC/pr24546.ll
new file mode 100644
index 00000000000..3bb638af234
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr24546.ll
@@ -0,0 +1,116 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s
+
+; Verify that we no longer crash in VSX swap removal when debug values
+; are in the code stream.
+
+@php_intpow10.powers = external unnamed_addr constant [23 x double], align 8
+
+; _php_math_round: reduced control-flow skeleton (branch conditions are undef) that returns either %conv / %retval.0.i or %value * undef. Function Attrs: nounwind
+define double @_php_math_round(double %value, i32 signext %places, i32 signext %mode) #0 {
+entry:
+  br i1 undef, label %if.then, label %if.else, !dbg !32
+
+if.then:                                          ; preds = %entry
+  %conv = sitofp i32 undef to double, !dbg !34
+  br i1 undef, label %if.then.i, label %if.end.i, !dbg !36
+
+if.then.i:                                        ; preds = %if.then
+  %call.i = tail call double @pow(double 1.000000e+01, double undef) #3, !dbg !39
+  br label %php_intpow10.exit, !dbg !41
+
+if.end.i:                                         ; preds = %if.then
+  %0 = load double, double* undef, align 8, !dbg !42, !tbaa !43
+  br label %php_intpow10.exit, !dbg !47
+
+php_intpow10.exit:                                ; preds = %if.end.i, %if.then.i
+  %retval.0.i = phi double [ %call.i, %if.then.i ], [ %0, %if.end.i ], !dbg !48
+  tail call void @llvm.dbg.value(metadata double %retval.0.i, i64 0, metadata !15, metadata !49), !dbg !50  ; the only debug-value call in this function; it sits between the phi and the fdiv that consumes it
+  %div = fdiv double %conv, %retval.0.i, !dbg !51
+  br label %if.end.15, !dbg !52
+
+if.else:                                          ; preds = %entry
+  %mul = fmul double %value, undef, !dbg !53
+  br label %if.end.15
+
+if.end.15:                                        ; preds = %if.else, %php_intpow10.exit
+  %tmp_value.1 = phi double [ %div, %php_intpow10.exit ], [ %mul, %if.else ]
+  ret double %tmp_value.1, !dbg !57
+}
+
+declare signext i32 @php_intlog10abs(...) #1
+
+declare signext i32 @php_round_helper(...) #1
+
+; Function Attrs: nounwind
+declare double @pow(double, double) #0
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!29, !30}
+!llvm.ident = !{!31}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (git://github.com/llvm-mirror/clang.git e0848b6353721eb1b278a5bbea257bbf6316251e) (git://github.com/llvm-mirror/llvm.git 8724a428dfd5e78d7865bb01783708e83f9ed128)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !5, globals: !23)
+!1 = !DIFile(filename: "testcase.i", directory: "/tmp/glibc.build")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!5 = !{!6, !18}
+!6 = !DISubprogram(name: "_php_math_round", scope: !1, file: !1, line: 15, type: !7, isLocal: false, isDefinition: true, scopeLine: 16, flags: DIFlagPrototyped, isOptimized: true, function: double (double, i32, i32)* @_php_math_round, variables: !10)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!4, !4, !9, !9}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11, !12, !13, !14, !15, !16, !17}
+!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", arg: 1, scope: !6, file: !1, line: 15, type: !4)
+!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "places", arg: 2, scope: !6, file: !1, line: 15, type: !9)
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "mode", arg: 3, scope: !6, file: !1, line: 15, type: !9)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f1", scope: !6, file: !1, line: 17, type: !4)
+!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f2", scope: !6, file: !1, line: 17, type: !4)
+!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tmp_value", scope: !6, file: !1, line: 18, type: !4)
+!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "precision_places", scope: !6, file: !1, line: 19, type: !9)
+!18 = !DISubprogram(name: "php_intpow10", scope: !1, file: !1, line: 1, type: !19, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!4, !9}
+!21 = !{!22}
+!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "power", arg: 1, scope: !18, file: !1, line: 1, type: !9)
+!23 = !{!24}
+!24 = !DIGlobalVariable(name: "powers", scope: !18, file: !1, line: 3, type: !25, isLocal: true, isDefinition: true, variable: [23 x double]* @php_intpow10.powers)
+!25 = !DICompositeType(tag: DW_TAG_array_type, baseType: !26, size: 1472, align: 64, elements: !27)
+!26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !4)
+!27 = !{!28}
+!28 = !DISubrange(count: 23)
+!29 = !{i32 2, !"Dwarf Version", i32 4}
+!30 = !{i32 2, !"Debug Info Version", i32 3}
+!31 = !{!"clang version 3.8.0 (git://github.com/llvm-mirror/clang.git e0848b6353721eb1b278a5bbea257bbf6316251e) (git://github.com/llvm-mirror/llvm.git 8724a428dfd5e78d7865bb01783708e83f9ed128)"}
+!32 = !DILocation(line: 21, column: 32, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !6, file: !1, line: 21, column: 6)
+!34 = !DILocation(line: 22, column: 15, scope: !35)
+!35 = distinct !DILexicalBlock(scope: !33, file: !1, line: 21, column: 67)
+!36 = !DILocation(line: 8, column: 16, scope: !37, inlinedAt: !38)
+!37 = distinct !DILexicalBlock(scope: !18, file: !1, line: 8, column: 6)
+!38 = distinct !DILocation(line: 23, column: 8, scope: !35)
+!39 = !DILocation(line: 9, column: 10, scope: !40, inlinedAt: !38)
+!40 = distinct !DILexicalBlock(scope: !37, file: !1, line: 8, column: 31)
+!41 = !DILocation(line: 9, column: 3, scope: !40, inlinedAt: !38)
+!42 = !DILocation(line: 11, column: 9, scope: !18, inlinedAt: !38)
+!43 = !{!44, !44, i64 0}
+!44 = !{!"double", !45, i64 0}
+!45 = !{!"omnipotent char", !46, i64 0}
+!46 = !{!"Simple C/C++ TBAA"}
+!47 = !DILocation(line: 11, column: 2, scope: !18, inlinedAt: !38)
+!48 = !DILocation(line: 23, column: 8, scope: !35)
+!49 = !DIExpression()
+!50 = !DILocation(line: 17, column: 13, scope: !6)
+!51 = !DILocation(line: 24, column: 25, scope: !35)
+!52 = !DILocation(line: 25, column: 2, scope: !35)
+!53 = !DILocation(line: 27, column: 22, scope: !54)
+!54 = distinct !DILexicalBlock(scope: !55, file: !1, line: 26, column: 20)
+!55 = distinct !DILexicalBlock(scope: !56, file: !1, line: 26, column: 7)
+!56 = distinct !DILexicalBlock(scope: !33, file: !1, line: 25, column: 9)
+!57 = !DILocation(line: 32, column: 2, scope: !6)
diff --git a/test/CodeGen/PowerPC/pr25157.ll b/test/CodeGen/PowerPC/pr25157.ll
new file mode 100644
index 00000000000..7137d675a74
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr25157.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Verify correct generation of an lxsspx rather than an invalid optimization
+; to lxvdsx.  Bugpoint-reduced test from Eric Schweitz.
+
+%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }>
+%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }>
+
+@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32
+@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16
+
+define void @aercalc_() {  ; reduced CFG with undef branch conditions; the only real computation is in block L.LB38_2452
+L.entry:
+  br i1 undef, label %L.LB38_2426, label %L.LB38_2911
+
+L.LB38_2911:
+  br i1 undef, label %L.LB38_2140, label %L.LB38_2640
+
+L.LB38_2640:
+  unreachable
+
+L.LB38_2426:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2920
+
+L.LB38_2920:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2921
+
+L.LB38_2921:
+  br label %L.LB38_2140
+
+L.LB38_2140:
+  ret void
+
+L.LB38_2438:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2935
+
+L.LB38_2935:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2936
+
+L.LB38_2936:
+  unreachable
+
+L.LB38_2451:
+  br i1 undef, label %L.LB38_2452, label %L.LB38_2937
+
+L.LB38_2937:
+  unreachable
+
+L.LB38_2452:
+  %0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) to float*), align 16
+  %1 = fpext float %0 to double  ; the loaded value is a single-precision scalar that is widened before use
+  %2 = insertelement <2 x double> undef, double %1, i32 1  ; only lane 1 is defined; lane 0 stays undef
+  store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16
+  unreachable
+}
+
+; CHECK-LABEL: @aercalc_
+; CHECK: lxsspx
diff --git a/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll b/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll
new file mode 100644
index 00000000000..a74bc727396
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@m = external global i32, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #0 {
+entry:
+; ORs 250 into @m and stores it back, then returns that value masked with 249 and sign-extended from its low 8 bits (shl/ashr by 24); no rlwimi may appear before the andi.
+; CHECK-LABEL: @main
+; CHECK-NOT: rlwimi
+; CHECK: andi
+
+  %0 = load i32, i32* @m, align 4
+  %or = or i32 %0, 250
+  store i32 %or, i32* @m, align 4
+  %and = and i32 %or, 249
+  %sub.i = sub i32 %and, 0
+  %sext = shl i32 %sub.i, 24
+  %conv = ashr exact i32 %sext, 24
+  ret i32 %conv
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+attributes #1 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/test/CodeGen/PowerPC/select-i1-vs-i1.ll
new file mode 100644
index 00000000000..6dabbaa4208
--- /dev/null
+++ b/test/CodeGen/PowerPC/select-i1-vs-i1.ll
@@ -0,0 +1,1685 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; FIXME: We should check the operands to the cr* logical operation itself, but
+; unfortunately, FileCheck does not yet understand how to do arithmetic, so we
+; can't do so without introducing a register-allocation dependency.
+
+define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp slt" of two i1 equality results; expected output: two cmpw, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi32slt
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32ult(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp ult" of two i1 equality results; expected output: two cmpw, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi32ult
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32sle(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp sle" of two i1 equality results; expected output: two cmpw, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi32sle
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32ule(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp ule" of two i1 equality results; expected output: two cmpw, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi32ule
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32eq(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp eq" of two i1 equality results; expected output: two cmpw, one creqv, and an isel keyed on the creqv result bit.
+; CHECK-LABEL: @testi32eq
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32sge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp sge" of two i1 equality results; expected output: two cmpw, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi32sge
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32uge(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp uge" of two i1 equality results; expected output: two cmpw, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi32uge
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32sgt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp sgt" of two i1 equality results; expected output: two cmpw, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi32sgt
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32ugt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp ugt" of two i1 equality results; expected output: two cmpw, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi32ugt
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define signext i32 @testi32ne(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
+entry:
+  %cmp1 = icmp eq i32 %c3, %c4
+  %cmp3tmp = icmp eq i32 %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i32 %a1, i32 %a2
+  ret i32 %cond
+; Selects %a1 or %a2 on "icmp ne" of two i1 equality results; expected output: two cmpw, one crxor, and an isel keyed on the crxor result bit.
+; CHECK-LABEL: @testi32ne
+; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6
+; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64slt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp slt" of two i1 equality results; expected output: two cmpd, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi64slt
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64ult(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp ult" of two i1 equality results; expected output: two cmpd, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi64ult
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64sle(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp sle" of two i1 equality results; expected output: two cmpd, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi64sle
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64ule(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp ule" of two i1 equality results; expected output: two cmpd, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi64ule
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64eq(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp eq" of two i1 equality results; expected output: two cmpd, one creqv, and an isel keyed on the creqv result bit.
+; CHECK-LABEL: @testi64eq
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64sge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp sge" of two i1 equality results; expected output: two cmpd, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi64sge
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64uge(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp uge" of two i1 equality results; expected output: two cmpd, one crorc, and an isel keyed on the crorc result bit.
+; CHECK-LABEL: @testi64uge
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64sgt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp sgt" of two i1 equality results; expected output: two cmpd, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi64sgt
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64ugt(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp ugt" of two i1 equality results; expected output: two cmpd, one crandc, and an isel keyed on the crandc result bit.
+; CHECK-LABEL: @testi64ugt
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @testi64ne(i64 %c1, i64 %c2, i64 %c3, i64 %c4, i64 %a1, i64 %a2) #0 {
+entry:
+  %cmp1 = icmp eq i64 %c3, %c4
+  %cmp3tmp = icmp eq i64 %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, i64 %a1, i64 %a2
+  ret i64 %cond
+; Selects %a1 or %a2 on "icmp ne" of two i1 equality results; expected output: two cmpd, one crxor, and an isel keyed on the crxor result bit.
+; CHECK-LABEL: @testi64ne
+; CHECK-DAG: cmpd {{([0-9]+, )?}}5, 6
+; CHECK-DAG: cmpd {{([0-9]+, )?}}3, 4
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: isel 3, 7, 8, [[REG1]]
+; CHECK: blr
+}
+
+define float @testfloatslt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp slt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testfloatslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatult(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp ult" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testfloatult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatsle(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp sle" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testfloatsle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatule(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp ule" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testfloatule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloateq(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp eq" of two i1 fcmp-oeq results; expected output: two fcmpu, one creqv, and a bc on the creqv result bit around an fmr.
+; CHECK-LABEL: @testfloateq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatsge(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp sge" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testfloatsge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatuge(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp uge" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testfloatuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatsgt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp sgt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testfloatsgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatugt(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp ugt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testfloatugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define float @testfloatne(float %c1, float %c2, float %c3, float %c4, float %a1, float %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, float %a1, float %a2
+  ret float %cond
+; Selects %a1 or %a2 on "icmp ne" of two i1 fcmp-oeq results; expected output: two fcmpu, one crxor, and a bc on the crxor result bit around an fmr.
+; CHECK-LABEL: @testfloatne
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoubleslt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp slt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testdoubleslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoubleult(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp ult" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testdoubleult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoublesle(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp sle" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testdoublesle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoubleule(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp ule" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testdoubleule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoubleeq(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp eq" of two i1 fcmp-oeq results; expected output: two fcmpu, one creqv, and a bc on the creqv result bit around an fmr.
+; CHECK-LABEL: @testdoubleeq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoublesge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp sge" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testdoublesge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoubleuge(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp uge" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an fmr.
+; CHECK-LABEL: @testdoubleuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoublesgt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp sgt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testdoublesgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoubleugt(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp ugt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an fmr.
+; CHECK-LABEL: @testdoubleugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define double @testdoublene(double %c1, double %c2, double %c3, double %c4, double %a1, double %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq double %c3, %c4
+  %cmp3tmp = fcmp oeq double %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, double %a1, double %a2
+  ret double %cond
+; Selects %a1 or %a2 on "icmp ne" of two i1 fcmp-oeq results; expected output: two fcmpu, one crxor, and a bc on the crxor result bit around an fmr.
+; CHECK-LABEL: @testdoublene
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: fmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: fmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatslt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp slt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an xxlor.
+; FIXME: This test (and the other v4f32 tests) should use the same bclr
+; technique as the v2f64 tests below.
+
+; CHECK-LABEL: @testv4floatslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatult(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp ult" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an xxlor.
+; CHECK-LABEL: @testv4floatult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatsle(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp sle" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an xxlor.
+; CHECK-LABEL: @testv4floatsle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatule(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp ule" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an xxlor.
+; CHECK-LABEL: @testv4floatule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floateq(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp eq" of two i1 fcmp-oeq results; expected output: two fcmpu, one creqv, and a bc on the creqv result bit around an xxlor.
+; CHECK-LABEL: @testv4floateq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatsge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp sge" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an xxlor.
+; CHECK-LABEL: @testv4floatsge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatuge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp uge" of two i1 fcmp-oeq results; expected output: two fcmpu, one crorc, and a bc on the crorc result bit around an xxlor.
+; CHECK-LABEL: @testv4floatuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatsgt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp sgt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an xxlor.
+; CHECK-LABEL: @testv4floatsgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatugt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp ugt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bc on the crandc result bit around an xxlor.
+; CHECK-LABEL: @testv4floatugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @testv4floatne(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+; Selects <4 x float> %a1 or %a2 on "icmp ne" of two i1 fcmp-oeq results; expected output: two fcmpu, one crxor, and a bc on the crxor result bit around an xxlor.
+; CHECK-LABEL: @testv4floatne
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
+; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: .LBB[[BB]]:
+; CHECK: xxlor 34, [[REG2]], [[REG2]]
+; CHECK: blr
+}
+
+define ppc_fp128 @testppc_fp128eq(ppc_fp128 %c1, ppc_fp128 %c2, ppc_fp128 %c3, ppc_fp128 %c4, ppc_fp128 %a1, ppc_fp128 %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq ppc_fp128 %c3, %c4
+  %cmp3tmp = fcmp oeq ppc_fp128 %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, ppc_fp128 %a1, ppc_fp128 %a2
+  ret ppc_fp128 %cond
+; Selects ppc_fp128 %a1 or %a2 on "icmp eq" of two i1 fcmp-oeq results; expected output: four fcmpu, two crand, one creqv, and two bc/fmr pairs keyed on the creqv result bit.
+; FIXME: Because of the way that the late SELECT_* pseudo-instruction expansion
+; works, we end up with two blocks with the same predicate. These could be
+; combined.
+
+; CHECK-LABEL: @testppc_fp128eq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 6, 8
+; CHECK-DAG: fcmpu {{[0-9]+}}, 5, 7
+; CHECK-DAG: fcmpu {{[0-9]+}}, 2, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 3
+; CHECK: crand [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: crand [[REG2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: creqv [[REG3:[0-9]+]], [[REG2]], [[REG1]]
+; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]]
+; CHECK: fmr 9, 11
+; CHECK: .LBB[[BB1]]:
+; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]]
+; CHECK: fmr 10, 12
+; CHECK: .LBB[[BB2]]:
+; CHECK-DAG: fmr 1, 9
+; CHECK-DAG: fmr 2, 10
+; CHECK: blr
+}
+
+define <2 x double> @testv2doubleslt(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+; Selects <2 x double> %a1 or %a2 on "icmp slt" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bclr on the crandc result bit ahead of a vor.
+; CHECK-LABEL: @testv2doubleslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doubleult(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+; Selects <2 x double> %a1 or %a2 on "icmp ult" of two i1 fcmp-oeq results; expected output: two fcmpu, one crandc, and a bclr on the crandc result bit ahead of a vor.
+; CHECK-LABEL: @testv2doubleult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doublesle(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doublesle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doubleule(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doubleule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doubleeq(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doubleeq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doublesge(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doublesge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doubleuge(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doubleuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doublesgt(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doublesgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doubleugt(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doubleugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <2 x double> @testv2doublene(float %c1, float %c2, float %c3, float %c4, <2 x double> %a1, <2 x double> %a2) #0 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <2 x double> %a1, <2 x double> %a2
+  ret <2 x double> %cond
+
+; CHECK-LABEL: @testv2doublene
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bclr 12, [[REG1]], 0
+; CHECK: vor 2, 3, 3
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleslt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleult(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublesle(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublesle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleule(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleeq(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleeq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublesge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublesge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleuge(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublesgt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublesgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doubleugt(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doubleugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x double> @testqv4doublene(float %c1, float %c2, float %c3, float %c4, <4 x double> %a1, <4 x double> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x double> %a1, <4 x double> %a2
+  ret <4 x double> %cond
+
+; CHECK-LABEL: @testqv4doublene
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatslt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatslt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatult(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatsle(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatsle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatule(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floateq(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floateq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatsge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatsge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatuge(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatuge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatsgt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatsgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatugt(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x float> @testqv4floatne(float %c1, float %c2, float %c3, float %c4, <4 x float> %a1, <4 x float> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x float> %a1, <4 x float> %a2
+  ret <4 x float> %cond
+
+; CHECK-LABEL: @testqv4floatne
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1slt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp slt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1slt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ult(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ult i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ult
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1sle(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sle i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1sle
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ule(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ule i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ule
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1eq(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp eq i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1eq
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1sge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1sge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1uge(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp uge i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1uge
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1sgt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp sgt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1sgt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ugt(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ugt i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ugt
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+define <4 x i1> @testqv4i1ne(float %c1, float %c2, float %c3, float %c4, <4 x i1> %a1, <4 x i1> %a2) #1 {
+entry:
+  %cmp1 = fcmp oeq float %c3, %c4
+  %cmp3tmp = fcmp oeq float %c1, %c2
+  %cmp3 = icmp ne i1 %cmp3tmp, %cmp1
+  %cond = select i1 %cmp3, <4 x i1> %a1, <4 x i1> %a2
+  ret <4 x i1> %cond
+
+; CHECK-LABEL: @testqv4i1ne
+; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
+; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
+; CHECK: qvfmr 5, 6
+; CHECK: .LBB[[BB]]:
+; CHECK: qvfmr 1, 5
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone "target-cpu"="pwr7" }
+attributes #1 = { nounwind readnone "target-cpu"="a2q" }
+
diff --git a/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
new file mode 100644
index 00000000000..a5b4474460c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @LSH_recall_init(float %d_min, float %W) #0 {
+entry:
+  br i1 undef, label %for.body.lr.ph, label %for.end
+
+; CHECK-LABEL: @LSH_recall_init
+; CHECK: xsnmsubadp
+
+for.body.lr.ph:                                   ; preds = %entry
+  %conv3 = fpext float %W to double
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %div = fdiv fast float 0.000000e+00, 0.000000e+00
+  %add = fadd fast float %div, %d_min
+  %conv2 = fpext float %add to double
+  %0 = tail call double @llvm.sqrt.f64(double %conv2)
+  %div4 = fdiv fast double %conv3, %0
+  %call = tail call signext i32 bitcast (i32 (...)* @p_col_helper to i32 (double)*)(double %div4) #2
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.sqrt.f64(double) #1
+
+declare signext i32 @p_col_helper(...) #2
+
+attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="pwr7" "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll b/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll
new file mode 100644
index 00000000000..e3f4001aa1d
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-fma-mutate-undef.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @acosh_float8() #0 {
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> <float 0x3FE62E4200000000, float 0x3FE62E4200000000, float 0x3FE62E4200000000, float 0x3FE62E4200000000>, <4 x float> undef) #0
+  %astype.i.i.74.i = bitcast <4 x float> %0 to <4 x i32>
+  %and.i.i.76.i = and <4 x i32> %astype.i.i.74.i, undef
+  %or.i.i.79.i = or <4 x i32> %and.i.i.76.i, undef
+  %astype5.i.i.80.i = bitcast <4 x i32> %or.i.i.79.i to <4 x float>
+  %1 = shufflevector <4 x float> %astype5.i.i.80.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = shufflevector <8 x float> undef, <8 x float> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  store <8 x float> %2, <8 x float>* undef, align 32
+  br label %if.end
+
+; CHECK-LABEL: @acosh_float8
+; CHECK: xvmaddasp
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
diff --git a/test/CodeGen/X86/pr24374.ll b/test/CodeGen/X86/pr24374.ll
new file mode 100644
index 00000000000..7f331e10396
--- /dev/null
+++ b/test/CodeGen/X86/pr24374.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @g, i8* null }]
+
+declare i32 @__gxx_personality_seh0(...)
+
+; Function Attrs: nounwind
+define void @f() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_seh0 to i8*) {
+entry:
+  invoke void @g()
+          to label %exit unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  landingpad { i8*, i32 }
+          cleanup
+  unreachable
+
+exit:                                             ; preds = %entry
+  unreachable
+}
+; CHECK-LABEL: f:
+; CHECK:       .seh_proc f
+; CHECK:               .seh_handler __gxx_personality_seh0, @unwind, @except
+; CHECK:       callq g
+; CHECK:               .seh_handlerdata
+; CHECK:               .seh_endproc
+
+define void @g() {
+  unreachable
+}
+; CHECK-LABEL: g:
+; CHECK:       .seh_proc g
+; CHECK:       .seh_endproc
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/setcc-lowering.ll b/test/CodeGen/X86/setcc-lowering.ll
new file mode 100644
index 00000000000..3149fb51576
--- /dev/null
+++ b/test/CodeGen/X86/setcc-lowering.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
+
+; Verify that we don't crash during codegen due to a wrong lowering
+; of a setcc node with illegal operand types and return type.
+
+define <8 x i16> @pr25080(<8 x i32> %a) {
+; CHECK-LABEL: pr25080:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
+; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpsraw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %0 = trunc <8 x i32> %a to <8 x i23>
+  %1 = icmp eq <8 x i23> %0, zeroinitializer
+  %2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+  %3 = sext <8 x i1> %2 to <8 x i16>
+  ret <8 x i16> %3
+}
diff --git a/test/DebugInfo/gvn.ll b/test/DebugInfo/gvn.ll
new file mode 100644
index 00000000000..3ca3663bd83
--- /dev/null
+++ b/test/DebugInfo/gvn.ll
@@ -0,0 +1,135 @@
+; RUN: opt < %s -O2 -gvn -S | FileCheck %s
+;
+; Produced at -O2 from:
+; struct context {
+;   int cur_pid;
+; };
+; int a, b, c, f, d;
+; int pid_for_task(int);
+; sample(struct context *p1)
+; {
+;   if (c)
+;     b = a;
+;   if (a && p1->cur_pid)
+;     sample_internal();
+; }
+; callback() {
+;   f = pid_for_task(d);
+;   sample(&f);
+; }
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+%struct.context = type { i32 }
+
+@c = common global i32 0, align 4
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@d = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+; Declared without an attribute group; invoked below through a bitcast.
+declare i32 @sample_internal(...)
+
+; Function Attrs: nounwind
+define i32 @callback() #0 {
+entry:
+  %0 = load i32, i32* @d, align 4, !dbg !37
+
+  ; Verify that the call still has a debug location after GVN.
+  ; CHECK: %call = tail call i32 @pid_for_task(i32 %0) #{{[0-9]}}, !dbg
+  %call = tail call i32 @pid_for_task(i32 %0) #3, !dbg !37
+
+  store i32 %call, i32* @f, align 4, !dbg !37
+  tail call void @llvm.dbg.value(metadata %struct.context* bitcast (i32* @f to %struct.context*), i64 0, metadata !25, metadata !26) #3, !dbg !38
+  %1 = load i32, i32* @c, align 4, !dbg !40
+  %tobool.i = icmp eq i32 %1, 0, !dbg !40
+  %.pr.i = load i32, i32* @a, align 4, !dbg !41
+  br i1 %tobool.i, label %if.end.i, label %if.then.i, !dbg !42
+
+if.then.i:                                        ; preds = %entry
+  store i32 %.pr.i, i32* @b, align 4, !dbg !43
+  br label %if.end.i, !dbg !43
+
+if.end.i:                                         ; preds = %if.then.i, %entry
+  %tobool1.i = icmp eq i32 %.pr.i, 0, !dbg !41
+
+  ; This instruction has no debug location -- in this
+  ; particular case it was removed by a bug in SimplifyCFG.
+  %2 = load i32, i32* @f, align 4
+
+  ; GVN is supposed to replace the load of @f with a direct reference to %call.
+  ; CHECK: %tobool2.i = icmp eq i32 %call, 0, !dbg
+  %tobool2.i = icmp eq i32 %2, 0, !dbg !41
+
+  %or.cond = or i1 %tobool1.i, %tobool2.i, !dbg !41
+  br i1 %or.cond, label %sample.exit, label %if.then.3.i, !dbg !41
+
+if.then.3.i:                                      ; preds = %if.end.i
+  %call.i = tail call i32 bitcast (i32 (...)* @sample_internal to i32 ()*)() #3, !dbg !44
+  br label %sample.exit, !dbg !44
+
+sample.exit:                                      ; preds = %if.end.i, %if.then.3.i
+  ret i32 undef, !dbg !45
+}
+
+declare i32 @pid_for_task(i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !16)
+!1 = !DIFile(filename: "test.c", directory: "/")
+!2 = !{}
+!3 = !{!4, !13}
+!4 = !DISubprogram(name: "sample", scope: !5, file: !5, line: 6, type: !6, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DIFile(filename: "test.i", directory: "/")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !9}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 64)
+!10 = !DICompositeType(tag: DW_TAG_structure_type, name: "context", file: !5, line: 1, size: 32, align: 32, elements: !11)
+!11 = !{!12}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "cur_pid", scope: !10, file: !5, line: 2, baseType: !8, size: 32, align: 32)
+!13 = !DISubprogram(name: "callback", scope: !5, file: !5, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, isOptimized: false, function: i32 ()* @callback, variables: !2)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!8}
+!16 = !{!17, !18, !19, !20, !21}
+!17 = !DIGlobalVariable(name: "a", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @a)
+!18 = !DIGlobalVariable(name: "b", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @b)
+!19 = !DIGlobalVariable(name: "c", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @c)
+!20 = !DIGlobalVariable(name: "f", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @f)
+!21 = !DIGlobalVariable(name: "d", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @d)
+!22 = !{i32 2, !"Dwarf Version", i32 2}
+!23 = !{i32 2, !"Debug Info Version", i32 3}
+!24 = !{!"clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)"}
+!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p1", arg: 1, scope: !4, file: !5, line: 6, type: !9)
+!26 = !DIExpression()
+!27 = !DILocation(line: 6, scope: !4)
+!28 = !DILocation(line: 8, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !4, file: !5, line: 8)
+!30 = !DILocation(line: 10, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10)
+!32 = !DILocation(line: 8, scope: !4)
+!33 = !DILocation(line: 9, scope: !29)
+!34 = !DILocation(line: 10, scope: !4)
+!35 = !DILocation(line: 11, scope: !31)
+!36 = !DILocation(line: 12, scope: !4)
+!37 = !DILocation(line: 14, scope: !13)
+!38 = !DILocation(line: 6, scope: !4, inlinedAt: !39)
+!39 = distinct !DILocation(line: 15, scope: !13)
+!40 = !DILocation(line: 8, scope: !29, inlinedAt: !39)
+!41 = !DILocation(line: 10, scope: !31, inlinedAt: !39)
+!42 = !DILocation(line: 8, scope: !4, inlinedAt: !39)
+!43 = !DILocation(line: 9, scope: !29, inlinedAt: !39)
+!44 = !DILocation(line: 11, scope: !31, inlinedAt: !39)
+!45 = !DILocation(line: 16, scope: !13)
diff --git a/test/LTO/X86/diagnostic-handler-noexit.ll b/test/LTO/X86/diagnostic-handler-noexit.ll
new file mode 100644
index 00000000000..be768c900f1
--- /dev/null
+++ b/test/LTO/X86/diagnostic-handler-noexit.ll
@@ -0,0 +1,13 @@
+; LTO default diagnostic handler should be non-exiting.
+; This test verifies that after addModule() encounters an error, the diagnostic
+; handler does not call exit(1) and instead returns to the caller of addModule.
+
+; RUN: llvm-as <%s >%t1
+; RUN: llvm-as <%s >%t2
+; RUN: not llvm-lto -o /dev/null %t1 %t2 2>&1 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: Linking globals named 'goodboy': symbol multiply defined!
+; CHECK: llvm-lto{{.*}}: error adding file
+@goodboy = global i32 3203383023, align 4    ; 0xbeefbeef
diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s
index 20562335974..63914675a86 100644
--- a/test/MC/AMDGPU/vop3.s
+++ b/test/MC/AMDGPU/vop3.s
@@ -1,5 +1,8 @@
-// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI
+
 
 //===----------------------------------------------------------------------===//
 // VOPC Instructions
@@ -8,63 +11,81 @@
 // Test forced e64 encoding
 
 v_cmp_lt_f32_e64 s[2:3], v4, -v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x40]
+
 
 //
 // Modifier tests:
 //
 
 v_cmp_lt_f32 s[2:3] -v4, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20] 
+// SICI: v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x20]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -v4, v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x20]
 
 v_cmp_lt_f32 s[2:3]  v4, -v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x40]
 
 v_cmp_lt_f32 s[2:3] -v4, -v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// SICI: v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x60]
 
 v_cmp_lt_f32 s[2:3] |v4|, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], |v4|, v6 ; encoding: [0x02,0x01,0x41,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_lt_f32 s[2:3] v4, |v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, |v6| ; encoding: [0x02,0x02,0x41,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_lt_f32 s[2:3] |v4|, |v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], |v4|, |v6| ; encoding: [0x02,0x03,0x41,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_lt_f32 s[2:3] -|v4|, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
+// SICI: v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x02,0xd0,0x04,0x0d,0x02,0x20]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -|v4|, v6 ; encoding: [0x02,0x01,0x41,0xd0,0x04,0x0d,0x02,0x20]
 
 v_cmp_lt_f32 s[2:3] v4, -|v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x02,0xd0,0x04,0x0d,0x02,0x40]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, -|v6| ; encoding: [0x02,0x02,0x41,0xd0,0x04,0x0d,0x02,0x40]
 
 v_cmp_lt_f32 s[2:3] -|v4|, -|v6|
-// CHECK: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// SICI: v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x02,0xd0,0x04,0x0d,0x02,0x60]
+// VI:   v_cmp_lt_f32_e64 s[2:3], -|v4|, -|v6| ; encoding: [0x02,0x03,0x41,0xd0,0x04,0x0d,0x02,0x60]
 
 //
 // Instruction tests:
 //
 
 v_cmp_f_f32 s[2:3], v4, v6
-// CHECK: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_f_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x40,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_lt_f32 s[2:3], v4, v6
-// CHECK: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_eq_f32 s[2:3], v4, v6
-// CHECK: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x04,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_eq_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x42,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_le_f32 s[2:3], v4, v6
-// CHECK: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x06,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_le_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x43,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_gt_f32 s[2:3], v4, v6
-// CHECK: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x08,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_gt_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x44,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_lg_f32 s[2:3], v4, v6
-// CHECK: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0a,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_lg_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x45,0xd0,0x04,0x0d,0x02,0x00]
 
 v_cmp_ge_f32 s[2:3], v4, v6
-// CHECK: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
+// SICI: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
+// VI:   v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x46,0xd0,0x04,0x0d,0x02,0x00]
 
 // TODO: Finish VOPC
 
@@ -77,22 +98,28 @@ v_cmp_ge_f32 s[2:3], v4, v6
 // 
 
 v_fract_f32 v1, -v2
-// CHECK: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
+// SICI: v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x20]
+// VI:   v_fract_f32_e64 v1, -v2 ; encoding: [0x01,0x00,0x5b,0xd1,0x02,0x01,0x00,0x20]
 
 v_fract_f32 v1, |v2|
-// CHECK: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
+// SICI: v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x00]
+// VI:   v_fract_f32_e64 v1, |v2| ; encoding: [0x01,0x01,0x5b,0xd1,0x02,0x01,0x00,0x00]
 
 v_fract_f32 v1, -|v2|
-// CHECK: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
+// SICI: v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x40,0xd3,0x02,0x01,0x00,0x20]
+// VI:   v_fract_f32_e64 v1, -|v2| ; encoding: [0x01,0x01,0x5b,0xd1,0x02,0x01,0x00,0x20]
 
 v_fract_f32 v1, v2 clamp
-// CHECK: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
+// SICI: v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x00]
+// VI:   v_fract_f32_e64 v1, v2 clamp ; encoding: [0x01,0x80,0x5b,0xd1,0x02,0x01,0x00,0x00]
 
 v_fract_f32 v1, v2 mul:2
-// CHECK: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
+// SICI: v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x40,0xd3,0x02,0x01,0x00,0x08]
+// VI:   v_fract_f32_e64 v1, v2 mul:2 ; encoding: [0x01,0x00,0x5b,0xd1,0x02,0x01,0x00,0x08]
 
 v_fract_f32 v1, v2, div:2 clamp
-// CHECK: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
+// SICI: v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x08,0x40,0xd3,0x02,0x01,0x00,0x18]
+// VI:   v_fract_f32_e64 v1, v2 clamp div:2 ; encoding: [0x01,0x80,0x5b,0xd1,0x02,0x01,0x00,0x18]
 
 // TODO: Finish VOP1
 
@@ -102,37 +129,47 @@ v_fract_f32 v1, v2, div:2 clamp
 
 // Test forced e64 encoding with e32 operands
 
-v_ldexp_f32_e64 v1, v3, v5
-// CHECK: v_ldexp_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x56,0xd2,0x03,0x0b,0x02,0x00]
+v_add_f32_e64 v1, v3, v5
+// SICI: v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x02,0x00]
+// VI:   v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x02,0x00]
 
 
 // TODO: Modifier tests
 
 v_cndmask_b32 v1, v3, v5, s[4:5]
-// CHECK: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
+// SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
+// VI:   v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x00]
 
 //TODO: readlane, writelane
 
 v_add_f32 v1, v3, s5
-// CHECK: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00]
 
 v_sub_f32 v1, v3, s5
-// CHECK: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_sub_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x02,0xd1,0x03,0x0b,0x00,0x00]
 
 v_subrev_f32 v1, v3, s5
-// CHECK: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0a,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_subrev_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x03,0xd1,0x03,0x0b,0x00,0x00]
 
 v_mac_legacy_f32 v1, v3, s5
-// CHECK: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mac_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
+// FIXME: The error message should be: error: instruction not supported on this GPU
+// NOVI: error: invalid operand for instruction
 
 v_mul_legacy_f32 v1, v3, s5
-// CHECK: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x0e,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_mul_legacy_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x04,0xd1,0x03,0x0b,0x00,0x00]
 
 v_mul_f32 v1, v3, s5
-// CHECK: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x10,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_mul_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x05,0xd1,0x03,0x0b,0x00,0x00]
 
 v_mul_i32_i24 v1, v3, s5
-// CHECK: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
+// SICI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
+// VI:   v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd1,0x03,0x0b,0x00,0x00]
 
 ///===---------------------------------------------------------------------===//
 // VOP3 Instructions
@@ -141,7 +178,8 @@ v_mul_i32_i24 v1, v3, s5
 // TODO: Modifier tests
 
 v_mad_legacy_f32 v2, v4, v6, v8
-// CHECK: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
+// SICI: v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0x80,0xd2,0x04,0x0d,0x22,0x04]
+// VI:   v_mad_legacy_f32 v2, v4, v6, v8 ; encoding: [0x02,0x00,0xc0,0xd1,0x04,0x0d,0x22,0x04]
 
 
 
diff --git a/test/MC/ARM/directive-arch-semantic-action.s b/test/MC/ARM/directive-arch-semantic-action.s
new file mode 100644
index 00000000000..b9c65d8e49c
--- /dev/null
+++ b/test/MC/ARM/directive-arch-semantic-action.s
@@ -0,0 +1,12 @@
+@ RUN: not llvm-mc -triple arm-gnueabi-linux -filetype asm %s 2>&1 | FileCheck %s
+
+        .arch	armv6
+        dsb
+@ CHECK: error: instruction requires: data-barriers
+
+        .arch   armv7
+        dsb
+@ CHECK-NOT: error: instruction requires: data-barriers
+
+        .arch   invalid_architecture_name
+@ CHECK: error: Unknown arch name
diff --git a/test/MC/Sparc/sparc-directive-xword.s b/test/MC/Sparc/sparc-directive-xword.s
index 0c9e249a6ad..736f99fbce7 100644
--- a/test/MC/Sparc/sparc-directive-xword.s
+++ b/test/MC/Sparc/sparc-directive-xword.s
@@ -1,5 +1,6 @@
 ! RUN: not llvm-mc %s -arch=sparc   -show-encoding 2>&1 | FileCheck %s --check-prefix=SPARC32
-! RUN: llvm-mc %s -arch=sparcv9 -show-encoding | FileCheck %s --check-prefix=SPARC64
+! RUN: llvm-mc %s -triple sparc64 -show-encoding | FileCheck %s --check-prefix=SPARC64
+! RUN: llvm-mc %s -triple sparcv9 -show-encoding | FileCheck %s --check-prefix=SPARCV9
 
         ! SPARC32:       error: unknown directive
         ! SPARC32-NEXT:  .xword 65536
@@ -8,3 +9,5 @@
         ! SPARC64:  .xword 65536
         .xword 65536
 
+        ! SPARCV9:  .xword 65536
+        .xword 65536
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp
index 9678c8397e0..08218986f45 100644
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -214,8 +214,11 @@ int main(int argc, char **argv) {
     if (SetMergedModule && i == BaseArg) {
       // Transfer ownership to the code generator.
       CodeGen.setModule(Module.release());
-    } else if (!CodeGen.addModule(Module.get()))
+    } else if (!CodeGen.addModule(Module.get())) {
+      // Print a message here so that we know addModule() did not abort.
+      errs() << argv[0] << ": error adding file '" << InputFilenames[i] << "'\n";
       return 1;
+    }
 
     unsigned NumSyms = LTOMod->getSymbolCount();
     for (unsigned I = 0; I < NumSyms; ++I) {
diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile
index 19077a3858a..2bc81dac5f0 100644
--- a/tools/llvm-shlib/Makefile
+++ b/tools/llvm-shlib/Makefile
@@ -61,7 +61,7 @@ endif
 
 ifeq ($(HOST_OS), $(filter $(HOST_OS), DragonFly Linux FreeBSD GNU/kFreeBSD GNU))
     # Add soname to the library.
-    LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME)$(SHLIBEXT)
+    LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME).1$(SHLIBEXT)
 endif
 
 ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux GNU GNU/kFreeBSD))
diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp
index f0c3ecfbb9b..2ff56047555 100644
--- a/unittests/Transforms/Utils/Local.cpp
+++ b/unittests/Transforms/Utils/Local.cpp
@@ -58,3 +58,40 @@ TEST(Local, RecursivelyDeleteDeadPHINodes) {
   delete bb0;
   delete bb1;
 }
+
+TEST(Local, RemoveDuplicatePHINodes) {
+  LLVMContext &C(getGlobalContext());
+  IRBuilder<> B(C);
+
+  std::unique_ptr<Function> F(
+      Function::Create(FunctionType::get(B.getVoidTy(), false),
+                       GlobalValue::ExternalLinkage, "F"));
+  BasicBlock *Entry(BasicBlock::Create(C, "", F.get()));
+  BasicBlock *BB(BasicBlock::Create(C, "", F.get()));
+  BranchInst::Create(BB, Entry);
+
+  B.SetInsertPoint(BB);
+
+  AssertingVH<PHINode> P1 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P1->addIncoming(B.getInt32(42), Entry);
+
+  PHINode *P2 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P2->addIncoming(B.getInt32(42), Entry);
+
+  AssertingVH<PHINode> P3 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P3->addIncoming(B.getInt32(42), Entry);
+  P3->addIncoming(B.getInt32(23), BB);
+
+  PHINode *P4 = B.CreatePHI(Type::getInt32Ty(C), 2);
+  P4->addIncoming(B.getInt32(42), Entry);
+  P4->addIncoming(B.getInt32(23), BB);
+
+  P1->addIncoming(P3, BB);
+  P2->addIncoming(P4, BB);
+  BranchInst::Create(BB, BB);
+
+  // Verify that we can eliminate PHIs that become duplicates after changing
+  // PHIs downstream.
+  EXPECT_TRUE(EliminateDuplicatePHINodes(BB));
+  EXPECT_EQ(3U, BB->size());
+}