diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 882d68e6b60..3dd16d51f0b 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -17,6 +17,9 @@ else()
   set(LINKER_IS_LLD_LINK FALSE)
 endif()
 
+set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO")
+string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
+
 # Ninja Job Pool support
 # The following only works with the Ninja generator in CMake >= 3.0.
 set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
@@ -32,16 +35,19 @@ endif()
 
 set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
   "Define the maximum number of concurrent link jobs.")
-if(LLVM_PARALLEL_LINK_JOBS)
-  if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
-    message(WARNING "Job pooling is only available with Ninja generators.")
-  else()
+if(CMAKE_MAKE_PROGRAM MATCHES "ninja")
+  if(NOT LLVM_PARALLEL_LINK_JOBS AND uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
+    message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
+    set(LLVM_PARALLEL_LINK_JOBS "2")
+  endif()
+  if(LLVM_PARALLEL_LINK_JOBS)
     set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
     set(CMAKE_JOB_POOL_LINK link_job_pool)
   endif()
+elseif(LLVM_PARALLEL_LINK_JOBS)
+  message(WARNING "Job pooling is only available with Ninja generators.")
 endif()
 
-
 if (LINKER_IS_LLD_LINK)
   # Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries.  Adding
   # manifests with mt.exe breaks LLD's symbol tables and takes as much time as
@@ -724,8 +730,6 @@ append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PRO
   CMAKE_EXE_LINKER_FLAGS
   CMAKE_SHARED_LINKER_FLAGS)
 
-set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO")
-string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
 if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK)
   message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)")
 endif()
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
index 983b48fefa0..552fe77cdfb 100644
--- a/cmake/modules/VersionFromVCS.cmake
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -33,7 +33,8 @@ function(add_version_info_from_vcs VERS)
       execute_process(COMMAND ${git_executable} rev-parse --git-dir
         WORKING_DIRECTORY ${SOURCE_DIR}/cmake
         RESULT_VARIABLE git_result
-        OUTPUT_VARIABLE git_dir)
+        OUTPUT_VARIABLE git_dir
+        ERROR_QUIET)
       if(git_result EQUAL 0)
         # Try to get a ref-id
         string(STRIP "${git_dir}" git_dir)
@@ -45,7 +46,8 @@ function(add_version_info_from_vcs VERS)
             WORKING_DIRECTORY ${SOURCE_DIR}
             TIMEOUT 5
             RESULT_VARIABLE git_result
-            OUTPUT_VARIABLE git_output)
+            OUTPUT_VARIABLE git_output
+            ERROR_QUIET)
           if( git_result EQUAL 0 )
             string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output})
             if(svn_url)
diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst
index 97497057fc9..81c067b317d 100644
--- a/docs/AMDGPUUsage.rst
+++ b/docs/AMDGPUUsage.rst
@@ -83,7 +83,7 @@ handler as follows:
    Usage           Code Sequence Description
    =============== ============= ===============================================
    llvm.trap           s_endpgm      Causes wavefront to be terminated.
-   llvm.debugtrap Nothing.         Compiler warning generated that there is no trap handler installed.
+   llvm.debugtrap      Nothing       Compiler warning generated that there is no trap handler installed.
    =============== ============= ===============================================
 
 Assembler
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index ad2178dc587..6dff219ae37 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -1,8 +1,8 @@
 
 if (DOXYGEN_FOUND)
 if (LLVM_ENABLE_DOXYGEN)
-  set(abs_top_srcdir ${LLVM_MAIN_SRC_DIR})
-  set(abs_top_builddir ${LLVM_BINARY_DIR})
+  set(abs_top_srcdir ${CMAKE_CURRENT_SOURCE_DIR})
+  set(abs_top_builddir ${CMAKE_CURRENT_BINARY_DIR})
   
   if (HAVE_DOT)
     set(DOT ${LLVM_PATH_DOT})
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index b0a31589cc4..bf4973ca9ae 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1535,6 +1535,17 @@ example:
 ``sanitize_thread``
     This attribute indicates that ThreadSanitizer checks
     (dynamic thread safety analysis) are enabled for this function.
+``speculatable``
+    This function attribute indicates that the function does not have any
+    effects besides calculating its result and does not have undefined behavior.
+    Note that ``speculatable`` is not enough to conclude that along any
+    particular exection path the number of calls to this function will not be
+    externally observable. This attribute is only valid on functions
+    and declarations, not on individual call sites. If a function is
+    incorrectly marked as speculatable and really does exhibit
+    undefined behavior, the undefined behavior may be observed even
+    if the call site is dead code.
+
 ``ssp``
     This attribute indicates that the function should emit a stack
     smashing protector. It is in the form of a "canary" --- a random value
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
index 4fb67e1e6d5..d115a9cf6de 100644
--- a/docs/ProgrammersManual.rst
+++ b/docs/ProgrammersManual.rst
@@ -776,22 +776,21 @@ readability.
 Using cantFail to simplify safe callsites
 """""""""""""""""""""""""""""""""""""""""
 
-Some functions may only fail for a subset of their inputs. For such functions
-call-sites using known-safe inputs can assume that the result will be a success
-value.
+Some functions may only fail for a subset of their inputs, so calls using known
+safe inputs can be assumed to succeed.
 
 The cantFail functions encapsulate this by wrapping an assertion that their
 argument is a success value and, in the case of Expected<T>, unwrapping the
-T value from the Expected<T> argument:
+T value:
 
 .. code-block:: c++
 
-  Error mayFail(int X);
-  Expected<int> mayFail2(int X);
+  Error onlyFailsForSomeXValues(int X);
+  Expected<int> onlyFailsForSomeXValues2(int X);
 
   void foo() {
-    cantFail(mayFail(KnownSafeValue));
-    int Y = cantFail(mayFail2(KnownSafeValue));
+    cantFail(onlyFailsForSomeXValues(KnownSafeValue));
+    int Y = cantFail(onlyFailsForSomeXValues2(KnownSafeValue));
     ...
   }
 
@@ -801,8 +800,8 @@ terminate the program on an error input, cantFile simply asserts that the result
 is success. In debug builds this will result in an assertion failure if an error
 is encountered. In release builds the behavior of cantFail for failure values is
 undefined. As such, care must be taken in the use of cantFail: clients must be
-certain that a cantFail wrapped call really can not fail under any
-circumstances.
+certain that a cantFail wrapped call really can not fail with the given
+arguments.
 
 Use of the cantFail functions should be rare in library code, but they are
 likely to be of more use in tool and unit-test code where inputs and/or
diff --git a/docs/README.txt b/docs/README.txt
index 6c6e5b90ecf..f1c74261ce4 100644
--- a/docs/README.txt
+++ b/docs/README.txt
@@ -51,3 +51,18 @@ running:
 
     cd docs/
     make -f Makefile.sphinx linkcheck
+
+Doxygen page Output
+==============
+
+Install doxygen <http://www.stack.nl/~dimitri/doxygen/download.html> and dot2tex <https://dot2tex.readthedocs.io/en/latest>.
+
+    cd <build-dir>
+    cmake -DLLVM_ENABLE_DOXYGEN=On <llvm-top-src-dir>
+    make doxygen-llvm # for LLVM docs
+    make doxygen-clang # for clang docs
+
+It will generate html in
+    
+    <build-dir>/docs/doxygen/html # for LLVM docs
+    <build-dir>/tools/clang/docs/doxygen/html # for clang docs
diff --git a/docs/StackMaps.rst b/docs/StackMaps.rst
index a78fde16c2b..99c5e5fbe4d 100644
--- a/docs/StackMaps.rst
+++ b/docs/StackMaps.rst
@@ -319,7 +319,7 @@ format of this section follows:
 .. code-block:: none
 
   Header {
-    uint8  : Stack Map Version (current version is 2)
+    uint8  : Stack Map Version (current version is 3)
     uint8  : Reserved (expected to be 0)
     uint16 : Reserved (expected to be 0)
   }
@@ -341,10 +341,13 @@ format of this section follows:
     uint16 : NumLocations
     Location[NumLocations] {
       uint8  : Register | Direct | Indirect | Constant | ConstantIndex
-      uint8  : Reserved (location flags)
+      uint8  : Reserved (expected to be 0)
+      uint16 : Location Size
       uint16 : Dwarf RegNum
+      uint16 : Reserved (expected to be 0)
       int32  : Offset or SmallConstant
     }
+    uint32 : Padding (only if required to align to 8 byte)
     uint16 : Padding
     uint16 : NumLiveOuts
     LiveOuts[NumLiveOuts]
diff --git a/docs/TableGen/LangIntro.rst b/docs/TableGen/LangIntro.rst
index d8bd17d750b..460ff9067f2 100644
--- a/docs/TableGen/LangIntro.rst
+++ b/docs/TableGen/LangIntro.rst
@@ -58,6 +58,10 @@ types are:
     The 'string' type represents an ordered sequence of characters of arbitrary
     length.
 
+``code``
+    The `code` type represents a code fragment, which can be single/multi-line
+    string literal.
+
 ``bits<n>``
     A 'bits' type is an arbitrary, but fixed, size integer that is broken up
     into individual bits.  This type is useful because it can handle some bits
@@ -105,7 +109,7 @@ supported include:
     hexadecimal integer value
 
 ``"foo"``
-    string value
+    a single-line string value, can be assigned to ``string`` or ``code`` variable.
 
 ``[{ ... }]``
     usually called a "code fragment", but is just a multiline string literal
@@ -126,7 +130,8 @@ supported include:
     access to one bit of a value
 
 ``value{15-17}``
-    access to multiple bits of a value
+    access to an ordered sequence of bits of a value, in particular ``value{15-17}``
+    produces an order that is the reverse of ``value{17-15}``.
 
 ``DEF``
     reference to a record definition
diff --git a/docs/doxygen.cfg.in b/docs/doxygen.cfg.in
index 451eaf4d2fc..e3c7f479ac4 100644
--- a/docs/doxygen.cfg.in
+++ b/docs/doxygen.cfg.in
@@ -58,7 +58,7 @@ PROJECT_LOGO           =
 # entered, it will be relative to the location where doxygen was started. If
 # left blank the current directory will be used.
 
-OUTPUT_DIRECTORY       = @abs_top_builddir@/docs/doxygen
+OUTPUT_DIRECTORY       = @abs_top_builddir@/doxygen
 
 # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
 # directories (in 2 levels) under the output directory of each output format and
@@ -132,7 +132,7 @@ INLINE_INHERITED_MEMB  = NO
 # shortest path that makes the file name unique will be used
 # The default value is: YES.
 
-FULL_PATH_NAMES        = NO
+FULL_PATH_NAMES        = YES
 
 # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
 # Stripping is only done if one of the specified strings matches the left-hand
@@ -144,7 +144,7 @@ FULL_PATH_NAMES        = NO
 # will be relative from the directory where doxygen is started.
 # This tag requires that the tag FULL_PATH_NAMES is set to YES.
 
-STRIP_FROM_PATH        = ../..
+STRIP_FROM_PATH        = @abs_top_srcdir@/..
 
 # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
 # path mentioned in the documentation of a class, which tells the reader which
@@ -153,7 +153,8 @@ STRIP_FROM_PATH        = ../..
 # specify the list of include paths that are normally passed to the compiler
 # using the -I flag.
 
-STRIP_FROM_INC_PATH    =
+STRIP_FROM_INC_PATH    = @abs_top_srcdir@/../include
+STRIP_FROM_INC_PATH    += @abs_top_srcdir@/../lib
 
 # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
 # less readable) file names. This can be useful is your file systems doesn't
@@ -513,7 +514,7 @@ SHOW_GROUPED_MEMB_INC  = NO
 # files with double quotes in the documentation rather than with sharp brackets.
 # The default value is: NO.
 
-FORCE_LOCAL_INCLUDES   = NO
+FORCE_LOCAL_INCLUDES   = YES
 
 # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
 # documentation for inline members.
@@ -743,9 +744,9 @@ WARN_LOGFILE           =
 # spaces.
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = @abs_top_srcdir@/include \
-                         @abs_top_srcdir@/lib \
-                         @abs_top_srcdir@/docs/doxygen-mainpage.dox
+INPUT                  = @abs_top_srcdir@/../include \
+                         @abs_top_srcdir@/../lib \
+                         @abs_top_srcdir@/doxygen-mainpage.dox
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -813,7 +814,7 @@ EXCLUDE_SYMBOLS        =
 # that contain example code fragments that are included (see the \include
 # command).
 
-EXAMPLE_PATH           = @abs_top_srcdir@/examples
+EXAMPLE_PATH           = @abs_top_srcdir@/../examples
 
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
@@ -833,7 +834,7 @@ EXAMPLE_RECURSIVE      = YES
 # that contain images that are to be included in the documentation (see the
 # \image command).
 
-IMAGE_PATH             = @abs_top_srcdir@/docs/img
+IMAGE_PATH             = @abs_top_srcdir@/img
 
 # The INPUT_FILTER tag can be used to specify a program that doxygen should
 # invoke to filter for each input file. Doxygen will invoke the filter program
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index d0104c3f0fa..6d74f344aad 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -366,7 +366,7 @@ public:
   /// that 0 is not a positive value.
   ///
   /// \returns true if this APInt is positive.
-  bool isStrictlyPositive() const { return isNonNegative() && !!*this; }
+  bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
 
   /// \brief Determine if all bits are set
   ///
@@ -377,6 +377,12 @@ public:
     return countPopulationSlowCase() == BitWidth;
   }
 
+  /// \brief Determine if all bits are clear
+  ///
+  /// This checks to see if the value has all bits of the APInt are clear or
+  /// not.
+  bool isNullValue() const { return !*this; }
+
   /// \brief Determine if this is the largest unsigned value.
   ///
   /// This checks to see if the value of this APInt is the maximum unsigned
@@ -395,7 +401,7 @@ public:
   ///
   /// This checks to see if the value of this APInt is the minimum unsigned
   /// value for the APInt's bit width.
-  bool isMinValue() const { return !*this; }
+  bool isMinValue() const { return isNullValue(); }
 
   /// \brief Determine if this is the smallest signed value.
   ///
@@ -611,15 +617,7 @@ public:
   }
 
   /// \brief Return a value containing V broadcasted over NewLen bits.
-  static APInt getSplat(unsigned NewLen, const APInt &V) {
-    assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
-
-    APInt Val = V.zextOrSelf(NewLen);
-    for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
-      Val |= Val << I;
-
-    return Val;
-  }
+  static APInt getSplat(unsigned NewLen, const APInt &V);
 
   /// \brief Determine if two APInts have the same value, after zero-extending
   /// one of them (if needed!) to ensure that the bit-widths match.
@@ -687,7 +685,9 @@ public:
   ///
   /// \returns true if *this is zero, false otherwise.
   bool operator!() const {
-    return *this == 0;
+    if (isSingleWord())
+      return VAL == 0;
+    return countLeadingZerosSlowCase() == BitWidth;
   }
 
   /// @}
@@ -874,6 +874,13 @@ public:
     return *this;
   }
 
+  /// \brief Left-shift assignment function.
+  ///
+  /// Shifts *this left by shiftAmt and assigns the result to *this.
+  ///
+  /// \returns *this after shifting left by ShiftAmt
+  APInt &operator<<=(const APInt &ShiftAmt);
+
   /// @}
   /// \name Binary Operators
   /// @{
@@ -981,7 +988,11 @@ public:
   /// \brief Left-shift function.
   ///
   /// Left-shift this APInt by shiftAmt.
-  APInt shl(const APInt &shiftAmt) const;
+  APInt shl(const APInt &ShiftAmt) const {
+    APInt R(*this);
+    R <<= ShiftAmt;
+    return R;
+  }
 
   /// \brief Rotate left by rotateAmt.
   APInt rotl(const APInt &rotateAmt) const;
@@ -1333,7 +1344,14 @@ public:
   /// \brief Set a given bit to 1.
   ///
   /// Set the given bit to 1 whose position is given as "bitPosition".
-  void setBit(unsigned bitPosition);
+  void setBit(unsigned BitPosition) {
+    assert(BitPosition <= BitWidth && "BitPosition out of range");
+    WordType Mask = maskBit(BitPosition);
+    if (isSingleWord())
+      VAL |= Mask;
+    else
+      pVal[whichWord(BitPosition)] |= Mask;
+  }
 
   /// Set the sign bit to 1.
   void setSignBit() {
@@ -1344,13 +1362,9 @@ public:
   void setBits(unsigned loBit, unsigned hiBit) {
     assert(hiBit <= BitWidth && "hiBit out of range");
     assert(loBit <= BitWidth && "loBit out of range");
+    assert(loBit <= hiBit && "loBit greater than hiBit");
     if (loBit == hiBit)
       return;
-    if (loBit > hiBit) {
-      setLowBits(hiBit);
-      setHighBits(BitWidth - loBit);
-      return;
-    }
     if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
       mask <<= loBit;
@@ -1389,7 +1403,19 @@ public:
   /// \brief Set a given bit to 0.
   ///
   /// Set the given bit to 0 whose position is given as "bitPosition".
-  void clearBit(unsigned bitPosition);
+  void clearBit(unsigned BitPosition) {
+    assert(BitPosition <= BitWidth && "BitPosition out of range");
+    WordType Mask = ~maskBit(BitPosition);
+    if (isSingleWord())
+      VAL &= Mask;
+    else
+      pVal[whichWord(BitPosition)] &= Mask;
+  }
+
+  /// Set the sign bit to 0.
+  void clearSignBit() {
+    clearBit(BitWidth - 1);
+  }
 
   /// \brief Toggle every bit to its opposite value.
   void flipAllBits() {
@@ -1695,7 +1721,7 @@ public:
       return VAL - 1;
 
     // Handle the zero case.
-    if (!getBoolValue())
+    if (isNullValue())
       return UINT32_MAX;
 
     // The non-zero case is handled by computing:
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index f833f417c7d..04c6fd70e07 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -43,7 +43,7 @@ class AssumptionCache {
 
   /// \brief Vector of weak value handles to calls of the @llvm.assume
   /// intrinsic.
-  SmallVector<WeakVH, 4> AssumeHandles;
+  SmallVector<WeakTrackingVH, 4> AssumeHandles;
 
   class AffectedValueCallbackVH final : public CallbackVH {
     AssumptionCache *AC;
@@ -62,12 +62,12 @@ class AssumptionCache {
   /// \brief A map of values about which an assumption might be providing
   /// information to the relevant set of assumptions.
   using AffectedValuesMap =
-    DenseMap<AffectedValueCallbackVH, SmallVector<WeakVH, 1>,
-             AffectedValueCallbackVH::DMI>;
+      DenseMap<AffectedValueCallbackVH, SmallVector<WeakTrackingVH, 1>,
+               AffectedValueCallbackVH::DMI>;
   AffectedValuesMap AffectedValues;
 
   /// Get the vector of assumptions which affect a value from the cache.
-  SmallVector<WeakVH, 1> &getOrInsertAffectedValues(Value *V);
+  SmallVector<WeakTrackingVH, 1> &getOrInsertAffectedValues(Value *V);
 
   /// Copy affected values in the cache for OV to be affected values for NV.
   void copyAffectedValuesInCache(Value *OV, Value *NV);
@@ -120,20 +120,20 @@ public:
   /// FIXME: We should replace this with pointee_iterator<filter_iterator<...>>
   /// when we can write that to filter out the null values. Then caller code
   /// will become simpler.
-  MutableArrayRef<WeakVH> assumptions() {
+  MutableArrayRef<WeakTrackingVH> assumptions() {
     if (!Scanned)
       scanFunction();
     return AssumeHandles;
   }
 
   /// \brief Access the list of assumptions which affect this value.
-  MutableArrayRef<WeakVH> assumptionsFor(const Value *V) {
+  MutableArrayRef<WeakTrackingVH> assumptionsFor(const Value *V) {
     if (!Scanned)
       scanFunction();
 
     auto AVI = AffectedValues.find_as(const_cast<Value *>(V));
     if (AVI == AffectedValues.end())
-      return MutableArrayRef<WeakVH>();
+      return MutableArrayRef<WeakTrackingVH>();
 
     return AVI->second;
   }
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 398bbfb0c41..a15a9e18c81 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -646,7 +646,7 @@ public:
     LazyCallGraph::SCC *C = &InitialC;
 
     // Collect value handles for all of the indirect call sites.
-    SmallVector<WeakVH, 8> CallHandles;
+    SmallVector<WeakTrackingVH, 8> CallHandles;
 
     // Struct to track the counts of direct and indirect calls in each function
     // of the SCC.
@@ -658,7 +658,7 @@ public:
     // Put value handles on all of the indirect calls and return the number of
     // direct calls for each function in the SCC.
     auto ScanSCC = [](LazyCallGraph::SCC &C,
-                      SmallVectorImpl<WeakVH> &CallHandles) {
+                      SmallVectorImpl<WeakTrackingVH> &CallHandles) {
       assert(CallHandles.empty() && "Must start with a clear set of handles.");
 
       SmallVector<CallCount, 4> CallCounts;
@@ -671,7 +671,7 @@ public:
               ++Count.Direct;
             } else {
               ++Count.Indirect;
-              CallHandles.push_back(WeakVH(&I));
+              CallHandles.push_back(WeakTrackingVH(&I));
             }
           }
       }
@@ -699,7 +699,7 @@ public:
              "Cannot have changed the size of the SCC!");
 
       // Check whether any of the handles were devirtualized.
-      auto IsDevirtualizedHandle = [&](WeakVH &CallH) {
+      auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) {
         if (!CallH)
           return false;
         auto CS = CallSite(CallH);
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index ea85436ee58..cc4788d3eda 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -172,7 +172,7 @@ class CallGraphNode {
 public:
   /// \brief A pair of the calling instruction (a call or invoke)
   /// and the call graph node being called.
-  typedef std::pair<WeakVH, CallGraphNode *> CallRecord;
+  typedef std::pair<WeakTrackingVH, CallGraphNode *> CallRecord;
 
 public:
   typedef std::vector<CallRecord> CalledFunctionsVector;
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index bb572dd5603..035b974c5c1 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -80,7 +80,7 @@ private:
 
   /// OperandValToReplace - The Value of the operand in the user instruction
   /// that this IVStrideUse is representing.
-  WeakVH OperandValToReplace;
+  WeakTrackingVH OperandValToReplace;
 
   /// PostIncLoops - The set of loops for which Expr has been adjusted to
   /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 17e5cb6db02..d91d08a524d 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -160,6 +160,10 @@ InlineParams getInlineParams(int Threshold);
 /// the -Oz flag.
 InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel);
 
+/// Return the cost associated with a callsite, including paramater passing
+/// and the call/return instruction.
+int getCallsiteCost(CallSite CS, const DataLayout &DL);
+
 /// \brief Get an InlineCost object representing the cost of inlining this
 /// callsite.
 ///
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 25240dae75e..bf73e099a2b 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -35,35 +35,41 @@
 #include "llvm/IR/User.h"
 
 namespace llvm {
-  template<typename T>
-  class ArrayRef;
-  class AssumptionCache;
-  class DominatorTree;
-  class Instruction;
-  class DataLayout;
-  class FastMathFlags;
-  class OptimizationRemarkEmitter;
-  class TargetLibraryInfo;
-  class Type;
-  class Value;
+class Function;
+template <typename T, typename... TArgs> class AnalysisManager;
+template <class T> class ArrayRef;
+class AssumptionCache;
+class DominatorTree;
+class Instruction;
+class DataLayout;
+class FastMathFlags;
+struct LoopStandardAnalysisResults;
+class OptimizationRemarkEmitter;
+class Pass;
+class TargetLibraryInfo;
+class Type;
+class Value;
 
-  struct SimplifyQuery {
-    const DataLayout &DL;
-    const TargetLibraryInfo *TLI = nullptr;
-    const DominatorTree *DT = nullptr;
-    AssumptionCache *AC = nullptr;
-    const Instruction *CxtI = nullptr;
-    SimplifyQuery(const DataLayout &DL) : DL(DL) {}
+struct SimplifyQuery {
+  const DataLayout &DL;
+  const TargetLibraryInfo *TLI = nullptr;
+  const DominatorTree *DT = nullptr;
+  AssumptionCache *AC = nullptr;
+  const Instruction *CxtI = nullptr;
 
-    SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI,
-                  const DominatorTree *DT, AssumptionCache *AC = nullptr,
-                  const Instruction *CXTI = nullptr)
-        : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {}
-    SimplifyQuery getWithInstruction(Instruction *I) const {
-      SimplifyQuery Copy(*this);
-      Copy.CxtI = I;
-      return Copy;
-    }
+  SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr)
+      : DL(DL), CxtI(CXTI) {}
+
+  SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI,
+                const DominatorTree *DT = nullptr,
+                AssumptionCache *AC = nullptr,
+                const Instruction *CXTI = nullptr)
+      : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {}
+  SimplifyQuery getWithInstruction(Instruction *I) const {
+    SimplifyQuery Copy(*this);
+    Copy.CxtI = I;
+    return Copy;
+  }
   };
 
   // NOTE: the explicit multiple argument versions of these functions are
@@ -73,257 +79,103 @@ namespace llvm {
   /// Given operands for an Add, fold the result or return null.
   Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
                        const SimplifyQuery &Q);
-  Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
-                         const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for a Sub, fold the result or return null.
   Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
                          const SimplifyQuery &Q);
-  Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
-                         const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for an FAdd, fold the result or return null.
   Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                           const SimplifyQuery &Q);
-  Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for an FSub, fold the result or return null.
   Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                           const SimplifyQuery &Q);
-  Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for an FMul, fold the result or return null.
   Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                           const SimplifyQuery &Q);
-  Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for a Mul, fold the result or return null.
   Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for an SDiv, fold the result or return null.
   Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifySDivInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for a UDiv, fold the result or return null.
   Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for an FDiv, fold the result or return null.
   Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                           const SimplifyQuery &Q);
-  Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for an SRem, fold the result or return null.
   Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for a URem, fold the result or return null.
   Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifyURemInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for an FRem, fold the result or return null.
   Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF,
                           const SimplifyQuery &Q);
-  Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for a Shl, fold the result or return null.
   Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                          const SimplifyQuery &Q);
-  Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                         const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for a LShr, fold the result or return null.
   Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
                           const SimplifyQuery &Q);
-  Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for a AShr, fold the result or return nulll.
   Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
                           const SimplifyQuery &Q);
-  Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for an And, fold the result or return null.
   Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for an Or, fold the result or return null.
   Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                        const TargetLibraryInfo *TLI = nullptr,
-                        const DominatorTree *DT = nullptr,
-                        AssumptionCache *AC = nullptr,
-                        const Instruction *CxtI = nullptr);
 
   /// Given operands for an Xor, fold the result or return null.
   Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
-  Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for an ICmpInst, fold the result or return null.
   Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                           const SimplifyQuery &Q);
-  Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for an FCmpInst, fold the result or return null.
   Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                           FastMathFlags FMF, const SimplifyQuery &Q);
-  Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                          FastMathFlags FMF, const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for a SelectInst, fold the result or return null.
   Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                             const SimplifyQuery &Q);
-  Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
-                            const DataLayout &DL,
-                            const TargetLibraryInfo *TLI = nullptr,
-                            const DominatorTree *DT = nullptr,
-                            AssumptionCache *AC = nullptr,
-                            const Instruction *CxtI = nullptr);
 
   /// Given operands for a GetElementPtrInst, fold the result or return null. 
   Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
                          const SimplifyQuery &Q);
-  Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
-                         const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for an InsertValueInst, fold the result or return null.
   Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
                                  ArrayRef<unsigned> Idxs,
                                  const SimplifyQuery &Q);
-  Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
-                                 ArrayRef<unsigned> Idxs, const DataLayout &DL,
-                                 const TargetLibraryInfo *TLI = nullptr,
-                                 const DominatorTree *DT = nullptr,
-                                 AssumptionCache *AC = nullptr,
-                                 const Instruction *CxtI = nullptr);
 
   /// Given operands for an ExtractValueInst, fold the result or return null.
   Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
                                   const SimplifyQuery &Q);
-  Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
-                                  const DataLayout &DL,
-                                  const TargetLibraryInfo *TLI = nullptr,
-                                  const DominatorTree *DT = nullptr,
-                                  AssumptionCache *AC = nullptr,
-                                  const Instruction *CxtI = nullptr);
 
   /// Given operands for an ExtractElementInst, fold the result or return null.
   Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
                                     const SimplifyQuery &Q);
-  Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
-                                    const DataLayout &DL,
-                                    const TargetLibraryInfo *TLI = nullptr,
-                                    const DominatorTree *DT = nullptr,
-                                    AssumptionCache *AC = nullptr,
-                                    const Instruction *CxtI = nullptr);
 
   /// Given operands for a CastInst, fold the result or return null.
   Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
                           const SimplifyQuery &Q);
-  Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
-                          const DataLayout &DL,
-                          const TargetLibraryInfo *TLI = nullptr,
-                          const DominatorTree *DT = nullptr,
-                          AssumptionCache *AC = nullptr,
-                          const Instruction *CxtI = nullptr);
 
   /// Given operands for a ShuffleVectorInst, fold the result or return null.
   Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
                                    Type *RetTy, const SimplifyQuery &Q);
-  Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
-                                   Type *RetTy, const DataLayout &DL,
-                                   const TargetLibraryInfo *TLI = nullptr,
-                                   const DominatorTree *DT = nullptr,
-                                   AssumptionCache *AC = nullptr,
-                                   const Instruction *CxtI = nullptr);
 
   //=== Helper functions for higher up the class hierarchy.
 
@@ -331,63 +183,29 @@ namespace llvm {
   /// Given operands for a CmpInst, fold the result or return null.
   Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                          const SimplifyQuery &Q);
-  Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                         const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given operands for a BinaryOperator, fold the result or return null.
   Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                        const SimplifyQuery &Q);
-  Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                       const DataLayout &DL,
-                       const TargetLibraryInfo *TLI = nullptr,
-                       const DominatorTree *DT = nullptr,
-                       AssumptionCache *AC = nullptr,
-                       const Instruction *CxtI = nullptr);
 
   /// Given operands for an FP BinaryOperator, fold the result or return null.
   /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
   /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
   Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                          FastMathFlags FMF, const SimplifyQuery &Q);
-  Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                         FastMathFlags FMF, const DataLayout &DL,
-                         const TargetLibraryInfo *TLI = nullptr,
-                         const DominatorTree *DT = nullptr,
-                         AssumptionCache *AC = nullptr,
-                         const Instruction *CxtI = nullptr);
 
   /// Given a function and iterators over arguments, fold the result or return
   /// null.
   Value *SimplifyCall(Value *V, User::op_iterator ArgBegin,
                       User::op_iterator ArgEnd, const SimplifyQuery &Q);
-  Value *SimplifyCall(Value *V, User::op_iterator ArgBegin,
-                      User::op_iterator ArgEnd, const DataLayout &DL,
-                      const TargetLibraryInfo *TLI = nullptr,
-                      const DominatorTree *DT = nullptr,
-                      AssumptionCache *AC = nullptr,
-                      const Instruction *CxtI = nullptr);
 
   /// Given a function and set of arguments, fold the result or return null.
   Value *SimplifyCall(Value *V, ArrayRef<Value *> Args, const SimplifyQuery &Q);
-  Value *SimplifyCall(Value *V, ArrayRef<Value *> Args, const DataLayout &DL,
-                      const TargetLibraryInfo *TLI = nullptr,
-                      const DominatorTree *DT = nullptr,
-                      AssumptionCache *AC = nullptr,
-                      const Instruction *CxtI = nullptr);
 
   /// See if we can compute a simplified version of this instruction. If not,
   /// return null.
   Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
                              OptimizationRemarkEmitter *ORE = nullptr);
-  Value *SimplifyInstruction(Instruction *I, const DataLayout &DL,
-                             const TargetLibraryInfo *TLI = nullptr,
-                             const DominatorTree *DT = nullptr,
-                             AssumptionCache *AC = nullptr,
-                             OptimizationRemarkEmitter *ORE = nullptr);
 
   /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
   ///
@@ -411,6 +229,15 @@ namespace llvm {
                                       const TargetLibraryInfo *TLI = nullptr,
                                       const DominatorTree *DT = nullptr,
                                       AssumptionCache *AC = nullptr);
+  // These helper functions return a SimplifyQuery structure that contains as
+  // many of the optional analysis we use as are currently valid.  This is the
+  // strongly preferred way of constructing SimplifyQuery in passes.
+  const SimplifyQuery getBestSimplifyQuery(Pass &, Function &);
+  template <class T, class... TArgs>
+  const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &,
+                                           Function &);
+  const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &,
+                                           const DataLayout &);
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 743faf2b67d..60dafccd84b 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -235,7 +235,7 @@ class ObjectSizeOffsetEvaluator
   : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> {
 
   typedef IRBuilder<TargetFolder> BuilderTy;
-  typedef std::pair<WeakVH, WeakVH> WeakEvalType;
+  typedef std::pair<WeakTrackingVH, WeakTrackingVH> WeakEvalType;
   typedef DenseMap<const Value*, WeakEvalType> CacheMapTy;
   typedef SmallPtrSet<const Value*, 8> PtrSetTy;
 
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 517592a3d04..7d16f34e54c 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -189,7 +189,7 @@ namespace llvm {
     /// replace congruent phis with their most canonical representative. Return
     /// the number of phis eliminated.
     unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
-                                 SmallVectorImpl<WeakVH> &DeadInsts,
+                                 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
                                  const TargetTransformInfo *TTI = nullptr);
 
     /// Insert code to directly compute the specified SCEV expression into the
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 67196687d55..b9639dba188 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -197,6 +197,12 @@ public:
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef<const Value *> Arguments) const;
 
+  /// \return The estimated number of case clusters when lowering \p 'SI'.
+  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
+  /// table.
+  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+                                            unsigned &JTSize) const;
+
   /// \brief Estimate the cost of a given IR user when lowered.
   ///
   /// This can estimate the cost of either a ConstantExpr or Instruction when
@@ -764,6 +770,8 @@ public:
                                ArrayRef<Type *> ParamTys) = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                ArrayRef<const Value *> Arguments) = 0;
+  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+                                                    unsigned &JTSize) = 0;
   virtual int getUserCost(const User *U) = 0;
   virtual bool hasBranchDivergence() = 0;
   virtual bool isSourceOfDivergence(const Value *V) = 0;
@@ -1067,6 +1075,10 @@ public:
   unsigned getMaxInterleaveFactor(unsigned VF) override {
     return Impl.getMaxInterleaveFactor(VF);
   }
+  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+                                            unsigned &JTSize) override {
+    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
+  }
   unsigned
   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                          OperandValueKind Opd2Info,
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9ab6b7445ab..d7fda9e14b0 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -114,6 +114,12 @@ public:
     return TTI::TCC_Free;
   }
 
+  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+                                            unsigned &JTSize) {
+    JTSize = 0;
+    return SI.getNumCases();
+  }
+
   unsigned getCallCost(FunctionType *FTy, int NumArgs) {
     assert(FTy && "FunctionType must be provided to this routine.");
 
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 764308dceed..a54c39e3ea3 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -60,7 +60,7 @@ template <typename T> class ArrayRef;
   /// \p KnownZero the set of bits that are known to be zero
   /// \p KnownOne the set of bits that are known to be one
   void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
-                                         APInt &KnownZero, APInt &KnownOne);
+                                         KnownBits &Known);
   /// Return true if LHS and RHS have no common bits set.
   bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
                            const DataLayout &DL,
@@ -417,7 +417,7 @@ template <typename T> class ArrayRef;
   ///
   /// Note that this currently only considers the basic block that is
   /// the parent of I.
-  bool isKnownNotFullPoison(const Instruction *PoisonI);
+  bool programUndefinedIfFullPoison(const Instruction *PoisonI);
 
   /// \brief Specific patterns of select instructions we can match.
   enum SelectPatternFlavor {
diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h
index 0701ddbb7f1..54f990d0023 100644
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@@ -93,6 +93,10 @@ namespace llvm {
 
     /// Parse the specified bitcode buffer, returning the module summary index.
     Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
+
+    /// Parse the specified bitcode buffer and merge its module summary index
+    /// into CombinedIndex.
+    Error readSummary(ModuleSummaryIndex &CombinedIndex, unsigned ModuleId);
   };
 
   /// Returns a list of modules in the specified bitcode buffer.
@@ -141,6 +145,18 @@ namespace llvm {
   Expected<std::unique_ptr<ModuleSummaryIndex>>
   getModuleSummaryIndex(MemoryBufferRef Buffer);
 
+  /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
+  Error readModuleSummaryIndex(MemoryBufferRef Buffer,
+                               ModuleSummaryIndex &CombinedIndex,
+                               unsigned ModuleId);
+
+  /// Parse the module summary index out of an IR file and return the module
+  /// summary index object if found, or an empty summary if not. If Path refers
+  /// to an empty file and the -ignore-empty-index-file cl::opt flag is passed
+  /// this function will return nullptr.
+  Expected<std::unique_ptr<ModuleSummaryIndex>>
+  getModuleSummaryIndexForFile(StringRef Path);
+
   /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
   /// for an LLVM IR bitcode wrapper.
   ///
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 03eac80bc1e..8ee1e4b583b 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -545,7 +545,8 @@ enum AttributeKindCodes {
   ATTR_KIND_INACCESSIBLEMEM_ONLY = 49,
   ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50,
   ATTR_KIND_ALLOC_SIZE = 51,
-  ATTR_KIND_WRITEONLY = 52
+  ATTR_KIND_WRITEONLY = 52,
+  ATTR_KIND_SPECULATABLE = 53
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index e30e947f787..32542fa8746 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -171,6 +171,62 @@ public:
     return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
   }
 
+  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+                                            unsigned &JumpTableSize) {
+    /// Try to find the estimated number of clusters. Note that the number of
+    /// clusters identified in this function could be different from the actural
+    /// numbers found in lowering. This function ignore switches that are
+    /// lowered with a mix of jump table / bit test / BTree. This function was
+    /// initially intended to be used when estimating the cost of switch in
+    /// inline cost heuristic, but it's a generic cost model to be used in other
+    /// places (e.g., in loop unrolling).
+    unsigned N = SI.getNumCases();
+    const TargetLoweringBase *TLI = getTLI();
+    const DataLayout &DL = this->getDataLayout();
+
+    JumpTableSize = 0;
+    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
+
+    // Early exit if both a jump table and bit test are not allowed.
+    if (N < 1 || (!IsJTAllowed && DL.getPointerSizeInBits() < N))
+      return N;
+
+    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
+    APInt MinCaseVal = MaxCaseVal;
+    for (auto CI : SI.cases()) {
+      const APInt &CaseVal = CI.getCaseValue()->getValue();
+      if (CaseVal.sgt(MaxCaseVal))
+        MaxCaseVal = CaseVal;
+      if (CaseVal.slt(MinCaseVal))
+        MinCaseVal = CaseVal;
+    }
+
+    // Check if suitable for a bit test
+    if (N <= DL.getPointerSizeInBits()) {
+      SmallPtrSet<const BasicBlock *, 4> Dests;
+      for (auto I : SI.cases())
+        Dests.insert(I.getCaseSuccessor());
+
+      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
+                                     DL))
+        return 1;
+    }
+
+    // Check if suitable for a jump table.
+    if (IsJTAllowed) {
+      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
+        return N;
+      uint64_t Range =
+          (MaxCaseVal - MinCaseVal).getLimitedValue(UINT64_MAX - 1) + 1;
+      // Check whether a range of clusters is dense enough for a jump table
+      if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
+        JumpTableSize = Range;
+        return 1;
+      }
+    }
+    return N;
+  }
+
   unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
 
   unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 75cd7da9d6b..14ee5019ef2 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <vector>
 
@@ -171,9 +172,8 @@ public:
   struct LiveOutInfo {
     unsigned NumSignBits : 31;
     unsigned IsValid : 1;
-    APInt KnownOne, KnownZero;
-    LiveOutInfo() : NumSignBits(0), IsValid(true), KnownOne(1, 0),
-                    KnownZero(1, 0) {}
+    KnownBits Known;
+    LiveOutInfo() : NumSignBits(0), IsValid(true), Known(1) {}
   };
 
   /// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND)
@@ -247,16 +247,16 @@ public:
 
   /// AddLiveOutRegInfo - Adds LiveOutInfo for a register.
   void AddLiveOutRegInfo(unsigned Reg, unsigned NumSignBits,
-                         const APInt &KnownZero, const APInt &KnownOne) {
+                         const KnownBits &Known) {
     // Only install this information if it tells us something.
-    if (NumSignBits == 1 && KnownZero == 0 && KnownOne == 0)
+    if (NumSignBits == 1 && Known.Zero == 0 && Known.One == 0)
       return;
 
     LiveOutRegInfo.grow(Reg);
     LiveOutInfo &LOI = LiveOutRegInfo[Reg];
     LOI.NumSignBits = NumSignBits;
-    LOI.KnownOne = KnownOne;
-    LOI.KnownZero = KnownZero;
+    LOI.Known.One = Known.One;
+    LOI.Known.Zero = Known.Zero;
   }
 
   /// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 899563acc33..45f25f96ec1 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -61,9 +61,6 @@ class InstructionSelector {
 public:
   virtual ~InstructionSelector() {}
 
-  /// This is executed before selecting a function.
-  virtual void beginFunction(const MachineFunction &MF) {}
-
   /// Select the (possibly generic) instruction \p I to only use target-specific
   /// opcodes. It is OK to insert multiple instructions, but they cannot be
   /// generic pre-isel instructions.
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index ee3fd0bdda2..ca0f3fbad89 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -216,6 +216,9 @@ namespace ISD {
     /// These nodes take two operands of the same value type, and produce two
     /// results.  The first result is the normal add or sub result, the second
     /// result is the carry flag result.
+    /// FIXME: These nodes are deprecated in favor of ADDCARRY and SUBCARRY.
+    /// They are kept around for now to provide a smooth transition path
+    /// toward the use of ADDCARRY/SUBCARRY and will eventually be removed.
     ADDC, SUBC,
 
     /// Carry-using nodes for multiple precision addition and subtraction. These
@@ -227,6 +230,16 @@ namespace ISD {
     /// values.
     ADDE, SUBE,
 
+    /// Carry-using nodes for multiple precision addition and subtraction.
+    /// These nodes take three operands: The first two are the normal lhs and
+    /// rhs to the add or sub, and the third is a boolean indicating if there
+    /// is an incoming carry. These nodes produce two results: the normal
+    /// result of the add or sub, and the output carry so they can be chained
+    /// together. The use of this opcode is preferable to adde/sube if the
+    /// target supports it, as the carry is a regular value rather than a
+    /// glue, which allows further optimisation.
+    ADDCARRY, SUBCARRY,
+
     /// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
     /// These nodes take two operands: the normal LHS and RHS to the add. They
     /// produce two results: the normal result of the add, and a boolean that
diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h
index 38cf8aa165a..47b40de6fe1 100644
--- a/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/include/llvm/CodeGen/MIRYamlMapping.h
@@ -345,7 +345,7 @@ struct MachineFrameInfo {
   bool HasCalls = false;
   StringValue StackProtector;
   // TODO: Serialize FunctionContextIdx
-  unsigned MaxCallFrameSize = 0;
+  unsigned MaxCallFrameSize = ~0u; ///< ~0u means: not computed yet.
   bool HasOpaqueSPAdjustment = false;
   bool HasVAStart = false;
   bool HasMustTailInVarArgFunc = false;
@@ -366,7 +366,7 @@ template <> struct MappingTraits<MachineFrameInfo> {
     YamlIO.mapOptional("hasCalls", MFI.HasCalls);
     YamlIO.mapOptional("stackProtector", MFI.StackProtector,
                        StringValue()); // Don't print it out when it's empty.
-    YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize);
+    YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, ~0u);
     YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment);
     YamlIO.mapOptional("hasVAStart", MFI.HasVAStart);
     YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc);
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 5c9728b0a51..61be9f775c9 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -21,15 +21,9 @@
 
 namespace llvm {
 class raw_ostream;
-class DataLayout;
-class TargetRegisterClass;
-class Type;
 class MachineFunction;
 class MachineBasicBlock;
-class TargetFrameLowering;
-class TargetMachine;
 class BitVector;
-class Value;
 class AllocaInst;
 
 /// The CalleeSavedInfo class tracks the information need to locate where a
@@ -226,7 +220,7 @@ class MachineFrameInfo {
   /// setup/destroy pseudo instructions (as defined in the TargetFrameInfo
   /// class).  This information is important for frame pointer elimination.
   /// It is only valid during and after prolog/epilog code insertion.
-  unsigned MaxCallFrameSize = 0;
+  unsigned MaxCallFrameSize = ~0u;
 
   /// The prolog/epilog code inserter fills in this vector with each
   /// callee saved register saved in the frame.  Beyond its use by the prolog/
@@ -531,7 +525,16 @@ public:
   /// CallFrameSetup/Destroy pseudo instructions are used by the target, and
   /// then only during or after prolog/epilog code insertion.
   ///
-  unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
+  unsigned getMaxCallFrameSize() const {
+    // TODO: Enable this assert when targets are fixed.
+    //assert(isMaxCallFrameSizeComputed() && "MaxCallFrameSize not computed yet");
+    if (!isMaxCallFrameSizeComputed())
+      return 0;
+    return MaxCallFrameSize;
+  }
+  bool isMaxCallFrameSizeComputed() const {
+    return MaxCallFrameSize != ~0u;
+  }
   void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
 
   /// Create a new object at a fixed location on the stack.
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 4bb658898fb..9e1d148c7ce 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -33,6 +33,7 @@
 
 namespace llvm {
 
+struct KnownBits;
 class MachineConstantPoolValue;
 class MachineFunction;
 class MDNode;
@@ -687,6 +688,10 @@ public:
   /// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3>
   SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV);
 
+  /// Convert Op, which must be of float type, to the
+  /// float type VT, by either extending or rounding (by truncation).
+  SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
+
   /// Convert Op, which must be of integer type, to the
   /// integer type VT, by either any-extending or truncating it.
   SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
@@ -773,7 +778,7 @@ public:
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                   ArrayRef<SDUse> Ops);
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
-                  ArrayRef<SDValue> Ops, const SDNodeFlags *Flags = nullptr);
+                  ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags());
   SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys,
                   ArrayRef<SDValue> Ops);
   SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs,
@@ -781,9 +786,10 @@ public:
 
   // Specialize based on number of operands.
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT);
-  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N);
+  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N,
+                  const SDNodeFlags Flags = SDNodeFlags());
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
-                  SDValue N2, const SDNodeFlags *Flags = nullptr);
+                  SDValue N2, const SDNodeFlags Flags = SDNodeFlags());
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                   SDValue N2, SDValue N3);
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
@@ -1103,7 +1109,7 @@ public:
 
   /// Get the specified node if it's already available, or else return NULL.
   SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, ArrayRef<SDValue> Ops,
-                          const SDNodeFlags *Flags = nullptr);
+                          const SDNodeFlags Flags = SDNodeFlags());
 
   /// Creates a SDDbgValue node.
   SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R,
@@ -1266,7 +1272,7 @@ public:
 
   SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
                                        ArrayRef<SDValue> Ops,
-                                       const SDNodeFlags *Flags = nullptr);
+                                       const SDNodeFlags Flags = SDNodeFlags());
 
   /// Constant fold a setcc to true or false.
   SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond,
@@ -1283,21 +1289,19 @@ public:
     const;
 
   /// Determine which bits of Op are known to be either zero or one and return
-  /// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
-  /// those that are shared by every vector element.
+  /// them in Known. For vectors, the known bits are those that are shared by
+  /// every vector element.
   /// Targets can implement the computeKnownBitsForTargetNode method in the
   /// TargetLowering class to allow target nodes to be understood.
-  void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
-                        unsigned Depth = 0) const;
+  void computeKnownBits(SDValue Op, KnownBits &Known, unsigned Depth = 0) const;
 
   /// Determine which bits of Op are known to be either zero or one and return
-  /// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
-  /// us to only collect the known bits that are shared by the requested vector
-  /// elements.
+  /// them in Known. The DemandedElts argument allows us to only collect the
+  /// known bits that are shared by the requested vector elements.
   /// Targets can implement the computeKnownBitsForTargetNode method in the
   /// TargetLowering class to allow target nodes to be understood.
-  void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
-                        const APInt &DemandedElts, unsigned Depth = 0) const;
+  void computeKnownBits(SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+                        unsigned Depth = 0) const;
 
   /// Used to represent the possible overflow behavior of an operation.
   /// Never: the operation cannot overflow.
@@ -1439,10 +1443,6 @@ private:
 
   void allnodes_clear();
 
-  SDNode *GetBinarySDNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs,
-                          SDValue N1, SDValue N2,
-                          const SDNodeFlags *Flags = nullptr);
-
   /// Look up the node specified by ID in CSEMap.  If it exists, return it.  If
   /// not, return the insertion token that will make insertion faster.  This
   /// overload is for nodes other than Constant or ConstantFP, use the other one
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 81cc0b39cf8..35ddcf80c91 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -341,6 +341,11 @@ template<> struct simplify_type<SDUse> {
 /// the backend.
 struct SDNodeFlags {
 private:
+  // This bit is used to determine if the flags are in a defined state.
+  // Flag bits can only be masked out during intersection if the masking flags
+  // are defined.
+  bool AnyDefined : 1;
+
   bool NoUnsignedWrap : 1;
   bool NoSignedWrap : 1;
   bool Exact : 1;
@@ -355,22 +360,57 @@ private:
 public:
   /// Default constructor turns off all optimization flags.
   SDNodeFlags()
-      : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false),
-        UnsafeAlgebra(false), NoNaNs(false), NoInfs(false),
+      : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
+        Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false),
         NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
         AllowContract(false) {}
 
+  /// Sets the state of the flags to the defined state.
+  void setDefined() { AnyDefined = true; }
+  /// Returns true if the flags are in a defined state.
+  bool isDefined() const { return AnyDefined; }
+
   // These are mutators for each flag.
-  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
-  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
-  void setExact(bool b) { Exact = b; }
-  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
-  void setNoNaNs(bool b) { NoNaNs = b; }
-  void setNoInfs(bool b) { NoInfs = b; }
-  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
-  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
-  void setVectorReduction(bool b) { VectorReduction = b; }
-  void setAllowContract(bool b) { AllowContract = b; }
+  void setNoUnsignedWrap(bool b) {
+    setDefined();
+    NoUnsignedWrap = b;
+  }
+  void setNoSignedWrap(bool b) {
+    setDefined();
+    NoSignedWrap = b;
+  }
+  void setExact(bool b) {
+    setDefined();
+    Exact = b;
+  }
+  void setUnsafeAlgebra(bool b) {
+    setDefined();
+    UnsafeAlgebra = b;
+  }
+  void setNoNaNs(bool b) {
+    setDefined();
+    NoNaNs = b;
+  }
+  void setNoInfs(bool b) {
+    setDefined();
+    NoInfs = b;
+  }
+  void setNoSignedZeros(bool b) {
+    setDefined();
+    NoSignedZeros = b;
+  }
+  void setAllowReciprocal(bool b) {
+    setDefined();
+    AllowReciprocal = b;
+  }
+  void setVectorReduction(bool b) {
+    setDefined();
+    VectorReduction = b;
+  }
+  void setAllowContract(bool b) {
+    setDefined();
+    AllowContract = b;
+  }
 
   // These are accessors for each flag.
   bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -385,17 +425,20 @@ public:
   bool hasAllowContract() const { return AllowContract; }
 
   /// Clear any flags in this flag set that aren't also set in Flags.
-  void intersectWith(const SDNodeFlags *Flags) {
-    NoUnsignedWrap &= Flags->NoUnsignedWrap;
-    NoSignedWrap &= Flags->NoSignedWrap;
-    Exact &= Flags->Exact;
-    UnsafeAlgebra &= Flags->UnsafeAlgebra;
-    NoNaNs &= Flags->NoNaNs;
-    NoInfs &= Flags->NoInfs;
-    NoSignedZeros &= Flags->NoSignedZeros;
-    AllowReciprocal &= Flags->AllowReciprocal;
-    VectorReduction &= Flags->VectorReduction;
-    AllowContract &= Flags->AllowContract;
+  /// If the given Flags are undefined then don't do anything.
+  void intersectWith(const SDNodeFlags Flags) {
+    if (!Flags.isDefined())
+      return;
+    NoUnsignedWrap &= Flags.NoUnsignedWrap;
+    NoSignedWrap &= Flags.NoSignedWrap;
+    Exact &= Flags.Exact;
+    UnsafeAlgebra &= Flags.UnsafeAlgebra;
+    NoNaNs &= Flags.NoNaNs;
+    NoInfs &= Flags.NoInfs;
+    NoSignedZeros &= Flags.NoSignedZeros;
+    AllowReciprocal &= Flags.AllowReciprocal;
+    VectorReduction &= Flags.VectorReduction;
+    AllowContract &= Flags.AllowContract;
   }
 };
 
@@ -527,6 +570,8 @@ private:
   /// Return a pointer to the specified value type.
   static const EVT *getValueTypeList(EVT VT);
 
+  SDNodeFlags Flags;
+
 public:
   /// Unique and persistent id per SDNode in the DAG.
   /// Used for debug printing.
@@ -799,12 +844,12 @@ public:
     return nullptr;
   }
 
-  /// This could be defined as a virtual function and implemented more simply
-  /// and directly, but it is not to avoid creating a vtable for this class.
-  const SDNodeFlags *getFlags() const;
+  const SDNodeFlags getFlags() const { return Flags; }
+  void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
 
   /// Clear any flags in this node that aren't also set in Flags.
-  void intersectFlagsWith(const SDNodeFlags *Flags);
+  /// If Flags is not in a defined state then this has no effect.
+  void intersectFlagsWith(const SDNodeFlags Flags);
 
   /// Return the number of values defined/returned by this operator.
   unsigned getNumValues() const { return NumValues; }
@@ -1032,43 +1077,6 @@ inline void SDUse::setNode(SDNode *N) {
   if (N) N->addUse(*this);
 }
 
-/// Returns true if the opcode is a binary operation with flags.
-static bool isBinOpWithFlags(unsigned Opcode) {
-  switch (Opcode) {
-  case ISD::SDIV:
-  case ISD::UDIV:
-  case ISD::SRA:
-  case ISD::SRL:
-  case ISD::MUL:
-  case ISD::ADD:
-  case ISD::SUB:
-  case ISD::SHL:
-  case ISD::FADD:
-  case ISD::FDIV:
-  case ISD::FMUL:
-  case ISD::FREM:
-  case ISD::FSUB:
-    return true;
-  default:
-    return false;
-  }
-}
-
-/// This class is an extension of BinarySDNode
-/// used from those opcodes that have associated extra flags.
-class BinaryWithFlagsSDNode : public SDNode {
-public:
-  SDNodeFlags Flags;
-
-  BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
-                        SDVTList VTs, const SDNodeFlags &NodeFlags)
-      : SDNode(Opc, Order, dl, VTs), Flags(NodeFlags) {}
-
-  static bool classof(const SDNode *N) {
-    return isBinOpWithFlags(N->getOpcode());
-  }
-};
-
 /// This class is used to form a handle around another node that
 /// is persistent and is updated across invocations of replaceAllUsesWith on its
 /// operand.  This node should be directly created by end-users and not added to
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index cd843447545..b87a5e56699 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -42,7 +42,7 @@ def v64i1  : ValueType<64 , 19>;   //  64 x i1 vector value
 def v512i1 : ValueType<512, 20>;   // 512 x i1 vector value
 def v1024i1: ValueType<1024,21>;   //1024 x i1 vector value
 
-def v1i8   : ValueType<16,  22>;   //  1 x i8  vector value
+def v1i8   : ValueType<8,   22>;   //  1 x i8  vector value
 def v2i8   : ValueType<16 , 23>;   //  2 x i8  vector value
 def v4i8   : ValueType<32 , 24>;   //  4 x i8  vector value
 def v8i8   : ValueType<64 , 25>;   //  8 x i8  vector value
diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h
index 487f3b6446f..086d6dff11c 100644
--- a/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -50,8 +50,10 @@ public:
 
 template <typename Kind>
 struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> {
-  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
-                   codeview::CVRecord<Kind> &Item) const {
+  typedef void ContextType;
+
+  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
+                       codeview::CVRecord<Kind> &Item, void *Ctx) {
     using namespace codeview;
     const RecordPrefix *Prefix = nullptr;
     BinaryStreamReader Reader(Stream);
diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h
index e599f8a19e3..f881ad0c9d8 100644
--- a/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -291,7 +291,7 @@ enum class ModifierOptions : uint16_t {
 };
 CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ModifierOptions)
 
-enum class ModuleSubstreamKind : uint32_t {
+enum class ModuleDebugFragmentKind : uint32_t {
   None = 0,
   Symbols = 0xf1,
   Lines = 0xf2,
@@ -547,7 +547,8 @@ enum class TrampolineType : uint16_t { TrampIncremental, BranchIsland };
 enum class FileChecksumKind : uint8_t { None, MD5, SHA1, SHA256 };
 
 enum LineFlags : uint16_t {
-  HaveColumns = 1, // CV_LINES_HAVE_COLUMNS
+  LF_None = 0,
+  LF_HaveColumns = 1, // CV_LINES_HAVE_COLUMNS
 };
 }
 }
diff --git a/include/llvm/DebugInfo/CodeView/Line.h b/include/llvm/DebugInfo/CodeView/Line.h
index 975b503fe30..ac229c33751 100644
--- a/include/llvm/DebugInfo/CodeView/Line.h
+++ b/include/llvm/DebugInfo/CodeView/Line.h
@@ -127,27 +127,6 @@ public:
   bool isNeverStepInto() const { return LineInf.isNeverStepInto(); }
 };
 
-enum class InlineeLinesSignature : uint32_t {
-  Normal,    // CV_INLINEE_SOURCE_LINE_SIGNATURE
-  ExtraFiles // CV_INLINEE_SOURCE_LINE_SIGNATURE_EX
-};
-
-struct InlineeSourceLine {
-  TypeIndex Inlinee;         // ID of the function that was inlined.
-  ulittle32_t FileID;        // Offset into FileChecksums subsection.
-  ulittle32_t SourceLineNum; // First line of inlined code.
-  // If extra files present:
-  //   ulittle32_t ExtraFileCount;
-  //   ulittle32_t Files[];
-};
-
-struct FileChecksum {
-  ulittle32_t FileNameOffset; // Byte offset of filename in global string table.
-  uint8_t ChecksumSize;       // Number of bytes of checksum.
-  uint8_t ChecksumKind;       // FileChecksumKind
-  // Checksum bytes follow.
-};
-
 } // namespace codeview
 } // namespace llvm
 
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h
new file mode 100644
index 00000000000..a5a3b851b84
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h
@@ -0,0 +1,91 @@
+//===- ModuleDebugFileChecksumFragment.h ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFILECHECKSUMFRAGMENT_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFILECHECKSUMFRAGMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace codeview {
+
+struct FileChecksumEntry {
+  uint32_t FileNameOffset;    // Byte offset of filename in global stringtable.
+  FileChecksumKind Kind;      // The type of checksum.
+  ArrayRef<uint8_t> Checksum; // The bytes of the checksum.
+};
+}
+}
+
+namespace llvm {
+template <> struct VarStreamArrayExtractor<codeview::FileChecksumEntry> {
+public:
+  typedef void ContextType;
+
+  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
+                       codeview::FileChecksumEntry &Item, void *Ctx);
+};
+}
+
+namespace llvm {
+namespace codeview {
+class ModuleDebugFileChecksumFragmentRef final : public ModuleDebugFragmentRef {
+  typedef VarStreamArray<codeview::FileChecksumEntry> FileChecksumArray;
+  typedef FileChecksumArray::Iterator Iterator;
+
+public:
+  ModuleDebugFileChecksumFragmentRef()
+      : ModuleDebugFragmentRef(ModuleDebugFragmentKind::FileChecksums) {}
+
+  static bool classof(const ModuleDebugFragmentRef *S) {
+    return S->kind() == ModuleDebugFragmentKind::FileChecksums;
+  }
+
+  Error initialize(BinaryStreamReader Reader);
+
+  Iterator begin() const { return Checksums.begin(); }
+  Iterator end() const { return Checksums.end(); }
+
+  const FileChecksumArray &getArray() const { return Checksums; }
+
+private:
+  FileChecksumArray Checksums;
+};
+
+class ModuleDebugFileChecksumFragment final : public ModuleDebugFragment {
+public:
+  ModuleDebugFileChecksumFragment();
+
+  static bool classof(const ModuleDebugFragment *S) {
+    return S->kind() == ModuleDebugFragmentKind::FileChecksums;
+  }
+
+  void addChecksum(uint32_t StringTableOffset, FileChecksumKind Kind,
+                   ArrayRef<uint8_t> Bytes);
+
+  uint32_t calculateSerializedLength() override;
+  Error commit(BinaryStreamWriter &Writer) override;
+  uint32_t mapChecksumOffset(uint32_t StringTableOffset) const;
+
+private:
+  DenseMap<uint32_t, uint32_t> OffsetMap;
+  uint32_t SerializedSize = 0;
+  llvm::BumpPtrAllocator Storage;
+  std::vector<FileChecksumEntry> Checksums;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFragment.h
new file mode 100644
index 00000000000..a5311cae948
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFragment.h
@@ -0,0 +1,48 @@
+//===- ModuleDebugFragment.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+namespace codeview {
+
+class ModuleDebugFragmentRef {
+public:
+  explicit ModuleDebugFragmentRef(ModuleDebugFragmentKind Kind) : Kind(Kind) {}
+  virtual ~ModuleDebugFragmentRef();
+
+  ModuleDebugFragmentKind kind() const { return Kind; }
+
+protected:
+  ModuleDebugFragmentKind Kind;
+};
+
+class ModuleDebugFragment {
+public:
+  explicit ModuleDebugFragment(ModuleDebugFragmentKind Kind) : Kind(Kind) {}
+  virtual ~ModuleDebugFragment();
+
+  ModuleDebugFragmentKind kind() const { return Kind; }
+
+  virtual Error commit(BinaryStreamWriter &Writer) = 0;
+  virtual uint32_t calculateSerializedLength() = 0;
+
+protected:
+  ModuleDebugFragmentKind Kind;
+};
+
+} // namespace codeview
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h
new file mode 100644
index 00000000000..b98c8605592
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h
@@ -0,0 +1,78 @@
+//===- ModuleDebugFragment.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+
+class ModuleDebugFragment;
+
+// Corresponds to the `CV_DebugSSubsectionHeader_t` structure.
+struct ModuleDebugFragmentHeader {
+  support::ulittle32_t Kind;   // codeview::ModuleDebugFragmentKind enum
+  support::ulittle32_t Length; // number of bytes occupied by this record.
+};
+
+class ModuleDebugFragmentRecord {
+public:
+  ModuleDebugFragmentRecord();
+  ModuleDebugFragmentRecord(ModuleDebugFragmentKind Kind, BinaryStreamRef Data);
+
+  static Error initialize(BinaryStreamRef Stream,
+                          ModuleDebugFragmentRecord &Info);
+
+  uint32_t getRecordLength() const;
+  ModuleDebugFragmentKind kind() const;
+  BinaryStreamRef getRecordData() const;
+
+private:
+  ModuleDebugFragmentKind Kind;
+  BinaryStreamRef Data;
+};
+
+class ModuleDebugFragmentRecordBuilder {
+public:
+  ModuleDebugFragmentRecordBuilder(ModuleDebugFragmentKind Kind,
+                                   ModuleDebugFragment &Frag);
+  uint32_t calculateSerializedLength();
+  Error commit(BinaryStreamWriter &Writer);
+
+private:
+  ModuleDebugFragmentKind Kind;
+  ModuleDebugFragment &Frag;
+};
+
+typedef VarStreamArray<ModuleDebugFragmentRecord> ModuleDebugFragmentArray;
+
+} // namespace codeview
+
+template <>
+struct VarStreamArrayExtractor<codeview::ModuleDebugFragmentRecord> {
+  typedef void ContextType;
+
+  static Error extract(BinaryStreamRef Stream, uint32_t &Length,
+                       codeview::ModuleDebugFragmentRecord &Info, void *Ctx) {
+    if (auto EC = codeview::ModuleDebugFragmentRecord::initialize(Stream, Info))
+      return EC;
+    Length = Info.getRecordLength();
+    return Error::success();
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h
new file mode 100644
index 00000000000..1f55d202420
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h
@@ -0,0 +1,68 @@
+//===- ModuleDebugFragmentVisitor.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H
+
+#include "llvm/Support/Error.h"
+#include <cstdint>
+
+namespace llvm {
+
+namespace codeview {
+
+class ModuleDebugFileChecksumFragmentRef;
+class ModuleDebugFragmentRecord;
+class ModuleDebugInlineeLineFragmentRef;
+class ModuleDebugLineFragmentRef;
+class ModuleDebugUnknownFragmentRef;
+
+class ModuleDebugFragmentVisitor {
+public:
+  virtual ~ModuleDebugFragmentVisitor() = default;
+
+  virtual Error visitUnknown(ModuleDebugUnknownFragmentRef &Unknown) {
+    return Error::success();
+  }
+  virtual Error visitLines(ModuleDebugLineFragmentRef &Lines) {
+    return Error::success();
+  }
+
+  virtual Error
+  visitFileChecksums(ModuleDebugFileChecksumFragmentRef &Checksums) {
+    return Error::success();
+  }
+
+  virtual Error visitInlineeLines(ModuleDebugInlineeLineFragmentRef &Inlinees) {
+    return Error::success();
+  }
+
+  virtual Error finished() { return Error::success(); }
+};
+
+Error visitModuleDebugFragment(const ModuleDebugFragmentRecord &R,
+                               ModuleDebugFragmentVisitor &V);
+
+template <typename T>
+Error visitModuleDebugFragments(T &&FragmentRange,
+                                ModuleDebugFragmentVisitor &V) {
+  for (const auto &L : FragmentRange) {
+    if (auto EC = visitModuleDebugFragment(L, V))
+      return EC;
+  }
+  if (auto EC = V.finished())
+    return EC;
+  return Error::success();
+}
+
+} // end namespace codeview
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h
new file mode 100644
index 00000000000..177367c111c
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h
@@ -0,0 +1,103 @@
+//===- ModuleDebugInlineeLinesFragment.h ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGINLINEELINESFRAGMENT_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGINLINEELINESFRAGMENT_H
+
+#include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+
+class ModuleDebugInlineeLineFragmentRef;
+
+enum class InlineeLinesSignature : uint32_t {
+  Normal,    // CV_INLINEE_SOURCE_LINE_SIGNATURE
+  ExtraFiles // CV_INLINEE_SOURCE_LINE_SIGNATURE_EX
+};
+
+struct InlineeSourceLineHeader {
+  TypeIndex Inlinee;                  // ID of the function that was inlined.
+  support::ulittle32_t FileID;        // Offset into FileChecksums subsection.
+  support::ulittle32_t SourceLineNum; // First line of inlined code.
+                                      // If extra files present:
+                                      //   ulittle32_t ExtraFileCount;
+                                      //   ulittle32_t Files[];
+};
+
+struct InlineeSourceLine {
+  const InlineeSourceLineHeader *Header;
+  FixedStreamArray<support::ulittle32_t> ExtraFiles;
+};
+}
+
+template <> struct VarStreamArrayExtractor<codeview::InlineeSourceLine> {
+  typedef codeview::ModuleDebugInlineeLineFragmentRef ContextType;
+
+  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
+                       codeview::InlineeSourceLine &Item,
+                       ContextType *Fragment);
+};
+
+namespace codeview {
+class ModuleDebugInlineeLineFragmentRef final : public ModuleDebugFragmentRef {
+  typedef VarStreamArray<InlineeSourceLine> LinesArray;
+  typedef LinesArray::Iterator Iterator;
+
+public:
+  ModuleDebugInlineeLineFragmentRef();
+
+  static bool classof(const ModuleDebugFragmentRef *S) {
+    return S->kind() == ModuleDebugFragmentKind::InlineeLines;
+  }
+
+  Error initialize(BinaryStreamReader Reader);
+  bool hasExtraFiles() const;
+
+  Iterator begin() const { return Lines.begin(); }
+  Iterator end() const { return Lines.end(); }
+
+private:
+  InlineeLinesSignature Signature;
+  VarStreamArray<InlineeSourceLine> Lines;
+};
+
+class ModuleDebugInlineeLineFragment final : public ModuleDebugFragment {
+public:
+  explicit ModuleDebugInlineeLineFragment(bool HasExtraFiles);
+
+  static bool classof(const ModuleDebugFragment *S) {
+    return S->kind() == ModuleDebugFragmentKind::InlineeLines;
+  }
+
+  Error commit(BinaryStreamWriter &Writer) override;
+  uint32_t calculateSerializedLength() override;
+
+  void addInlineSite(TypeIndex FuncId, uint32_t FileOffset,
+                     uint32_t SourceLine);
+  void addExtraFile(uint32_t FileOffset);
+
+private:
+  bool HasExtraFiles = false;
+  uint32_t ExtraFileCount = 0;
+
+  struct Entry {
+    std::vector<support::ulittle32_t> ExtraFiles;
+    InlineeSourceLineHeader Header;
+  };
+  std::vector<Entry> Entries;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h
new file mode 100644
index 00000000000..dcfe86dd850
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h
@@ -0,0 +1,137 @@
+//===- ModuleDebugLineFragment.h --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H
+
+#include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+
+// Corresponds to the `CV_DebugSLinesHeader_t` structure.
+struct LineFragmentHeader {
+  support::ulittle32_t RelocOffset;  // Code offset of line contribution.
+  support::ulittle16_t RelocSegment; // Code segment of line contribution.
+  support::ulittle16_t Flags;        // See LineFlags enumeration.
+  support::ulittle32_t CodeSize;     // Code size of this line contribution.
+};
+
+// Corresponds to the `CV_DebugSLinesFileBlockHeader_t` structure.
+struct LineBlockFragmentHeader {
+  support::ulittle32_t NameIndex; // Offset of FileChecksum entry in File
+                                  // checksums buffer.  The checksum entry then
+                                  // contains another offset into the string
+                                  // table of the actual name.
+  support::ulittle32_t NumLines;  // Number of lines
+  support::ulittle32_t BlockSize; // Code size of block, in bytes.
+  // The following two variable length arrays appear immediately after the
+  // header.  The structure definitions follow.
+  // LineNumberEntry   Lines[NumLines];
+  // ColumnNumberEntry Columns[NumLines];
+};
+
+// Corresponds to `CV_Line_t` structure
+struct LineNumberEntry {
+  support::ulittle32_t Offset; // Offset to start of code bytes for line number
+  support::ulittle32_t Flags;  // Start:24, End:7, IsStatement:1
+};
+
+// Corresponds to `CV_Column_t` structure
+struct ColumnNumberEntry {
+  support::ulittle16_t StartColumn;
+  support::ulittle16_t EndColumn;
+};
+
+struct LineColumnEntry {
+  support::ulittle32_t NameIndex;
+  FixedStreamArray<LineNumberEntry> LineNumbers;
+  FixedStreamArray<ColumnNumberEntry> Columns;
+};
+
+class LineColumnExtractor {
+public:
+  typedef const LineFragmentHeader ContextType;
+
+  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
+                       LineColumnEntry &Item, const LineFragmentHeader *Header);
+};
+
+class ModuleDebugLineFragmentRef final : public ModuleDebugFragmentRef {
+  friend class LineColumnExtractor;
+  typedef VarStreamArray<LineColumnEntry, LineColumnExtractor> LineInfoArray;
+  typedef LineInfoArray::Iterator Iterator;
+
+public:
+  ModuleDebugLineFragmentRef();
+
+  static bool classof(const ModuleDebugFragmentRef *S) {
+    return S->kind() == ModuleDebugFragmentKind::Lines;
+  }
+
+  Error initialize(BinaryStreamReader Reader);
+
+  Iterator begin() const { return LinesAndColumns.begin(); }
+  Iterator end() const { return LinesAndColumns.end(); }
+
+  const LineFragmentHeader *header() const { return Header; }
+
+  bool hasColumnInfo() const;
+
+private:
+  const LineFragmentHeader *Header = nullptr;
+  LineInfoArray LinesAndColumns;
+};
+
+class ModuleDebugLineFragment final : public ModuleDebugFragment {
+  struct Block {
+    Block(uint32_t ChecksumBufferOffset)
+        : ChecksumBufferOffset(ChecksumBufferOffset) {}
+
+    uint32_t ChecksumBufferOffset;
+    std::vector<LineNumberEntry> Lines;
+    std::vector<ColumnNumberEntry> Columns;
+  };
+
+public:
+  ModuleDebugLineFragment();
+
+  static bool classof(const ModuleDebugFragment *S) {
+    return S->kind() == ModuleDebugFragmentKind::Lines;
+  }
+
+  void createBlock(uint32_t ChecksumBufferOffset);
+  void addLineInfo(uint32_t Offset, const LineInfo &Line);
+  void addLineAndColumnInfo(uint32_t Offset, const LineInfo &Line,
+                            uint32_t ColStart, uint32_t ColEnd);
+
+  uint32_t calculateSerializedLength() override;
+  Error commit(BinaryStreamWriter &Writer) override;
+
+  void setRelocationAddress(uint16_t Segment, uint16_t Offset);
+  void setCodeSize(uint32_t Size);
+  void setFlags(LineFlags Flags);
+
+  bool hasColumnInfo() const;
+
+private:
+  uint16_t RelocOffset = 0;
+  uint16_t RelocSegment = 0;
+  uint32_t CodeSize = 0;
+  LineFlags Flags = LF_None;
+  std::vector<Block> Blocks;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h
new file mode 100644
index 00000000000..b8c1c02e5cf
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h
@@ -0,0 +1,33 @@
+//===- ModuleDebugUnknownFragment.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H
+#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/Support/BinaryStreamRef.h"
+
+namespace llvm {
+namespace codeview {
+
+class ModuleDebugUnknownFragmentRef final : public ModuleDebugFragmentRef {
+public:
+  ModuleDebugUnknownFragmentRef(ModuleDebugFragmentKind Kind,
+                                BinaryStreamRef Data)
+      : ModuleDebugFragmentRef(Kind), Data(Data) {}
+
+  BinaryStreamRef getData() const { return Data; }
+
+private:
+  BinaryStreamRef Data;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/ModuleSubstream.h b/include/llvm/DebugInfo/CodeView/ModuleSubstream.h
deleted file mode 100644
index a1c5c93cc3f..00000000000
--- a/include/llvm/DebugInfo/CodeView/ModuleSubstream.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===- ModuleSubstream.h ----------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H
-#define LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H
-
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-
-namespace llvm {
-namespace codeview {
-
-// Corresponds to the `CV_DebugSSubsectionHeader_t` structure.
-struct ModuleSubsectionHeader {
-  support::ulittle32_t Kind;   // codeview::ModuleSubstreamKind enum
-  support::ulittle32_t Length; // number of bytes occupied by this record.
-};
-
-// Corresponds to the `CV_DebugSLinesHeader_t` structure.
-struct LineSubstreamHeader {
-  support::ulittle32_t RelocOffset;  // Code offset of line contribution.
-  support::ulittle16_t RelocSegment; // Code segment of line contribution.
-  support::ulittle16_t Flags;        // See LineFlags enumeration.
-  support::ulittle32_t CodeSize;     // Code size of this line contribution.
-};
-
-// Corresponds to the `CV_DebugSLinesFileBlockHeader_t` structure.
-struct LineFileBlockHeader {
-  support::ulittle32_t NameIndex; // Index in DBI name buffer of filename.
-  support::ulittle32_t NumLines;  // Number of lines
-  support::ulittle32_t BlockSize; // Code size of block, in bytes.
-  // The following two variable length arrays appear immediately after the
-  // header.  The structure definitions follow.
-  // LineNumberEntry   Lines[NumLines];
-  // ColumnNumberEntry Columns[NumLines];
-};
-
-// Corresponds to `CV_Line_t` structure
-struct LineNumberEntry {
-  support::ulittle32_t Offset; // Offset to start of code bytes for line number
-  support::ulittle32_t Flags;  // Start:24, End:7, IsStatement:1
-};
-
-// Corresponds to `CV_Column_t` structure
-struct ColumnNumberEntry {
-  support::ulittle16_t StartColumn;
-  support::ulittle16_t EndColumn;
-};
-
-class ModuleSubstream {
-public:
-  ModuleSubstream();
-  ModuleSubstream(ModuleSubstreamKind Kind, BinaryStreamRef Data);
-  static Error initialize(BinaryStreamRef Stream, ModuleSubstream &Info);
-  uint32_t getRecordLength() const;
-  ModuleSubstreamKind getSubstreamKind() const;
-  BinaryStreamRef getRecordData() const;
-
-private:
-  ModuleSubstreamKind Kind;
-  BinaryStreamRef Data;
-};
-
-typedef VarStreamArray<ModuleSubstream> ModuleSubstreamArray;
-} // namespace codeview
-
-template <> struct VarStreamArrayExtractor<codeview::ModuleSubstream> {
-  Error operator()(BinaryStreamRef Stream, uint32_t &Length,
-                   codeview::ModuleSubstream &Info) const {
-    if (auto EC = codeview::ModuleSubstream::initialize(Stream, Info))
-      return EC;
-    Length = Info.getRecordLength();
-    return Error::success();
-  }
-};
-} // namespace llvm
-
-#endif // LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H
diff --git a/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h b/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h
deleted file mode 100644
index 31344a9427d..00000000000
--- a/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h
+++ /dev/null
@@ -1,132 +0,0 @@
-//===- ModuleSubstreamVisitor.h ---------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H
-#define LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <cstdint>
-
-namespace llvm {
-
-namespace codeview {
-
-struct LineColumnEntry {
-  support::ulittle32_t NameIndex;
-  FixedStreamArray<LineNumberEntry> LineNumbers;
-  FixedStreamArray<ColumnNumberEntry> Columns;
-};
-
-struct FileChecksumEntry {
-  uint32_t FileNameOffset;    // Byte offset of filename in global stringtable.
-  FileChecksumKind Kind;      // The type of checksum.
-  ArrayRef<uint8_t> Checksum; // The bytes of the checksum.
-};
-
-typedef VarStreamArray<LineColumnEntry> LineInfoArray;
-typedef VarStreamArray<FileChecksumEntry> FileChecksumArray;
-
-class IModuleSubstreamVisitor {
-public:
-  virtual ~IModuleSubstreamVisitor() = default;
-
-  virtual Error visitUnknown(ModuleSubstreamKind Kind,
-                             BinaryStreamRef Data) = 0;
-  virtual Error visitSymbols(BinaryStreamRef Data);
-  virtual Error visitLines(BinaryStreamRef Data,
-                           const LineSubstreamHeader *Header,
-                           const LineInfoArray &Lines);
-  virtual Error visitStringTable(BinaryStreamRef Data);
-  virtual Error visitFileChecksums(BinaryStreamRef Data,
-                                   const FileChecksumArray &Checksums);
-  virtual Error visitFrameData(BinaryStreamRef Data);
-  virtual Error visitInlineeLines(BinaryStreamRef Data);
-  virtual Error visitCrossScopeImports(BinaryStreamRef Data);
-  virtual Error visitCrossScopeExports(BinaryStreamRef Data);
-  virtual Error visitILLines(BinaryStreamRef Data);
-  virtual Error visitFuncMDTokenMap(BinaryStreamRef Data);
-  virtual Error visitTypeMDTokenMap(BinaryStreamRef Data);
-  virtual Error visitMergedAssemblyInput(BinaryStreamRef Data);
-  virtual Error visitCoffSymbolRVA(BinaryStreamRef Data);
-};
-
-Error visitModuleSubstream(const ModuleSubstream &R,
-                           IModuleSubstreamVisitor &V);
-} // end namespace codeview
-
-template <> class VarStreamArrayExtractor<codeview::LineColumnEntry> {
-public:
-  VarStreamArrayExtractor(const codeview::LineSubstreamHeader *Header)
-      : Header(Header) {}
-
-  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
-                   codeview::LineColumnEntry &Item) const {
-    using namespace codeview;
-    const LineFileBlockHeader *BlockHeader;
-    BinaryStreamReader Reader(Stream);
-    if (auto EC = Reader.readObject(BlockHeader))
-      return EC;
-    bool HasColumn = Header->Flags & uint32_t(LineFlags::HaveColumns);
-    uint32_t LineInfoSize =
-        BlockHeader->NumLines *
-        (sizeof(LineNumberEntry) + (HasColumn ? sizeof(ColumnNumberEntry) : 0));
-    if (BlockHeader->BlockSize < sizeof(LineFileBlockHeader))
-      return make_error<CodeViewError>(cv_error_code::corrupt_record,
-                                       "Invalid line block record size");
-    uint32_t Size = BlockHeader->BlockSize - sizeof(LineFileBlockHeader);
-    if (LineInfoSize > Size)
-      return make_error<CodeViewError>(cv_error_code::corrupt_record,
-                                       "Invalid line block record size");
-    // The value recorded in BlockHeader->BlockSize includes the size of
-    // LineFileBlockHeader.
-    Len = BlockHeader->BlockSize;
-    Item.NameIndex = BlockHeader->NameIndex;
-    if (auto EC = Reader.readArray(Item.LineNumbers, BlockHeader->NumLines))
-      return EC;
-    if (HasColumn) {
-      if (auto EC = Reader.readArray(Item.Columns, BlockHeader->NumLines))
-        return EC;
-    }
-    return Error::success();
-  }
-
-private:
-  const codeview::LineSubstreamHeader *Header;
-};
-
-template <> class VarStreamArrayExtractor<codeview::FileChecksumEntry> {
-public:
-  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
-                   codeview::FileChecksumEntry &Item) const {
-    using namespace codeview;
-    const FileChecksum *Header;
-    BinaryStreamReader Reader(Stream);
-    if (auto EC = Reader.readObject(Header))
-      return EC;
-    Item.FileNameOffset = Header->FileNameOffset;
-    Item.Kind = static_cast<FileChecksumKind>(Header->ChecksumKind);
-    if (auto EC = Reader.readBytes(Item.Checksum, Header->ChecksumSize))
-      return EC;
-    Len = sizeof(FileChecksum) + Header->ChecksumSize;
-    return Error::success();
-  }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H
diff --git a/include/llvm/DebugInfo/CodeView/TypeDatabase.h b/include/llvm/DebugInfo/CodeView/TypeDatabase.h
index 54ad862cfa7..220de4bf0ee 100644
--- a/include/llvm/DebugInfo/CodeView/TypeDatabase.h
+++ b/include/llvm/DebugInfo/CodeView/TypeDatabase.h
@@ -35,6 +35,7 @@ public:
   StringRef getTypeName(TypeIndex Index) const;
 
   const CVType &getTypeRecord(TypeIndex Index) const;
+  CVType &getTypeRecord(TypeIndex Index);
 
   bool containsTypeIndex(TypeIndex Index) const;
 
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index e3386a8dcd2..d51408122fc 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -161,6 +161,10 @@ public:
   virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All,
                     bool DumpEH = false, bool SummarizeTypes = false) = 0;
 
+  virtual bool verify(raw_ostream &OS, DIDumpType DumpType = DIDT_All) {
+    // No verifier? Just say things went well.
+    return true;
+  }
   virtual DILineInfo getLineInfoForAddress(uint64_t Address,
       DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
   virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address,
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index d89e2c684cd..3c04c6716ea 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -106,6 +106,8 @@ public:
   void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All,
             bool DumpEH = false, bool SummarizeTypes = false) override;
 
+  bool verify(raw_ostream &OS, DIDumpType DumpType = DIDT_All) override;
+
   typedef DWARFUnitSection<DWARFCompileUnit>::iterator_range cu_iterator_range;
   typedef DWARFUnitSection<DWARFTypeUnit>::iterator_range tu_iterator_range;
   typedef iterator_range<decltype(TUs)::iterator> tu_section_iterator_range;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index e5bb24707b6..dd0e2648bf3 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -24,7 +24,8 @@ class raw_ostream;
 
 class DWARFDebugLine {
 public:
-  DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {}
+  DWARFDebugLine(const RelocAddrMap *LineInfoRelocMap)
+      : RelocMap(LineInfoRelocMap) {}
 
   struct FileNameEntry {
     FileNameEntry() = default;
@@ -38,50 +39,46 @@ public:
   struct Prologue {
     Prologue();
 
-    // The size in bytes of the statement information for this compilation unit
-    // (not including the total_length field itself).
+    /// The size in bytes of the statement information for this compilation unit
+    /// (not including the total_length field itself).
     uint64_t TotalLength;
-    // Version identifier for the statement information format.
+    /// Version identifier for the statement information format.
     uint16_t Version;
-    // The number of bytes following the prologue_length field to the beginning
-    // of the first byte of the statement program itself.
+    /// The number of bytes following the prologue_length field to the beginning
+    /// of the first byte of the statement program itself.
     uint64_t PrologueLength;
-    // The size in bytes of the smallest target machine instruction. Statement
-    // program opcodes that alter the address register first multiply their
-    // operands by this value.
+    /// The size in bytes of the smallest target machine instruction. Statement
+    /// program opcodes that alter the address register first multiply their
+    /// operands by this value.
     uint8_t MinInstLength;
-    // The maximum number of individual operations that may be encoded in an
-    // instruction.
+    /// The maximum number of individual operations that may be encoded in an
+    /// instruction.
     uint8_t MaxOpsPerInst;
-    // The initial value of theis_stmtregister.
+    /// The initial value of theis_stmtregister.
     uint8_t DefaultIsStmt;
-    // This parameter affects the meaning of the special opcodes. See below.
+    /// This parameter affects the meaning of the special opcodes. See below.
     int8_t LineBase;
-    // This parameter affects the meaning of the special opcodes. See below.
+    /// This parameter affects the meaning of the special opcodes. See below.
     uint8_t LineRange;
-    // The number assigned to the first special opcode.
+    /// The number assigned to the first special opcode.
     uint8_t OpcodeBase;
     std::vector<uint8_t> StandardOpcodeLengths;
-    std::vector<const char*> IncludeDirectories;
+    std::vector<const char *> IncludeDirectories;
     std::vector<FileNameEntry> FileNames;
 
     bool IsDWARF64;
 
-    uint32_t sizeofTotalLength() const {
-      return IsDWARF64 ? 12 : 4;
-    }
+    uint32_t sizeofTotalLength() const { return IsDWARF64 ? 12 : 4; }
 
-    uint32_t sizeofPrologueLength() const {
-      return IsDWARF64 ? 8 : 4;
-    }
+    uint32_t sizeofPrologueLength() const { return IsDWARF64 ? 8 : 4; }
 
-    // Length of the prologue in bytes.
+    /// Length of the prologue in bytes.
     uint32_t getLength() const {
       return PrologueLength + sizeofTotalLength() + sizeof(Version) +
              sizeofPrologueLength();
     }
 
-    // Length of the line table data in bytes (not including the prologue).
+    /// Length of the line table data in bytes (not including the prologue).
     uint32_t getStatementTableLength() const {
       return TotalLength + sizeofTotalLength() - getLength();
     }
@@ -92,70 +89,70 @@ public:
 
     void clear();
     void dump(raw_ostream &OS) const;
-    bool parse(DataExtractor debug_line_data, uint32_t *offset_ptr);
+    bool parse(DataExtractor DebugLineData, uint32_t *OffsetPtr);
   };
 
-  // Standard .debug_line state machine structure.
+  /// Standard .debug_line state machine structure.
   struct Row {
-    explicit Row(bool default_is_stmt = false);
+    explicit Row(bool DefaultIsStmt = false);
 
     /// Called after a row is appended to the matrix.
     void postAppend();
-    void reset(bool default_is_stmt);
+    void reset(bool DefaultIsStmt);
     void dump(raw_ostream &OS) const;
 
-    static bool orderByAddress(const Row& LHS, const Row& RHS) {
+    static bool orderByAddress(const Row &LHS, const Row &RHS) {
       return LHS.Address < RHS.Address;
     }
 
-    // The program-counter value corresponding to a machine instruction
-    // generated by the compiler.
+    /// The program-counter value corresponding to a machine instruction
+    /// generated by the compiler.
     uint64_t Address;
-    // An unsigned integer indicating a source line number. Lines are numbered
-    // beginning at 1. The compiler may emit the value 0 in cases where an
-    // instruction cannot be attributed to any source line.
+    /// An unsigned integer indicating a source line number. Lines are numbered
+    /// beginning at 1. The compiler may emit the value 0 in cases where an
+    /// instruction cannot be attributed to any source line.
     uint32_t Line;
-    // An unsigned integer indicating a column number within a source line.
-    // Columns are numbered beginning at 1. The value 0 is reserved to indicate
-    // that a statement begins at the 'left edge' of the line.
+    /// An unsigned integer indicating a column number within a source line.
+    /// Columns are numbered beginning at 1. The value 0 is reserved to indicate
+    /// that a statement begins at the 'left edge' of the line.
     uint16_t Column;
-    // An unsigned integer indicating the identity of the source file
-    // corresponding to a machine instruction.
+    /// An unsigned integer indicating the identity of the source file
+    /// corresponding to a machine instruction.
     uint16_t File;
-    // An unsigned integer representing the DWARF path discriminator value
-    // for this location.
+    /// An unsigned integer representing the DWARF path discriminator value
+    /// for this location.
     uint32_t Discriminator;
-    // An unsigned integer whose value encodes the applicable instruction set
-    // architecture for the current instruction.
+    /// An unsigned integer whose value encodes the applicable instruction set
+    /// architecture for the current instruction.
     uint8_t Isa;
-    // A boolean indicating that the current instruction is the beginning of a
-    // statement.
-    uint8_t IsStmt:1,
-            // A boolean indicating that the current instruction is the
-            // beginning of a basic block.
-            BasicBlock:1,
-            // A boolean indicating that the current address is that of the
-            // first byte after the end of a sequence of target machine
-            // instructions.
-            EndSequence:1,
-            // A boolean indicating that the current address is one (of possibly
-            // many) where execution should be suspended for an entry breakpoint
-            // of a function.
-            PrologueEnd:1,
-            // A boolean indicating that the current address is one (of possibly
-            // many) where execution should be suspended for an exit breakpoint
-            // of a function.
-            EpilogueBegin:1;
+    /// A boolean indicating that the current instruction is the beginning of a
+    /// statement.
+    uint8_t IsStmt : 1,
+        /// A boolean indicating that the current instruction is the
+        /// beginning of a basic block.
+        BasicBlock : 1,
+        /// A boolean indicating that the current address is that of the
+        /// first byte after the end of a sequence of target machine
+        /// instructions.
+        EndSequence : 1,
+        /// A boolean indicating that the current address is one (of possibly
+        /// many) where execution should be suspended for an entry breakpoint
+        /// of a function.
+        PrologueEnd : 1,
+        /// A boolean indicating that the current address is one (of possibly
+        /// many) where execution should be suspended for an exit breakpoint
+        /// of a function.
+        EpilogueBegin : 1;
   };
 
-  // Represents a series of contiguous machine instructions. Line table for each
-  // compilation unit may consist of multiple sequences, which are not
-  // guaranteed to be in the order of ascending instruction address.
+  /// Represents a series of contiguous machine instructions. Line table for
+  /// each compilation unit may consist of multiple sequences, which are not
+  /// guaranteed to be in the order of ascending instruction address.
   struct Sequence {
     Sequence();
 
-    // Sequence describes instructions at address range [LowPC, HighPC)
-    // and is described by line table rows [FirstRowIndex, LastRowIndex).
+    /// Sequence describes instructions at address range [LowPC, HighPC)
+    /// and is described by line table rows [FirstRowIndex, LastRowIndex).
     uint64_t LowPC;
     uint64_t HighPC;
     unsigned FirstRowIndex;
@@ -164,7 +161,7 @@ public:
 
     void reset();
 
-    static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) {
+    static bool orderByLowPC(const Sequence &LHS, const Sequence &RHS) {
       return LHS.LowPC < RHS.LowPC;
     }
 
@@ -172,42 +169,38 @@ public:
       return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
     }
 
-    bool containsPC(uint64_t pc) const {
-      return (LowPC <= pc && pc < HighPC);
-    }
+    bool containsPC(uint64_t PC) const { return (LowPC <= PC && PC < HighPC); }
   };
 
   struct LineTable {
     LineTable();
 
-    // Represents an invalid row
+    /// Represents an invalid row
     const uint32_t UnknownRowIndex = UINT32_MAX;
 
-    void appendRow(const DWARFDebugLine::Row &R) {
-      Rows.push_back(R);
-    }
+    void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); }
 
     void appendSequence(const DWARFDebugLine::Sequence &S) {
       Sequences.push_back(S);
     }
 
-    // Returns the index of the row with file/line info for a given address,
-    // or UnknownRowIndex if there is no such row.
-    uint32_t lookupAddress(uint64_t address) const;
+    /// Returns the index of the row with file/line info for a given address,
+    /// or UnknownRowIndex if there is no such row.
+    uint32_t lookupAddress(uint64_t Address) const;
 
-    bool lookupAddressRange(uint64_t address, uint64_t size,
-                            std::vector<uint32_t> &result) const;
+    bool lookupAddressRange(uint64_t Address, uint64_t Size,
+                            std::vector<uint32_t> &Result) const;
 
     bool hasFileAtIndex(uint64_t FileIndex) const;
 
-    // Extracts filename by its index in filename table in prologue.
-    // Returns true on success.
+    /// Extracts filename by its index in filename table in prologue.
+    /// Returns true on success.
     bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
                             DILineInfoSpecifier::FileLineInfoKind Kind,
                             std::string &Result) const;
 
-    // Fills the Result argument with the file and line information
-    // corresponding to Address. Returns true on success.
+    /// Fills the Result argument with the file and line information
+    /// corresponding to Address. Returns true on success.
     bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir,
                                    DILineInfoSpecifier::FileLineInfoKind Kind,
                                    DILineInfo &Result) const;
@@ -216,8 +209,8 @@ public:
     void clear();
 
     /// Parse prologue and all rows.
-    bool parse(DataExtractor debug_line_data, const RelocAddrMap *RMap,
-               uint32_t *offset_ptr);
+    bool parse(DataExtractor DebugLineData, const RelocAddrMap *RMap,
+               uint32_t *OffsetPtr);
 
     struct Prologue Prologue;
     typedef std::vector<Row> RowVector;
@@ -228,25 +221,25 @@ public:
     SequenceVector Sequences;
 
   private:
-    uint32_t findRowInSeq(const DWARFDebugLine::Sequence &seq,
-                          uint64_t address) const;
+    uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
+                          uint64_t Address) const;
   };
 
-  const LineTable *getLineTable(uint32_t offset) const;
-  const LineTable *getOrParseLineTable(DataExtractor debug_line_data,
-                                       uint32_t offset);
+  const LineTable *getLineTable(uint32_t Offset) const;
+  const LineTable *getOrParseLineTable(DataExtractor DebugLineData,
+                                       uint32_t Offset);
 
 private:
   struct ParsingState {
     ParsingState(struct LineTable *LT);
 
     void resetRowAndSequence();
-    void appendRowToMatrix(uint32_t offset);
+    void appendRowToMatrix(uint32_t Offset);
 
-    // Line table we're currently parsing.
+    /// Line table we're currently parsing.
     struct LineTable *LineTable;
-    // The row number that starts at zero for the prologue, and increases for
-    // each row added to the matrix.
+    /// The row number that starts at zero for the prologue, and increases for
+    /// each row added to the matrix.
     unsigned RowNumber;
     struct Row Row;
     struct Sequence Sequence;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index c8d7a0c1ac7..36b27228f5c 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -59,6 +59,7 @@ public:
   DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F) {}
 
   dwarf::Form getForm() const { return Form; }
+  uint64_t getRawUValue() const { return Value.uval; }
   void setForm(dwarf::Form F) { Form = F; }
   void setUValue(uint64_t V) { Value.uval = V; }
   void setSValue(int64_t V) { Value.sval = V; }
diff --git a/include/llvm/DebugInfo/PDB/Native/ModInfo.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
similarity index 61%
rename from include/llvm/DebugInfo/PDB/Native/ModInfo.h
rename to include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
index d26d0d61844..879cb4285cd 100644
--- a/include/llvm/DebugInfo/PDB/Native/ModInfo.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
@@ -1,4 +1,4 @@
-//===- ModInfo.h - PDB module information -----------------------*- C++ -*-===//
+//===- DbiModuleDescriptor.h - PDB module information -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEBUGINFO_PDB_RAW_MODINFO_H
-#define LLVM_DEBUGINFO_PDB_RAW_MODINFO_H
+#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTOR_H
+#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTOR_H
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
@@ -22,21 +22,21 @@ namespace llvm {
 
 namespace pdb {
 
-class ModInfo {
+class DbiModuleDescriptor {
   friend class DbiStreamBuilder;
 
 public:
-  ModInfo();
-  ModInfo(const ModInfo &Info);
-  ~ModInfo();
+  DbiModuleDescriptor();
+  DbiModuleDescriptor(const DbiModuleDescriptor &Info);
+  ~DbiModuleDescriptor();
 
-  static Error initialize(BinaryStreamRef Stream, ModInfo &Info);
+  static Error initialize(BinaryStreamRef Stream, DbiModuleDescriptor &Info);
 
   bool hasECInfo() const;
   uint16_t getTypeServerIndex() const;
   uint16_t getModuleStreamIndex() const;
   uint32_t getSymbolDebugInfoByteSize() const;
-  uint32_t getLineInfoByteSize() const;
+  uint32_t getC11LineInfoByteSize() const;
   uint32_t getC13LineInfoByteSize() const;
   uint32_t getNumberOfFiles() const;
   uint32_t getSourceFileNameIndex() const;
@@ -54,19 +54,20 @@ private:
 };
 
 struct ModuleInfoEx {
-  ModuleInfoEx(const ModInfo &Info) : Info(Info) {}
+  ModuleInfoEx(const DbiModuleDescriptor &Info) : Info(Info) {}
   ModuleInfoEx(const ModuleInfoEx &Ex) = default;
 
-  ModInfo Info;
+  DbiModuleDescriptor Info;
   std::vector<StringRef> SourceFiles;
 };
 
 } // end namespace pdb
 
-template <> struct VarStreamArrayExtractor<pdb::ModInfo> {
-  Error operator()(BinaryStreamRef Stream, uint32_t &Length,
-                   pdb::ModInfo &Info) const {
-    if (auto EC = pdb::ModInfo::initialize(Stream, Info))
+template <> struct VarStreamArrayExtractor<pdb::DbiModuleDescriptor> {
+  typedef void ContextType;
+  static Error extract(BinaryStreamRef Stream, uint32_t &Length,
+                       pdb::DbiModuleDescriptor &Info, void *Ctx) {
+    if (auto EC = pdb::DbiModuleDescriptor::initialize(Stream, Info))
       return EC;
     Length = Info.getRecordLength();
     return Error::success();
@@ -75,4 +76,4 @@ template <> struct VarStreamArrayExtractor<pdb::ModInfo> {
 
 } // end namespace llvm
 
-#endif // LLVM_DEBUGINFO_PDB_RAW_MODINFO_H
+#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTOR_H
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
new file mode 100644
index 00000000000..8cc5db981f5
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -0,0 +1,101 @@
+//===- DbiModuleDescriptorBuilder.h - PDB module information ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H
+#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace llvm {
+class BinaryStreamWriter;
+
+namespace codeview {
+class ModuleDebugFragmentRecordBuilder;
+}
+
+namespace msf {
+class MSFBuilder;
+struct MSFLayout;
+}
+namespace pdb {
+
+class DbiModuleDescriptorBuilder {
+  friend class DbiStreamBuilder;
+
+public:
+  DbiModuleDescriptorBuilder(StringRef ModuleName, uint32_t ModIndex,
+                             msf::MSFBuilder &Msf);
+  ~DbiModuleDescriptorBuilder();
+
+  DbiModuleDescriptorBuilder(const DbiModuleDescriptorBuilder &) = delete;
+  DbiModuleDescriptorBuilder &
+  operator=(const DbiModuleDescriptorBuilder &) = delete;
+
+  void setObjFileName(StringRef Name);
+  void addSymbol(codeview::CVSymbol Symbol);
+
+  void addC13Fragment(std::unique_ptr<codeview::ModuleDebugLineFragment> Lines);
+  void addC13Fragment(
+      std::unique_ptr<codeview::ModuleDebugInlineeLineFragment> Inlinees);
+  void setC13FileChecksums(
+      std::unique_ptr<codeview::ModuleDebugFileChecksumFragment> Checksums);
+
+  uint16_t getStreamIndex() const;
+  StringRef getModuleName() const { return ModuleName; }
+  StringRef getObjFileName() const { return ObjFileName; }
+
+  ArrayRef<std::string> source_files() const {
+    return makeArrayRef(SourceFiles);
+  }
+
+  uint32_t calculateSerializedLength() const;
+
+  void finalize();
+  Error finalizeMsfLayout();
+
+  Error commit(BinaryStreamWriter &ModiWriter, const msf::MSFLayout &MsfLayout,
+               WritableBinaryStreamRef MsfBuffer);
+
+private:
+  uint32_t calculateC13DebugInfoSize() const;
+
+  void addSourceFile(StringRef Path);
+  msf::MSFBuilder &MSF;
+
+  uint32_t SymbolByteSize = 0;
+  std::string ModuleName;
+  std::string ObjFileName;
+  std::vector<std::string> SourceFiles;
+  std::vector<codeview::CVSymbol> Symbols;
+
+  std::unique_ptr<codeview::ModuleDebugFileChecksumFragment> ChecksumInfo;
+  std::vector<std::unique_ptr<codeview::ModuleDebugLineFragment>> LineInfo;
+  std::vector<std::unique_ptr<codeview::ModuleDebugInlineeLineFragment>>
+      Inlinees;
+
+  std::vector<std::unique_ptr<codeview::ModuleDebugFragmentRecordBuilder>>
+      C13Builders;
+
+  ModuleInfoHeader Layout;
+};
+
+} // end namespace pdb
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index f49f5aaefac..84ae57f2e23 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -10,9 +10,9 @@
 #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H
 #define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H
 
-#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/DebugInfo/PDB/Native/StringTable.h"
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
index 16426bd9384..bcac182e214 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
@@ -31,7 +31,7 @@ struct coff_section;
 namespace pdb {
 class DbiStream;
 struct DbiStreamHeader;
-class ModInfoBuilder;
+class DbiModuleDescriptorBuilder;
 class PDBFile;
 
 class DbiStreamBuilder {
@@ -57,8 +57,9 @@ public:
 
   uint32_t calculateSerializedLength() const;
 
-  Expected<ModInfoBuilder &> addModuleInfo(StringRef ModuleName);
+  Expected<DbiModuleDescriptorBuilder &> addModuleInfo(StringRef ModuleName);
   Error addModuleSourceFile(StringRef Module, StringRef File);
+  Expected<uint32_t> getSourceFileNameIndex(StringRef FileName);
 
   Error finalizeMsfLayout();
 
@@ -103,8 +104,8 @@ private:
 
   const DbiStreamHeader *Header;
 
-  StringMap<std::unique_ptr<ModInfoBuilder>> ModiMap;
-  std::vector<ModInfoBuilder *> ModiList;
+  StringMap<std::unique_ptr<DbiModuleDescriptorBuilder>> ModiMap;
+  std::vector<DbiModuleDescriptorBuilder *> ModiList;
 
   StringMap<uint32_t> SourceFileNames;
 
diff --git a/include/llvm/DebugInfo/PDB/Native/ModInfoBuilder.h b/include/llvm/DebugInfo/PDB/Native/ModInfoBuilder.h
deleted file mode 100644
index 605fd2483c3..00000000000
--- a/include/llvm/DebugInfo/PDB/Native/ModInfoBuilder.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- ModInfoBuilder.h - PDB module information ----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_RAW_MODINFOBUILDER_H
-#define LLVM_DEBUGINFO_PDB_RAW_MODINFOBUILDER_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/Support/Error.h"
-#include <cstdint>
-#include <string>
-#include <vector>
-
-namespace llvm {
-class BinaryStreamWriter;
-
-namespace msf {
-class MSFBuilder;
-struct MSFLayout;
-}
-namespace pdb {
-
-class ModInfoBuilder {
-  friend class DbiStreamBuilder;
-
-public:
-  ModInfoBuilder(StringRef ModuleName, uint32_t ModIndex, msf::MSFBuilder &Msf);
-
-  ModInfoBuilder(const ModInfoBuilder &) = delete;
-  ModInfoBuilder &operator=(const ModInfoBuilder &) = delete;
-
-  void setObjFileName(StringRef Name);
-  void addSymbol(codeview::CVSymbol Symbol);
-
-  uint16_t getStreamIndex() const;
-  StringRef getModuleName() const { return ModuleName; }
-  StringRef getObjFileName() const { return ObjFileName; }
-
-  ArrayRef<std::string> source_files() const {
-    return makeArrayRef(SourceFiles);
-  }
-
-  uint32_t calculateSerializedLength() const;
-
-  void finalize();
-  Error finalizeMsfLayout();
-
-  Error commit(BinaryStreamWriter &ModiWriter, const msf::MSFLayout &MsfLayout,
-               WritableBinaryStreamRef MsfBuffer);
-
-private:
-  void addSourceFile(StringRef Path);
-  msf::MSFBuilder &MSF;
-
-  uint32_t SymbolByteSize = 0;
-  std::string ModuleName;
-  std::string ObjFileName;
-  std::vector<std::string> SourceFiles;
-  std::vector<codeview::CVSymbol> Symbols;
-  ModuleInfoHeader Layout;
-};
-
-} // end namespace pdb
-
-} // end namespace llvm
-
-#endif // LLVM_DEBUGINFO_PDB_RAW_MODINFOBUILDER_H
diff --git a/include/llvm/DebugInfo/PDB/Native/ModStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
similarity index 59%
rename from include/llvm/DebugInfo/PDB/Native/ModStream.h
rename to include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index b12d4ff375f..2c95690ed58 100644
--- a/include/llvm/DebugInfo/PDB/Native/ModStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -1,4 +1,4 @@
-//===- ModStream.h - PDB Module Info Stream Access ------------------------===//
+//===- ModuleDebugStream.h - PDB Module Info Stream Access ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEBUGINFO_PDB_RAW_MODSTREAM_H
-#define LLVM_DEBUGINFO_PDB_RAW_MODSTREAM_H
+#ifndef LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H
+#define LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H
 
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
-#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/Support/BinaryStreamArray.h"
@@ -22,13 +22,16 @@
 namespace llvm {
 namespace pdb {
 class PDBFile;
-class ModInfo;
+class DbiModuleDescriptor;
+
+class ModuleDebugStreamRef {
+  typedef codeview::ModuleDebugFragmentArray::Iterator
+      LinesAndChecksumsIterator;
 
-class ModStream {
 public:
-  ModStream(const ModInfo &Module,
-            std::unique_ptr<msf::MappedBlockStream> Stream);
-  ~ModStream();
+  ModuleDebugStreamRef(const DbiModuleDescriptor &Module,
+                       std::unique_ptr<msf::MappedBlockStream> Stream);
+  ~ModuleDebugStreamRef();
 
   Error reload();
 
@@ -37,26 +40,25 @@ public:
   iterator_range<codeview::CVSymbolArray::Iterator>
   symbols(bool *HadError) const;
 
-  iterator_range<codeview::ModuleSubstreamArray::Iterator>
-  lines(bool *HadError) const;
+  llvm::iterator_range<LinesAndChecksumsIterator> linesAndChecksums() const;
 
   bool hasLineInfo() const;
 
   Error commit();
 
 private:
-  const ModInfo &Mod;
+  const DbiModuleDescriptor &Mod;
 
   uint32_t Signature;
 
   std::unique_ptr<msf::MappedBlockStream> Stream;
 
   codeview::CVSymbolArray SymbolsSubstream;
-  BinaryStreamRef LinesSubstream;
+  BinaryStreamRef C11LinesSubstream;
   BinaryStreamRef C13LinesSubstream;
   BinaryStreamRef GlobalRefsSubstream;
 
-  codeview::ModuleSubstreamArray LineInfo;
+  codeview::ModuleDebugFragmentArray LinesAndChecksums;
 };
 }
 }
diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
index 8eeaf3e0ea4..b1d980679a4 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
@@ -10,7 +10,7 @@
 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVECOMPILANDSYMBOL_H
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVECOMPILANDSYMBOL_H
 
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
 
 namespace llvm {
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
index 60a55ee50cc..18022f599bb 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
@@ -11,7 +11,7 @@
 #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMMODULES_H
 
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
 namespace llvm {
 namespace pdb {
diff --git a/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
index 1b2631efce7..e1c6cf0021d 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawTypes.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
@@ -200,7 +200,7 @@ struct FileInfoSubstreamHeader {
 };
 
 struct ModInfoFlags {
-  ///  uint16_t fWritten : 1;   // True if ModInfo is dirty
+  ///  uint16_t fWritten : 1;   // True if DbiModuleDescriptor is dirty
   ///  uint16_t fECEnabled : 1; // Is EC symbolic info present?  (What is EC?)
   ///  uint16_t unused : 6;     // Reserved
   ///  uint16_t iTSM : 8;       // Type Server Index for this module
@@ -231,8 +231,8 @@ struct ModuleInfoHeader {
   /// Size of local symbol debug info in above stream
   support::ulittle32_t SymBytes;
 
-  /// Size of line number debug info in above stream
-  support::ulittle32_t LineBytes;
+  /// Size of C11 line number info in above stream
+  support::ulittle32_t C11Bytes;
 
   /// Size of C13 line number info in above stream
   support::ulittle32_t C13Bytes;
diff --git a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h
index dd0f40b1978..9c4b12e33ba 100644
--- a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h
@@ -29,6 +29,7 @@ public:
   // If string S does not exist in the string table, insert it.
   // Returns the ID for S.
   uint32_t insert(StringRef S);
+  uint32_t getStringIndex(StringRef S);
 
   uint32_t finalize();
   Error commit(BinaryStreamWriter &Writer) const;
diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 9253adf7eed..5103cc03a6b 100644
--- a/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -56,8 +56,9 @@ public:
   Expected<DIGlobal> symbolizeData(const std::string &ModuleName,
                                    uint64_t ModuleOffset);
   void flush();
-  static std::string DemangleName(const std::string &Name,
-                                  const SymbolizableModule *ModInfo);
+  static std::string
+  DemangleName(const std::string &Name,
+               const SymbolizableModule *DbiModuleDescriptor);
 
 private:
   // Bundles together object file with code/data and object file with
diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h
index 5c05f19abc1..5ed6d030c98 100644
--- a/include/llvm/IR/Argument.h
+++ b/include/llvm/IR/Argument.h
@@ -115,8 +115,6 @@ public:
   void addAttr(Attribute Attr);
 
   /// Remove attributes from an argument.
-  void removeAttr(AttributeList AS);
-
   void removeAttr(Attribute::AttrKind Kind);
 
   /// Check if an argument has a given attribute.
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index e2cd4c236fc..af46034d5a9 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -457,8 +457,11 @@ public:
   /// \brief Return the attribute object that exists at the given index.
   Attribute getAttribute(unsigned Index, StringRef Kind) const;
 
+  /// \brief Return the alignment of the return value.
+  unsigned getRetAlignment() const;
+
   /// \brief Return the alignment for the specified function parameter.
-  unsigned getParamAlignment(unsigned Index) const;
+  unsigned getParamAlignment(unsigned ArgNo) const;
 
   /// \brief Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td
index 7b63638a3f6..75867a6e583 100644
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -137,6 +137,9 @@ def SExt : EnumAttr<"signext">;
 /// +1 bias 0 means unaligned (different from alignstack=(1)).
 def StackAlignment : EnumAttr<"alignstack">;
 
+/// Function can be speculated.
+def Speculatable : EnumAttr<"speculatable">;
+
 /// Stack protection.
 def StackProtect : EnumAttr<"ssp">;
 
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index 79f59557a5d..bad1d4e383d 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -386,20 +386,25 @@ public:
     CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind));
   }
 
+  /// Extract the alignment of the return value.
+  unsigned getRetAlignment() const {
+    CALLSITE_DELEGATE_GETTER(getRetAlignment());
+  }
+
   /// Extract the alignment for a call or parameter (0=unknown).
-  uint16_t getParamAlignment(uint16_t i) const {
-    CALLSITE_DELEGATE_GETTER(getParamAlignment(i));
+  unsigned getParamAlignment(unsigned ArgNo) const {
+    CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
   }
 
   /// Extract the number of dereferenceable bytes for a call or parameter
   /// (0=unknown).
-  uint64_t getDereferenceableBytes(uint16_t i) const {
+  uint64_t getDereferenceableBytes(unsigned i) const {
     CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i));
   }
 
   /// Extract the number of dereferenceable_or_null bytes for a call or
   /// parameter (0=unknown).
-  uint64_t getDereferenceableOrNullBytes(uint16_t i) const {
+  uint64_t getDereferenceableOrNullBytes(unsigned i) const {
     CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i));
   }
 
@@ -599,7 +604,7 @@ public:
   bool isReturnNonNull() const {
     if (hasRetAttr(Attribute::NonNull))
       return true;
-    else if (getDereferenceableBytes(0) > 0 &&
+    else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
              getType()->getPointerAddressSpace() == 0)
       return true;
 
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
index 9cfbda1f685..604e99c8b52 100644
--- a/include/llvm/IR/CallingConv.h
+++ b/include/llvm/IR/CallingConv.h
@@ -196,6 +196,10 @@ namespace CallingConv {
     /// Register calling convention used for parameters transfer optimization
     X86_RegCall = 92,
 
+    /// Calling convention used for Mesa hull shaders. (= tessellation control
+    /// shaders)
+    AMDGPU_HS = 93,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index a4b2a02d505..4afb5d9d63b 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -577,15 +577,14 @@ namespace llvm {
     ///                      These flags are used to emit dwarf attributes.
     /// \param isOptimized   True if optimization is ON.
     /// \param TParams       Function template parameters.
-    DISubprogram *createFunction(DIScope *Scope, StringRef Name,
-                                 StringRef LinkageName, DIFile *File,
-                                 unsigned LineNo, DISubroutineType *Ty,
-                                 bool isLocalToUnit, bool isDefinition,
-                                 unsigned ScopeLine,
-                                 DINode::DIFlags Flags = DINode::FlagZero,
-                                 bool isOptimized = false,
-                                 DITemplateParameterArray TParams = nullptr,
-                                 DISubprogram *Decl = nullptr);
+    /// \param ThrownTypes   Exception types this function may throw.
+    DISubprogram *createFunction(
+        DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+        unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
+        bool isDefinition, unsigned ScopeLine,
+        DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
+        DITemplateParameterArray TParams = nullptr,
+        DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr);
 
     /// Identical to createFunction,
     /// except that the resulting DbgNode is meant to be RAUWed.
@@ -595,7 +594,7 @@ namespace llvm {
         bool isDefinition, unsigned ScopeLine,
         DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
         DITemplateParameterArray TParams = nullptr,
-        DISubprogram *Decl = nullptr);
+        DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr);
 
     /// Create a new descriptor for the specified C++ method.
     /// See comments in \a DISubprogram* for descriptions of these fields.
@@ -619,23 +618,23 @@ namespace llvm {
     ///                      This flags are used to emit dwarf attributes.
     /// \param isOptimized   True if optimization is ON.
     /// \param TParams       Function template parameters.
+    /// \param ThrownTypes   Exception types this function may throw.
     DISubprogram *createMethod(
         DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
         unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
         bool isDefinition, unsigned Virtuality = 0, unsigned VTableIndex = 0,
         int ThisAdjustment = 0, DIType *VTableHolder = nullptr,
         DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
-        DITemplateParameterArray TParams = nullptr);
+        DITemplateParameterArray TParams = nullptr,
+        DITypeArray ThrownTypes = nullptr);
 
     /// This creates new descriptor for a namespace with the specified
     /// parent scope.
     /// \param Scope       Namespace scope
     /// \param Name        Name of this namespace
-    /// \param File        Source file
-    /// \param LineNo      Line number
     /// \param ExportSymbols True for C++ inline namespaces.
-    DINamespace *createNameSpace(DIScope *Scope, StringRef Name, DIFile *File,
-                                 unsigned LineNo, bool ExportSymbols);
+    DINamespace *createNameSpace(DIScope *Scope, StringRef Name,
+                                 bool ExportSymbols);
 
     /// This creates new descriptor for a module with the specified
     /// parent scope.
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index 8041e35e0e0..0331d5229e7 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -56,6 +56,8 @@
 
 namespace llvm {
 
+class DIBuilder;
+
 template <typename T> class Optional;
 
 /// Holds a subclass of DINode.
@@ -433,7 +435,7 @@ public:
 
   /// Return the raw underlying file.
   ///
-  /// An \a DIFile is an \a DIScope, but it doesn't point at a separate file
+  /// A \a DIFile is a \a DIScope, but it doesn't point at a separate file
   /// (it\em is the file).  If \c this is an \a DIFile, we need to return \c
   /// this.  Otherwise, return the first operand, which is where all other
   /// subclasses store their file pointer.
@@ -1509,14 +1511,14 @@ class DISubprogram : public DILocalScope {
           unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
           bool IsOptimized, DICompileUnit *Unit,
           DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
-          DILocalVariableArray Variables, StorageType Storage,
-          bool ShouldCreate = true) {
+          DILocalVariableArray Variables, DITypeArray ThrownTypes,
+          StorageType Storage, bool ShouldCreate = true) {
     return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
                    getCanonicalMDString(Context, LinkageName), File, Line, Type,
                    IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
                    Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
                    Unit, TemplateParams.get(), Declaration, Variables.get(),
-                   Storage, ShouldCreate);
+                   ThrownTypes.get(), Storage, ShouldCreate);
   }
   static DISubprogram *
   getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
@@ -1525,15 +1527,16 @@ class DISubprogram : public DILocalScope {
           Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
           int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
           Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
-          StorageType Storage, bool ShouldCreate = true);
+          Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate = true);
 
   TempDISubprogram cloneImpl() const {
-    return getTemporary(
-        getContext(), getScope(), getName(), getLinkageName(), getFile(),
-        getLine(), getType(), isLocalToUnit(), isDefinition(), getScopeLine(),
-        getContainingType(), getVirtuality(), getVirtualIndex(),
-        getThisAdjustment(), getFlags(), isOptimized(), getUnit(),
-        getTemplateParams(), getDeclaration(), getVariables());
+    return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
+                        getFile(), getLine(), getType(), isLocalToUnit(),
+                        isDefinition(), getScopeLine(), getContainingType(),
+                        getVirtuality(), getVirtualIndex(), getThisAdjustment(),
+                        getFlags(), isOptimized(), getUnit(),
+                        getTemplateParams(), getDeclaration(), getVariables(),
+                        getThrownTypes());
   }
 
 public:
@@ -1546,11 +1549,12 @@ public:
                      bool IsOptimized, DICompileUnit *Unit,
                      DITemplateParameterArray TemplateParams = nullptr,
                      DISubprogram *Declaration = nullptr,
-                     DILocalVariableArray Variables = nullptr),
+                     DILocalVariableArray Variables = nullptr,
+                     DITypeArray ThrownTypes = nullptr),
                     (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
                      IsDefinition, ScopeLine, ContainingType, Virtuality,
                      VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit,
-                     TemplateParams, Declaration, Variables))
+                     TemplateParams, Declaration, Variables, ThrownTypes))
   DEFINE_MDNODE_GET(
       DISubprogram,
       (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
@@ -1558,10 +1562,12 @@ public:
        unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality,
        unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
        bool IsOptimized, Metadata *Unit, Metadata *TemplateParams = nullptr,
-       Metadata *Declaration = nullptr, Metadata *Variables = nullptr),
+       Metadata *Declaration = nullptr, Metadata *Variables = nullptr,
+       Metadata *ThrownTypes = nullptr),
       (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
        ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment,
-       Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables))
+       Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables,
+       ThrownTypes))
 
   TempDISubprogram clone() const { return cloneImpl(); }
 
@@ -1610,11 +1616,7 @@ public:
   DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
 
   StringRef getName() const { return getStringOperand(2); }
-  StringRef getDisplayName() const { return getStringOperand(3); }
-  StringRef getLinkageName() const { return getStringOperand(4); }
-
-  MDString *getRawName() const { return getOperandAs<MDString>(2); }
-  MDString *getRawLinkageName() const { return getOperandAs<MDString>(4); }
+  StringRef getLinkageName() const { return getStringOperand(3); }
 
   DISubroutineType *getType() const {
     return cast_or_null<DISubroutineType>(getRawType());
@@ -1626,9 +1628,7 @@ public:
   DICompileUnit *getUnit() const {
     return cast_or_null<DICompileUnit>(getRawUnit());
   }
-  void replaceUnit(DICompileUnit *CU) {
-    replaceOperandWith(7, CU);
-  }
+  void replaceUnit(DICompileUnit *CU) { replaceOperandWith(5, CU); }
   DITemplateParameterArray getTemplateParams() const {
     return cast_or_null<MDTuple>(getRawTemplateParams());
   }
@@ -1638,14 +1638,26 @@ public:
   DILocalVariableArray getVariables() const {
     return cast_or_null<MDTuple>(getRawVariables());
   }
+  DITypeArray getThrownTypes() const {
+    return cast_or_null<MDTuple>(getRawThrownTypes());
+  }
 
   Metadata *getRawScope() const { return getOperand(1); }
-  Metadata *getRawType() const { return getOperand(5); }
-  Metadata *getRawContainingType() const { return getOperand(6); }
-  Metadata *getRawUnit() const { return getOperand(7); }
-  Metadata *getRawTemplateParams() const { return getOperand(8); }
-  Metadata *getRawDeclaration() const { return getOperand(9); }
-  Metadata *getRawVariables() const { return getOperand(10); }
+  MDString *getRawName() const { return getOperandAs<MDString>(2); }
+  MDString *getRawLinkageName() const { return getOperandAs<MDString>(3); }
+  Metadata *getRawType() const { return getOperand(4); }
+  Metadata *getRawUnit() const { return getOperand(5); }
+  Metadata *getRawDeclaration() const { return getOperand(6); }
+  Metadata *getRawVariables() const { return getOperand(7); }
+  Metadata *getRawContainingType() const {
+    return getNumOperands() > 8 ? getOperandAs<Metadata>(8) : nullptr;
+  }
+  Metadata *getRawTemplateParams() const {
+    return getNumOperands() > 9 ? getOperandAs<Metadata>(9) : nullptr;
+  }
+  Metadata *getRawThrownTypes() const {
+    return getNumOperands() > 10 ? getOperandAs<Metadata>(10) : nullptr;
+  }
 
   /// Check if this subprogram describes the given function.
   ///
@@ -1841,45 +1853,40 @@ class DINamespace : public DIScope {
   friend class LLVMContextImpl;
   friend class MDNode;
 
-  unsigned Line;
   unsigned ExportSymbols : 1;
 
-  DINamespace(LLVMContext &Context, StorageType Storage, unsigned Line,
-              bool ExportSymbols, ArrayRef<Metadata *> Ops)
+  DINamespace(LLVMContext &Context, StorageType Storage, bool ExportSymbols,
+              ArrayRef<Metadata *> Ops)
       : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace,
                 Ops),
-        Line(Line), ExportSymbols(ExportSymbols) {}
+        ExportSymbols(ExportSymbols) {}
   ~DINamespace() = default;
 
   static DINamespace *getImpl(LLVMContext &Context, DIScope *Scope,
-                              DIFile *File, StringRef Name, unsigned Line,
-                              bool ExportSymbols, StorageType Storage,
-                              bool ShouldCreate = true) {
-    return getImpl(Context, Scope, File, getCanonicalMDString(Context, Name),
-                   Line, ExportSymbols, Storage, ShouldCreate);
+                              StringRef Name, bool ExportSymbols,
+                              StorageType Storage, bool ShouldCreate = true) {
+    return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
+                   ExportSymbols, Storage, ShouldCreate);
   }
   static DINamespace *getImpl(LLVMContext &Context, Metadata *Scope,
-                              Metadata *File, MDString *Name, unsigned Line,
-                              bool ExportSymbols, StorageType Storage,
-                              bool ShouldCreate = true);
+                              MDString *Name, bool ExportSymbols,
+                              StorageType Storage, bool ShouldCreate = true);
 
   TempDINamespace cloneImpl() const {
-    return getTemporary(getContext(), getScope(), getFile(), getName(),
-                        getLine(), getExportSymbols());
+    return getTemporary(getContext(), getScope(), getName(),
+                        getExportSymbols());
   }
 
 public:
-  DEFINE_MDNODE_GET(DINamespace, (DIScope * Scope, DIFile *File, StringRef Name,
-                                  unsigned Line, bool ExportSymbols),
-                    (Scope, File, Name, Line, ExportSymbols))
   DEFINE_MDNODE_GET(DINamespace,
-                    (Metadata * Scope, Metadata *File, MDString *Name,
-                     unsigned Line, bool ExportSymbols),
-                    (Scope, File, Name, Line, ExportSymbols))
+                    (DIScope *Scope, StringRef Name, bool ExportSymbols),
+                    (Scope, Name, ExportSymbols))
+  DEFINE_MDNODE_GET(DINamespace,
+                    (Metadata *Scope, MDString *Name, bool ExportSymbols),
+                    (Scope, Name, ExportSymbols))
 
   TempDINamespace clone() const { return cloneImpl(); }
 
-  unsigned getLine() const { return Line; }
   bool getExportSymbols() const { return ExportSymbols; }
   DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
   StringRef getName() const { return getStringOperand(2); }
@@ -2265,6 +2272,17 @@ public:
 
   /// Return whether this is a piece of an aggregate variable.
   bool isFragment() const { return getFragmentInfo().hasValue(); }
+
+  /// Append \p Ops with operations to apply the \p Offset.
+  static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset);
+
+  /// Constants for DIExpression::prepend.
+  enum { NoDeref = false, WithDeref = true, WithStackValue = true };
+
+  /// Prepend \p DIExpr with a deref and offset operation and optionally turn it
+  /// into a stack value.
+  static DIExpression *prepend(const DIExpression *DIExpr, bool Deref,
+                               int64_t Offset = 0, bool StackValue = false);
 };
 
 /// Global variables.
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index a3762a44ccb..9e723f97775 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -316,18 +316,20 @@ public:
   void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
 
   /// @brief Extract the alignment for a call or parameter (0=unknown).
-  unsigned getParamAlignment(unsigned i) const {
-    return AttributeSets.getParamAlignment(i);
+  unsigned getParamAlignment(unsigned ArgNo) const {
+    return AttributeSets.getParamAlignment(ArgNo);
   }
 
   /// @brief Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
+  /// @param i AttributeList index, referring to a return value or argument.
   uint64_t getDereferenceableBytes(unsigned i) const {
     return AttributeSets.getDereferenceableBytes(i);
   }
 
   /// @brief Extract the number of dereferenceable_or_null bytes for a call or
   /// parameter (0=unknown).
+  /// @param i AttributeList index, referring to a return value or argument.
   uint64_t getDereferenceableOrNullBytes(unsigned i) const {
     return AttributeSets.getDereferenceableOrNullBytes(i);
   }
@@ -416,6 +418,14 @@ public:
     removeFnAttr(Attribute::Convergent);
   }
 
+  /// @brief Determine if the call has sideeffects.
+  bool isSpeculatable() const {
+    return hasFnAttribute(Attribute::Speculatable);
+  }
+  void setSpeculatable() {
+    addFnAttr(Attribute::Speculatable);
+  }
+
   /// Determine if the function is known not to recurse, directly or
   /// indirectly.
   bool doesNotRecurse() const {
@@ -440,10 +450,10 @@ public:
   }
 
   /// @brief Determine if the function returns a structure through first
-  /// pointer argument.
+  /// or second pointer argument.
   bool hasStructRetAttr() const {
-    return AttributeSets.hasAttribute(1, Attribute::StructRet) ||
-           AttributeSets.hasAttribute(2, Attribute::StructRet);
+    return AttributeSets.hasParamAttribute(0, Attribute::StructRet) ||
+           AttributeSets.hasParamAttribute(1, Attribute::StructRet);
   }
 
   /// @brief Determine if the parameter or return value is marked with NoAlias
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index 518094735d7..6795b029cce 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -1059,18 +1059,6 @@ public:
     return isFalseWhenEqual(getPredicate());
   }
 
-  /// @brief Determine if Pred1 implies Pred2 is true when two compares have
-  /// matching operands.
-  bool isImpliedTrueByMatchingCmp(Predicate Pred2) const {
-    return isImpliedTrueByMatchingCmp(getPredicate(), Pred2);
-  }
-
-  /// @brief Determine if Pred1 implies Pred2 is false when two compares have
-  /// matching operands.
-  bool isImpliedFalseByMatchingCmp(Predicate Pred2) const {
-    return isImpliedFalseByMatchingCmp(getPredicate(), Pred2);
-  }
-
   /// @returns true if the predicate is unsigned, false otherwise.
   /// @brief Determine if the predicate is an unsigned operation.
   static bool isUnsigned(Predicate predicate);
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index d23c1ddf925..4d3f1dc267f 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -1714,9 +1714,12 @@ public:
   ///     (\p i - 1) in the operand list.
   bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const;
 
+  /// Extract the alignment of the return value.
+  unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
+
   /// Extract the alignment for a call or parameter (0=unknown).
-  unsigned getParamAlignment(unsigned i) const {
-    return Attrs.getParamAlignment(i);
+  unsigned getParamAlignment(unsigned ArgNo) const {
+    return Attrs.getParamAlignment(ArgNo);
   }
 
   /// Extract the number of dereferenceable bytes for a call or
@@ -3804,9 +3807,12 @@ public:
   ///     (\p i - 1) in the operand list.
   bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const;
 
+  /// Extract the alignment of the return value.
+  unsigned getRetAlignment() const { return Attrs.getRetAlignment(); }
+
   /// Extract the alignment for a call or parameter (0=unknown).
-  unsigned getParamAlignment(unsigned i) const {
-    return Attrs.getParamAlignment(i);
+  unsigned getParamAlignment(unsigned ArgNo) const {
+    return Attrs.getParamAlignment(ArgNo);
   }
 
   /// Extract the number of dereferenceable bytes for a call or
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index f69b5bfc0be..05e3315cbab 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -201,8 +201,8 @@ namespace llvm {
     Value *getNumElements() const { return getArgOperand(2); }
     void setNumElements(Value *V) { setArgOperand(2, V); }
 
-    uint64_t getSrcAlignment() const { return getParamAlignment(1); }
-    uint64_t getDstAlignment() const { return getParamAlignment(2); }
+    uint64_t getSrcAlignment() const { return getParamAlignment(0); }
+    uint64_t getDstAlignment() const { return getParamAlignment(1); }
 
     uint64_t getElementSizeInBytes() const {
       Value *Arg = getArgOperand(3);
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 309b2148922..39b992cd06a 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -98,6 +98,18 @@ def IntrNoDuplicate : IntrinsicProperty;
 // Parallels the convergent attribute on LLVM IR functions.
 def IntrConvergent : IntrinsicProperty;
 
+// This property indicates that the intrinsic is safe to speculate.
+def IntrSpeculatable : IntrinsicProperty;
+
+// This property can be used to override the 'has no other side effects'
+// language of the IntrNoMem, IntrReadMem, IntrWriteMem, and IntrArgMemOnly
+// intrinsic properties.  By default, intrinsics are assumed to have side
+// effects, so this property is only necessary if you have defined one of
+// the memory properties listed above.
+// For this property, 'side effects' has the same meaning as 'side effects'
+// defined by the hasSideEffects property of the TableGen Instruction class.
+def IntrHasSideEffects : IntrinsicProperty;
+
 //===----------------------------------------------------------------------===//
 // Types used by intrinsics.
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 21d8a15e7e7..d7413fe9e56 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 class AMDGPUReadPreloadRegisterIntrinsic
-  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
 
 class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
-  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, GCCBuiltin<name>;
+  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>;
 
 let TargetPrefix = "r600" in {
 
@@ -47,7 +47,8 @@ def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
 // AS 7 is PARAM_I_ADDRESS, used for kernel arguments
 def int_r600_implicitarg_ptr :
   GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
+  [IntrNoMem, IntrSpeculatable]>;
 
 def int_r600_rat_store_typed :
   // 1st parameter: Data
@@ -57,15 +58,15 @@ def int_r600_rat_store_typed :
   GCCBuiltin<"__builtin_r600_rat_store_typed">;
 
 def int_r600_recipsqrt_ieee :  Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_r600_recipsqrt_clamped : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_r600_cube : Intrinsic<
-  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]
+  [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
 >;
 
 } // End TargetPrefix = "r600"
@@ -82,31 +83,51 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
 
 def int_amdgcn_dispatch_ptr :
   GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_queue_ptr :
   GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_kernarg_segment_ptr :
   GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_implicitarg_ptr :
   GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_groupstaticsize :
   GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
-  Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_dispatch_id :
   GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
-  Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+  Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_implicit_buffer_ptr :
   GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
-  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+  [IntrNoMem, IntrSpeculatable]>;
+
+// Set EXEC to the 64-bit value given.
+// This is always moved to the beginning of the basic block.
+def int_amdgcn_init_exec : Intrinsic<[],
+  [llvm_i64_ty],      // 64-bit literal constant
+  [IntrConvergent]>;
+
+// Set EXEC according to a thread count packed in an SGPR input:
+//    thread_count = (input >> bitoffset) & 0x7f;
+// This is always moved to the beginning of the basic block.
+def int_amdgcn_init_exec_from_input : Intrinsic<[],
+  [llvm_i32_ty,       // 32-bit SGPR input
+   llvm_i32_ty],      // bit offset of the thread count
+  [IntrConvergent]>;
+
 
 //===----------------------------------------------------------------------===//
 // Instruction Intrinsics
@@ -135,115 +156,129 @@ def int_amdgcn_div_scale : Intrinsic<
   //                second. (0 = first, 1 = second).
   [llvm_anyfloat_ty, llvm_i1_ty],
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-  [IntrNoMem]
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
   [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-  [IntrNoMem]
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
   [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-  [IntrNoMem]
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_trig_preop : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_sin : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_cos : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_log_clamp : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
-  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+  Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_rcp : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
-  Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
+  Intrinsic<[llvm_float_ty], [llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_rsq :  Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_rsq_legacy :  GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
   Intrinsic<
-  [llvm_float_ty], [llvm_float_ty], [IntrNoMem]
+  [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_rsq_clamp : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
 
 def int_amdgcn_ldexp : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_frexp_mant : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_frexp_exp : Intrinsic<
-  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]
+  [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
 >;
 
 // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
 // and always uses rtz, so is not suitable for implementing the OpenCL
 // fract function. It should be ok on VI.
 def int_amdgcn_fract : Intrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+  [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_cvt_pkrtz : Intrinsic<
-  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_class : Intrinsic<
-  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
+  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
   Intrinsic<[llvm_anyfloat_ty],
-    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]
+    [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
   Intrinsic<[llvm_float_ty],
-    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
   Intrinsic<[llvm_float_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+  [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
   Intrinsic<[llvm_float_ty],
-    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
   Intrinsic<[llvm_float_ty],
-    [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+    [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
 // should be used.
 def int_amdgcn_sffbh :
-  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+  Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 
 // Fields should mirror atomicrmw
@@ -527,7 +562,9 @@ def int_amdgcn_s_decperflevel :
 
 def int_amdgcn_s_getreg :
   GCCBuiltin<"__builtin_amdgcn_s_getreg">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+  [IntrReadMem, IntrSpeculatable]
+>;
 
 // __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
 // param values: 0 = P10, 1 = P20, 2 = P0
@@ -535,23 +572,24 @@ def int_amdgcn_interp_mov :
   GCCBuiltin<"__builtin_amdgcn_interp_mov">,
   Intrinsic<[llvm_float_ty],
             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, IntrSpeculatable]>;
 
 // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
+// This intrinsic reads from lds, but the memory values are constant,
+// so it behaves like IntrNoMem.
 def int_amdgcn_interp_p1 :
   GCCBuiltin<"__builtin_amdgcn_interp_p1">,
   Intrinsic<[llvm_float_ty],
             [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;  // This intrinsic reads from lds, but the memory
-                           // values are constant, so it behaves like IntrNoMem.
+            [IntrNoMem, IntrSpeculatable]>;
 
 // __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
 def int_amdgcn_interp_p2 :
   GCCBuiltin<"__builtin_amdgcn_interp_p2">,
   Intrinsic<[llvm_float_ty],
             [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;  // See int_amdgcn_v_interp_p1 for why this is
-                           // IntrNoMem.
+            [IntrNoMem, IntrSpeculatable]>;
+          // See int_amdgcn_v_interp_p1 for why this is IntrNoMem.
 
 // Pixel shaders only: whether the current pixel is live (i.e. not a helper
 // invocation for derivative computation).
@@ -574,48 +612,68 @@ def int_amdgcn_ds_swizzle :
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
 
 def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
-  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
-  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_lerp :
   GCCBuiltin<"__builtin_amdgcn_lerp">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_sad_u8 :
   GCCBuiltin<"__builtin_amdgcn_sad_u8">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_msad_u8 :
   GCCBuiltin<"__builtin_amdgcn_msad_u8">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_sad_hi_u8 :
   GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_sad_u16 :
   GCCBuiltin<"__builtin_amdgcn_sad_u16">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_qsad_pk_u16_u8 :
   GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
-  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_mqsad_pk_u16_u8 :
   GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
-  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_mqsad_u32_u8 :
   GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_cvt_pk_u8_f32 :
   GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
-  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
 
 def int_amdgcn_icmp :
   Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
@@ -716,6 +774,7 @@ def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;
 // Emit 2.5 ulp, no denormal division. Should only be inserted by
 // pass based on !fpmath metadata.
 def int_amdgcn_fdiv_fast : Intrinsic<
-  [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+  [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+  [IntrNoMem, IntrSpeculatable]
 >;
 }
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index 9c0a4159cad..a7274fbfbce 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -644,13 +644,6 @@ public:
     return It->second.second;
   }
 
-  /// Add the given per-module index into this module index/summary,
-  /// assigning it the given module ID. Each module merged in should have
-  /// a unique ID, necessary for consistent renaming of promoted
-  /// static (local) variables.
-  void mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other,
-                 uint64_t NextModuleId);
-
   /// Convenience method for creating a promoted global name
   /// for the given value name of a local, and its original module's ID.
   static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash) {
@@ -703,13 +696,6 @@ public:
     return &I->second;
   }
 
-  /// Remove entries in the GlobalValueMap that have empty summaries due to the
-  /// eager nature of map entry creation during VST parsing. These would
-  /// also be suppressed during combined index generation in mergeFrom(),
-  /// but if there was only one module or this was the first module we might
-  /// not invoke mergeFrom.
-  void removeEmptySummaryEntries();
-
   /// Collect for the given module the list of function it defines
   /// (GUID -> Summary).
   void collectDefinedFunctionsForModule(StringRef ModulePath,
diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h
index 4838bac9e0f..393618d5511 100644
--- a/include/llvm/IR/ValueHandle.h
+++ b/include/llvm/IR/ValueHandle.h
@@ -34,19 +34,14 @@ protected:
   ///
   /// This is to avoid having a vtable for the light-weight handle pointers. The
   /// fully general Callback version does have a vtable.
-  enum HandleBaseKind {
-    Assert,
-    Callback,
-    Tracking,
-    Weak
-  };
+  enum HandleBaseKind { Assert, Callback, Weak, WeakTracking };
 
   ValueHandleBase(const ValueHandleBase &RHS)
       : ValueHandleBase(RHS.PrevPair.getInt(), RHS) {}
 
   ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS)
-      : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) {
-    if (isValid(V))
+      : PrevPair(nullptr, Kind), Next(nullptr), Val(RHS.getValPtr()) {
+    if (isValid(getValPtr()))
       AddToExistingUseList(RHS.getPrevPtr());
   }
 
@@ -54,43 +49,51 @@ private:
   PointerIntPair<ValueHandleBase**, 2, HandleBaseKind> PrevPair;
   ValueHandleBase *Next;
 
-  Value* V;
+  Value *Val;
+
+  void setValPtr(Value *V) { Val = V; }
 
 public:
   explicit ValueHandleBase(HandleBaseKind Kind)
-    : PrevPair(nullptr, Kind), Next(nullptr), V(nullptr) {}
+      : PrevPair(nullptr, Kind), Next(nullptr), Val(nullptr) {}
   ValueHandleBase(HandleBaseKind Kind, Value *V)
-    : PrevPair(nullptr, Kind), Next(nullptr), V(V) {
-    if (isValid(V))
+      : PrevPair(nullptr, Kind), Next(nullptr), Val(V) {
+    if (isValid(getValPtr()))
       AddToUseList();
   }
 
   ~ValueHandleBase() {
-    if (isValid(V))
+    if (isValid(getValPtr()))
       RemoveFromUseList();
   }
 
   Value *operator=(Value *RHS) {
-    if (V == RHS) return RHS;
-    if (isValid(V)) RemoveFromUseList();
-    V = RHS;
-    if (isValid(V)) AddToUseList();
+    if (getValPtr() == RHS)
+      return RHS;
+    if (isValid(getValPtr()))
+      RemoveFromUseList();
+    setValPtr(RHS);
+    if (isValid(getValPtr()))
+      AddToUseList();
     return RHS;
   }
 
   Value *operator=(const ValueHandleBase &RHS) {
-    if (V == RHS.V) return RHS.V;
-    if (isValid(V)) RemoveFromUseList();
-    V = RHS.V;
-    if (isValid(V)) AddToExistingUseList(RHS.getPrevPtr());
-    return V;
+    if (getValPtr() == RHS.getValPtr())
+      return RHS.getValPtr();
+    if (isValid(getValPtr()))
+      RemoveFromUseList();
+    setValPtr(RHS.getValPtr());
+    if (isValid(getValPtr()))
+      AddToExistingUseList(RHS.getPrevPtr());
+    return getValPtr();
   }
 
-  Value *operator->() const { return V; }
-  Value &operator*() const { return *V; }
+  Value *operator->() const { return getValPtr(); }
+  Value &operator*() const { return *getValPtr(); }
 
 protected:
-  Value *getValPtr() const { return V; }
+  Value *getValPtr() const { return Val; }
 
   static bool isValid(Value *V) {
     return V &&
@@ -105,7 +108,7 @@ protected:
   ///
   /// This should only be used if a derived class has manually removed the
   /// handle from the use list.
-  void clearValPtr() { V = nullptr; }
+  void clearValPtr() { setValPtr(nullptr); }
 
 public:
   // Callbacks made from Value.
@@ -131,19 +134,16 @@ private:
   void AddToUseList();
 };
 
-/// \brief Value handle that is nullable, but tries to track the Value.
+/// \brief A nullable Value handle that is nullable.
 ///
-/// This is a value handle that tries hard to point to a Value, even across
-/// RAUW operations, but will null itself out if the value is destroyed.  this
-/// is useful for advisory sorts of information, but should not be used as the
-/// key of a map (since the map would have to rearrange itself when the pointer
-/// changes).
+/// This is a value handle that points to a value, and nulls itself
+/// out if that value is deleted.
 class WeakVH : public ValueHandleBase {
 public:
   WeakVH() : ValueHandleBase(Weak) {}
   WeakVH(Value *P) : ValueHandleBase(Weak, P) {}
   WeakVH(const WeakVH &RHS)
-    : ValueHandleBase(Weak, RHS) {}
+      : ValueHandleBase(Weak, RHS) {}
 
   WeakVH &operator=(const WeakVH &RHS) = default;
 
@@ -170,6 +170,51 @@ template <> struct simplify_type<const WeakVH> {
   static SimpleType getSimplifiedValue(const WeakVH &WVH) { return WVH; }
 };
 
+/// \brief Value handle that is nullable, but tries to track the Value.
+///
+/// This is a value handle that tries hard to point to a Value, even across
+/// RAUW operations, but will null itself out if the value is destroyed.  this
+/// is useful for advisory sorts of information, but should not be used as the
+/// key of a map (since the map would have to rearrange itself when the pointer
+/// changes).
+class WeakTrackingVH : public ValueHandleBase {
+public:
+  WeakTrackingVH() : ValueHandleBase(WeakTracking) {}
+  WeakTrackingVH(Value *P) : ValueHandleBase(WeakTracking, P) {}
+  WeakTrackingVH(const WeakTrackingVH &RHS)
+      : ValueHandleBase(WeakTracking, RHS) {}
+
+  WeakTrackingVH &operator=(const WeakTrackingVH &RHS) = default;
+
+  Value *operator=(Value *RHS) {
+    return ValueHandleBase::operator=(RHS);
+  }
+  Value *operator=(const ValueHandleBase &RHS) {
+    return ValueHandleBase::operator=(RHS);
+  }
+
+  operator Value*() const {
+    return getValPtr();
+  }
+
+  bool pointsToAliveValue() const {
+    return ValueHandleBase::isValid(getValPtr());
+  }
+};
+
+// Specialize simplify_type to allow WeakTrackingVH to participate in
+// dyn_cast, isa, etc.
+template <> struct simplify_type<WeakTrackingVH> {
+  typedef Value *SimpleType;
+  static SimpleType getSimplifiedValue(WeakTrackingVH &WVH) { return WVH; }
+};
+template <> struct simplify_type<const WeakTrackingVH> {
+  typedef Value *SimpleType;
+  static SimpleType getSimplifiedValue(const WeakTrackingVH &WVH) {
+    return WVH;
+  }
+};
+
 /// \brief Value handle that asserts if the Value is deleted.
 ///
 /// This is a Value Handle that points to a value and asserts out if the value
@@ -272,39 +317,37 @@ struct isPodLike<AssertingVH<T> > {
 /// to a Value (or subclass) across some operations which may move that value,
 /// but should never destroy it or replace it with some unacceptable type.
 ///
-/// It is an error to do anything with a TrackingVH whose value has been
-/// destroyed, except to destruct it.
-///
 /// It is an error to attempt to replace a value with one of a type which is
 /// incompatible with any of its outstanding TrackingVHs.
-template<typename ValueTy>
-class TrackingVH : public ValueHandleBase {
-  void CheckValidity() const {
-    Value *VP = ValueHandleBase::getValPtr();
+///
+/// It is an error to read from a TrackingVH that does not point to a valid
+/// value.  A TrackingVH is said to not point to a valid value if either it
+/// hasn't yet been assigned a value yet or because the value it was tracking
+/// has since been deleted.
+///
+/// Assigning a value to a TrackingVH is always allowed, even if said TrackingVH
+/// no longer points to a valid value.
+template <typename ValueTy> class TrackingVH {
+  WeakTrackingVH InnerHandle;
 
-    // Null is always ok.
-    if (!VP) return;
-
-    // Check that this value is valid (i.e., it hasn't been deleted). We
-    // explicitly delay this check until access to avoid requiring clients to be
-    // unnecessarily careful w.r.t. destruction.
-    assert(ValueHandleBase::isValid(VP) && "Tracked Value was deleted!");
+public:
+  ValueTy *getValPtr() const {
+    assert(InnerHandle.pointsToAliveValue() &&
+           "TrackingVH must be non-null and valid on dereference!");
 
     // Check that the value is a member of the correct subclass. We would like
     // to check this property on assignment for better debugging, but we don't
     // want to require a virtual interface on this VH. Instead we allow RAUW to
     // replace this value with a value of an invalid type, and check it here.
-    assert(isa<ValueTy>(VP) &&
+    assert(isa<ValueTy>(InnerHandle) &&
            "Tracked Value was replaced by one with an invalid type!");
+    return cast<ValueTy>(InnerHandle);
   }
 
-  ValueTy *getValPtr() const {
-    CheckValidity();
-    return (ValueTy*)ValueHandleBase::getValPtr();
-  }
   void setValPtr(ValueTy *P) {
-    CheckValidity();
-    ValueHandleBase::operator=(GetAsValue(P));
+    // Assigning to non-valid TrackingVH's are fine so we just unconditionally
+    // assign here.
+    InnerHandle = GetAsValue(P);
   }
 
   // Convert a ValueTy*, which may be const, to the type the base
@@ -313,8 +356,8 @@ class TrackingVH : public ValueHandleBase {
   static Value *GetAsValue(const Value *V) { return const_cast<Value*>(V); }
 
 public:
-  TrackingVH() : ValueHandleBase(Tracking) {}
-  TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, GetAsValue(P)) {}
+  TrackingVH() {}
+  TrackingVH(ValueTy *P) { setValPtr(P); }
 
   operator ValueTy*() const {
     return getValPtr();
@@ -359,7 +402,8 @@ public:
   ///
   /// Called when this->getValPtr() is destroyed, inside ~Value(), so you
   /// may call any non-virtual Value method on getValPtr(), but no subclass
-  /// methods.  If WeakVH were implemented as a CallbackVH, it would use this
+  /// methods.  If WeakTrackingVH were implemented as a CallbackVH, it would use
+  /// this
   /// method to call setValPtr(NULL).  AssertingVH would use this method to
   /// cause an assertion failure.
   ///
@@ -370,7 +414,8 @@ public:
   /// \brief Callback for Value RAUW.
   ///
   /// Called when this->getValPtr()->replaceAllUsesWith(new_value) is called,
-  /// _before_ any of the uses have actually been replaced.  If WeakVH were
+  /// _before_ any of the uses have actually been replaced.  If WeakTrackingVH
+  /// were
   /// implemented as a CallbackVH, it would use this method to call
   /// setValPtr(new_value).  AssertingVH would do nothing in this method.
   virtual void allUsesReplacedWith(Value *) {}
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 15c8ff6d04d..44ff4c1a581 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -329,6 +329,7 @@ void initializeSeparateConstOffsetFromGEPPass(PassRegistry&);
 void initializeShadowStackGCLoweringPass(PassRegistry&);
 void initializeShrinkWrapPass(PassRegistry&);
 void initializeSimpleInlinerPass(PassRegistry&);
+void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry&);
 void initializeSingleLoopExtractorPass(PassRegistry&);
 void initializeSinkingLegacyPassPass(PassRegistry&);
 void initializeSjLjEHPreparePass(PassRegistry&);
diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
index 643902377dd..c34211c2bd1 100644
--- a/include/llvm/MC/ConstantPools.h
+++ b/include/llvm/MC/ConstantPools.h
@@ -42,7 +42,7 @@ struct ConstantPoolEntry {
 // A class to keep track of assembler-generated constant pools that are use to
 // implement the ldr-pseudo.
 class ConstantPool {
-  typedef SmallVector<ConstantPoolEntry, 4> EntryVecTy;
+  using EntryVecTy = SmallVector<ConstantPoolEntry, 4>;
   EntryVecTy Entries;
   DenseMap<int64_t, const MCSymbolRefExpr *> CachedEntries;
 
@@ -80,7 +80,7 @@ class AssemblerConstantPools {
   // sections in a stable order to ensure that we have print the
   // constant pools in a deterministic order when printing an assembly
   // file.
-  typedef MapVector<MCSection *, ConstantPool> ConstantPoolMapTy;
+  using ConstantPoolMapTy = MapVector<MCSection *, ConstantPool>;
   ConstantPoolMapTy ConstantPools;
 
 public:
diff --git a/include/llvm/MC/LaneBitmask.h b/include/llvm/MC/LaneBitmask.h
index 89e60928405..5ca06d1148e 100644
--- a/include/llvm/MC/LaneBitmask.h
+++ b/include/llvm/MC/LaneBitmask.h
@@ -1,4 +1,4 @@
-//===-- llvm/MC/LaneBitmask.h -----------------------------------*- C++ -*-===//
+//===- llvm/MC/LaneBitmask.h ------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -30,14 +30,16 @@
 #ifndef LLVM_MC_LANEBITMASK_H
 #define LLVM_MC_LANEBITMASK_H
 
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Printable.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
+
   struct LaneBitmask {
     // When changing the underlying type, change the format string as well.
-    typedef unsigned Type;
+    using Type = unsigned;
     enum : unsigned { BitWidth = 8*sizeof(Type) };
     constexpr static const char *const FormatStr = "%08X";
 
@@ -84,6 +86,7 @@ namespace llvm {
       OS << format(LaneBitmask::FormatStr, LaneMask.getAsInteger());
     });
   }
-}
+
+} // end namespace llvm
 
 #endif // LLVM_MC_LANEBITMASK_H
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index c29abaa03a6..185b892d962 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -60,36 +60,36 @@ class MCAssembler {
   friend class MCAsmLayout;
 
 public:
-  typedef std::vector<MCSection *> SectionListType;
-  typedef std::vector<const MCSymbol *> SymbolDataListType;
+  using SectionListType = std::vector<MCSection *>;
+  using SymbolDataListType = std::vector<const MCSymbol *>;
 
-  typedef pointee_iterator<SectionListType::const_iterator> const_iterator;
-  typedef pointee_iterator<SectionListType::iterator> iterator;
+  using const_iterator = pointee_iterator<SectionListType::const_iterator>;
+  using iterator = pointee_iterator<SectionListType::iterator>;
 
-  typedef pointee_iterator<SymbolDataListType::const_iterator>
-  const_symbol_iterator;
-  typedef pointee_iterator<SymbolDataListType::iterator> symbol_iterator;
+  using const_symbol_iterator =
+      pointee_iterator<SymbolDataListType::const_iterator>;
+  using symbol_iterator = pointee_iterator<SymbolDataListType::iterator>;
 
-  typedef iterator_range<symbol_iterator> symbol_range;
-  typedef iterator_range<const_symbol_iterator> const_symbol_range;
+  using symbol_range = iterator_range<symbol_iterator>;
+  using const_symbol_range = iterator_range<const_symbol_iterator>;
 
-  typedef std::vector<IndirectSymbolData>::const_iterator
-      const_indirect_symbol_iterator;
-  typedef std::vector<IndirectSymbolData>::iterator indirect_symbol_iterator;
+  using const_indirect_symbol_iterator =
+      std::vector<IndirectSymbolData>::const_iterator;
+  using indirect_symbol_iterator = std::vector<IndirectSymbolData>::iterator;
 
-  typedef std::vector<DataRegionData>::const_iterator
-      const_data_region_iterator;
-  typedef std::vector<DataRegionData>::iterator data_region_iterator;
+  using const_data_region_iterator =
+      std::vector<DataRegionData>::const_iterator;
+  using data_region_iterator = std::vector<DataRegionData>::iterator;
 
   /// MachO specific deployment target version info.
   // A Major version of 0 indicates that no version information was supplied
   // and so the corresponding load command should not be emitted.
-  typedef struct {
+  using VersionMinInfoType = struct {
     MCVersionMinType Kind;
     unsigned Major;
     unsigned Minor;
     unsigned Update;
-  } VersionMinInfoType;
+  };
 
 private:
   MCContext &Context;
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index b3106936e27..9bea1963130 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -46,17 +46,19 @@ namespace llvm {
   class MCSectionELF;
   class MCSectionMachO;
   class MCSectionWasm;
+  class MCStreamer;
   class MCSymbol;
   class MCSymbolELF;
   class MCSymbolWasm;
   class SMLoc;
+  class SourceMgr;
 
   /// Context object for machine code objects.  This class owns all of the
   /// sections that it creates.
   ///
   class MCContext {
   public:
-    typedef StringMap<MCSymbol *, BumpPtrAllocator &> SymbolTable;
+    using SymbolTable = StringMap<MCSymbol *, BumpPtrAllocator &>;
 
   private:
     /// The SourceMgr for this object, if any.
@@ -223,10 +225,12 @@ namespace llvm {
       std::string SectionName;
       StringRef GroupName;
       unsigned UniqueID;
+
       WasmSectionKey(StringRef SectionName, StringRef GroupName,
                      unsigned UniqueID)
           : SectionName(SectionName), GroupName(GroupName), UniqueID(UniqueID) {
       }
+
       bool operator<(const WasmSectionKey &Other) const {
         if (SectionName != Other.SectionName)
           return SectionName < Other.SectionName;
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 0d69c2005cb..79f1b952501 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -168,10 +168,10 @@ public:
     MCLineDivisions[Sec].push_back(LineEntry);
   }
 
-  typedef std::vector<MCDwarfLineEntry> MCDwarfLineEntryCollection;
-  typedef MCDwarfLineEntryCollection::iterator iterator;
-  typedef MCDwarfLineEntryCollection::const_iterator const_iterator;
-  typedef MapVector<MCSection *, MCDwarfLineEntryCollection> MCLineDivisionMap;
+  using MCDwarfLineEntryCollection = std::vector<MCDwarfLineEntry>;
+  using iterator = MCDwarfLineEntryCollection::iterator;
+  using const_iterator = MCDwarfLineEntryCollection::const_iterator;
+  using MCLineDivisionMap = MapVector<MCSection *, MCDwarfLineEntryCollection>;
 
 private:
   // A collection of MCDwarfLineEntry for each section.
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index c850abf42e2..a91a31414bd 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -28,7 +28,8 @@ class MCSymbol;
 class MCValue;
 class raw_ostream;
 class StringRef;
-typedef DenseMap<const MCSection *, uint64_t> SectionAddrMap;
+
+using SectionAddrMap = DenseMap<const MCSection *, uint64_t>;
 
 /// \brief Base class for the full range of assembler expressions which are
 /// needed for parsing.
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
index fc8257f90a9..0ca530c4510 100644
--- a/include/llvm/MC/MCFragment.h
+++ b/include/llvm/MC/MCFragment.h
@@ -200,8 +200,8 @@ protected:
                                                     Sec) {}
 
 public:
-  typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
-  typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
+  using const_fixup_iterator = SmallVectorImpl<MCFixup>::const_iterator;
+  using fixup_iterator = SmallVectorImpl<MCFixup>::iterator;
 
   SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
   const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index 70227965937..9bf440ea96d 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -176,8 +176,9 @@ public:
 
   void addOperand(const MCOperand &Op) { Operands.push_back(Op); }
 
-  typedef SmallVectorImpl<MCOperand>::iterator iterator;
-  typedef SmallVectorImpl<MCOperand>::const_iterator const_iterator;
+  using iterator = SmallVectorImpl<MCOperand>::iterator;
+  using const_iterator = SmallVectorImpl<MCOperand>::const_iterator;
+
   void clear() { Operands.clear(); }
   void erase(iterator I) { Operands.erase(I); }
   size_t size() const { return Operands.size(); }
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index 0c3525bbeda..f0fd07f43cf 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -111,7 +111,7 @@ class MCLOHDirective {
                  const MCAsmLayout &Layout) const;
 
 public:
-  typedef SmallVectorImpl<MCSymbol *> LOHArgs;
+  using LOHArgs = SmallVectorImpl<MCSymbol *>;
 
   MCLOHDirective(MCLOHType Kind, const LOHArgs &Args)
       : Kind(Kind), Args(Args.begin(), Args.end()) {
@@ -140,7 +140,7 @@ class MCLOHContainer {
   SmallVector<MCLOHDirective, 32> Directives;
 
 public:
-  typedef SmallVectorImpl<MCLOHDirective> LOHDirectives;
+  using LOHDirectives = SmallVectorImpl<MCLOHDirective>;
 
   MCLOHContainer() = default;
 
@@ -179,8 +179,8 @@ public:
 };
 
 // Add types for specialized template using MCSymbol.
-typedef MCLOHDirective::LOHArgs MCLOHArgs;
-typedef MCLOHContainer::LOHDirectives MCLOHDirectives;
+using MCLOHArgs = MCLOHDirective::LOHArgs;
+using MCLOHDirectives = MCLOHContainer::LOHDirectives;
 
 } // end namespace llvm
 
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 7ddc7722e51..7836ece2d68 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -161,6 +161,7 @@ protected: // Can only create subclasses.
   bool IsAtStartOfStatement = true;
   AsmCommentConsumer *CommentConsumer = nullptr;
 
+  bool AltMacroMode;
   MCAsmLexer();
 
   virtual AsmToken LexToken() = 0;
@@ -175,6 +176,14 @@ public:
   MCAsmLexer &operator=(const MCAsmLexer &) = delete;
   virtual ~MCAsmLexer();
 
+  bool IsaAltMacroMode() {
+    return AltMacroMode;
+  }
+
+  void SetAltMacroMode(bool AltMacroSet) {
+    AltMacroMode = AltMacroSet;
+  }
+
   /// Consume the next token from the input stream and return it.
   ///
   /// The lexer will continuosly return the end-of-file token once the end of
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index 6763374185e..75d45f490bd 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -67,9 +67,9 @@ public:
 /// assembly parsers.
 class MCAsmParser {
 public:
-  typedef bool (*DirectiveHandler)(MCAsmParserExtension*, StringRef, SMLoc);
-  typedef std::pair<MCAsmParserExtension*, DirectiveHandler>
-    ExtensionDirectiveHandler;
+  using DirectiveHandler = bool (*)(MCAsmParserExtension*, StringRef, SMLoc);
+  using ExtensionDirectiveHandler =
+      std::pair<MCAsmParserExtension*, DirectiveHandler>;
 
   struct MCPendingError {
     SMLoc Loc;
diff --git a/include/llvm/MC/MCParser/MCTargetAsmParser.h b/include/llvm/MC/MCParser/MCTargetAsmParser.h
index c81a7624011..b8d3180cd49 100644
--- a/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -27,7 +27,7 @@ class MCStreamer;
 class MCSubtargetInfo;
 template <typename T> class SmallVectorImpl;
 
-typedef SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> OperandVector;
+using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
 
 enum AsmRewriteKind {
   AOK_Delete = 0,     // Rewrite should be ignored.
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index 015d0b96d9f..de98abe0dc4 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -27,13 +27,13 @@ namespace llvm {
 
 /// An unsigned integer type large enough to represent all physical registers,
 /// but not necessarily virtual registers.
-typedef uint16_t MCPhysReg;
+using MCPhysReg = uint16_t;
 
 /// MCRegisterClass - Base class of TargetRegisterClass.
 class MCRegisterClass {
 public:
-  typedef const MCPhysReg* iterator;
-  typedef const MCPhysReg* const_iterator;
+  using iterator = const MCPhysReg*;
+  using const_iterator = const MCPhysReg*;
 
   const iterator RegsBegin;
   const uint8_t *const RegSet;
@@ -134,7 +134,7 @@ struct MCRegisterDesc {
 ///
 class MCRegisterInfo {
 public:
-  typedef const MCRegisterClass *regclass_iterator;
+  using regclass_iterator = const MCRegisterClass *;
 
   /// DwarfLLVMRegPair - Emitted by tablegen so Dwarf<->LLVM reg mappings can be
   /// performed with a binary search.
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 2974d8f1b80..7bfffbcdb7c 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -47,13 +47,13 @@ public:
     BundleLockedAlignToEnd
   };
 
-  typedef iplist<MCFragment> FragmentListType;
+  using FragmentListType = iplist<MCFragment>;
 
-  typedef FragmentListType::const_iterator const_iterator;
-  typedef FragmentListType::iterator iterator;
+  using const_iterator = FragmentListType::const_iterator;
+  using iterator = FragmentListType::iterator;
 
-  typedef FragmentListType::const_reverse_iterator const_reverse_iterator;
-  typedef FragmentListType::reverse_iterator reverse_iterator;
+  using const_reverse_iterator = FragmentListType::const_reverse_iterator;
+  using reverse_iterator = FragmentListType::reverse_iterator;
 
 private:
   MCSymbol *Begin;
diff --git a/include/llvm/MC/MCSectionWasm.h b/include/llvm/MC/MCSectionWasm.h
index 4e19196175c..29d62a7a6f8 100644
--- a/include/llvm/MC/MCSectionWasm.h
+++ b/include/llvm/MC/MCSectionWasm.h
@@ -26,6 +26,7 @@ class MCSymbol;
 
 /// This represents a section on wasm.
 class MCSectionWasm final : public MCSection {
+private:
   /// This is the name of the section.  The referenced memory is owned by
   /// TargetLoweringObjectFileWasm's WasmUniqueMap.
   StringRef SectionName;
@@ -40,10 +41,11 @@ class MCSectionWasm final : public MCSection {
 
   const MCSymbolWasm *Group;
 
-  // The offset of the MC function section in the wasm code section.
+  // The offset of the MC function/data section in the wasm code/data section.
+  // For data relocations the offset is relative to start of the data payload
+  // itself and does not include the size of the section header.
   uint64_t SectionOffset;
 
-private:
   friend class MCContext;
   MCSectionWasm(StringRef Section, unsigned type, unsigned flags, SectionKind K,
                 const MCSymbolWasm *group, unsigned UniqueID, MCSymbol *Begin)
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index eb301031ba3..5390e794242 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -44,12 +44,11 @@ class MCInstPrinter;
 class MCSection;
 class MCStreamer;
 class MCSymbolRefExpr;
-class MCSymbolWasm;
 class MCSubtargetInfo;
 class raw_ostream;
 class Twine;
 
-typedef std::pair<MCSection *, const MCExpr *> MCSectionSubPair;
+using MCSectionSubPair = std::pair<MCSection *, const MCExpr *>;
 
 /// Target specific streamer interface. This is used so that targets can
 /// implement support for target specific assembly directives.
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index bb16463588c..d1d5d070bf5 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -26,6 +26,7 @@
 #include <string>
 
 namespace llvm {
+
 class MachineInstr;
 class MCInst;
 
@@ -63,8 +64,7 @@ public:
   MCSubtargetInfo() = delete;
   MCSubtargetInfo &operator=(const MCSubtargetInfo &) = delete;
   MCSubtargetInfo &operator=(MCSubtargetInfo &&) = delete;
-
-  virtual ~MCSubtargetInfo() {}
+  virtual ~MCSubtargetInfo() = default;
 
   /// getTargetTriple - Return the target triple string.
   const Triple &getTargetTriple() const { return TargetTriple; }
@@ -178,11 +178,11 @@ public:
 
   /// Returns string representation of scheduler comment
   virtual std::string getSchedInfoStr(const MachineInstr &MI) const {
-    return std::string();
+    return {};
   }
 
   virtual std::string getSchedInfoStr(MCInst const &MCI) const {
-    return std::string();
+    return {};
   }
 };
 
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index e8432afd862..9b1cc6e7d7e 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -145,10 +145,10 @@ protected:
   /// MCSymbol contains a uint64_t so is probably aligned to 8.  On a 32-bit
   /// system, the name is a pointer so isn't going to satisfy the 8 byte
   /// alignment of uint64_t.  Account for that here.
-  typedef union {
+  using NameEntryStorageTy = union {
     const StringMapEntry<bool> *NameEntry;
     uint64_t AlignmentPadding;
-  } NameEntryStorageTy;
+  };
 
   MCSymbol(SymbolKind Kind, const StringMapEntry<bool> *Name, bool isTemporary)
       : IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false),
diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h
index 6e458eaac9c..a4dd382706d 100644
--- a/include/llvm/MC/MCWasmObjectWriter.h
+++ b/include/llvm/MC/MCWasmObjectWriter.h
@@ -32,12 +32,12 @@ class raw_pwrite_stream;
 struct WasmRelocationEntry {
   uint64_t Offset;            // Where is the relocation.
   const MCSymbolWasm *Symbol; // The symbol to relocate with.
-  uint64_t Addend;            // A value to add to the symbol.
+  int64_t Addend;             // A value to add to the symbol.
   unsigned Type;              // The type of the relocation.
   MCSectionWasm *FixupSection;// The section the relocation is targeting.
 
   WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
-                      uint64_t Addend, unsigned Type,
+                      int64_t Addend, unsigned Type,
                       MCSectionWasm *FixupSection)
       : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
         FixupSection(FixupSection) {}
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index 06788326ff5..f42048e48ee 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -42,7 +42,6 @@ protected:
     ID_MachOUniversalBinary,
     ID_COFFImportFile,
     ID_IR,                 // LLVM IR
-    ID_ModuleSummaryIndex, // Module summary index
 
     // Object and children.
     ID_StartObjects,
@@ -128,8 +127,6 @@ public:
     return TypeID == ID_IR;
   }
 
-  bool isModuleSummaryIndex() const { return TypeID == ID_ModuleSummaryIndex; }
-
   bool isLittleEndian() const {
     return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B ||
              TypeID == ID_MachO32B || TypeID == ID_MachO64B);
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index e0bb8f1cf3d..1b6aaf4be66 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -623,6 +623,15 @@ struct coff_base_reloc_block_entry {
   int getOffset() const { return Data & ((1 << 12) - 1); }
 };
 
+struct coff_resource_dir_table {
+  support::ulittle32_t Characteristics;
+  support::ulittle32_t TimeDateStamp;
+  support::ulittle16_t MajorVersion;
+  support::ulittle16_t MinorVersion;
+  support::ulittle16_t NumberOfNameEntries;
+  support::ulittle16_t NumberOfIDEntries;
+};
+
 class COFFObjectFile : public ObjectFile {
 private:
   friend class ImportDirectoryEntryRef;
diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h
index 4192fe7e5c9..78d9d679acd 100644
--- a/include/llvm/Object/COFFImportFile.h
+++ b/include/llvm/Object/COFFImportFile.h
@@ -53,7 +53,7 @@ public:
 
   basic_symbol_iterator symbol_end() const override {
     DataRefImpl Symb;
-    Symb.p = isCode() ? 2 : 1;
+    Symb.p = isData() ? 1 : 2;
     return BasicSymbolRef(Symb, this);
   }
 
@@ -63,8 +63,8 @@ public:
   }
 
 private:
-  bool isCode() const {
-    return getCOFFImportHeader()->getType() == COFF::IMPORT_CODE;
+  bool isData() const {
+    return getCOFFImportHeader()->getType() == COFF::IMPORT_DATA;
   }
 };
 
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index 9c72bd4023d..42fdfe3e5a7 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -32,6 +32,7 @@ namespace llvm {
 namespace object {
 
 StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type);
+StringRef getELFSectionTypeName(uint32_t Machine, uint32_t Type);
 
 // Subclasses of ELFFile may need this for template instantiation
 inline std::pair<unsigned char, unsigned char>
diff --git a/include/llvm/Object/ModuleSummaryIndexObjectFile.h b/include/llvm/Object/ModuleSummaryIndexObjectFile.h
deleted file mode 100644
index f733f861e2c..00000000000
--- a/include/llvm/Object/ModuleSummaryIndexObjectFile.h
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- ModuleSummaryIndexObjectFile.h - Summary index file implementation -===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the ModuleSummaryIndexObjectFile template class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H
-#define LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/SymbolicFile.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <memory>
-#include <system_error>
-
-namespace llvm {
-
-class ModuleSummaryIndex;
-
-namespace object {
-
-class ObjectFile;
-
-/// This class is used to read just the module summary index related
-/// sections out of the given object (which may contain a single module's
-/// bitcode or be a combined index bitcode file). It builds a ModuleSummaryIndex
-/// object.
-class ModuleSummaryIndexObjectFile : public SymbolicFile {
-  std::unique_ptr<ModuleSummaryIndex> Index;
-
-public:
-  ModuleSummaryIndexObjectFile(MemoryBufferRef Object,
-                               std::unique_ptr<ModuleSummaryIndex> I);
-  ~ModuleSummaryIndexObjectFile() override;
-
-  // TODO: Walk through GlobalValueMap entries for symbols.
-  // However, currently these interfaces are not used by any consumers.
-  void moveSymbolNext(DataRefImpl &Symb) const override {
-    llvm_unreachable("not implemented");
-  }
-
-  std::error_code printSymbolName(raw_ostream &OS,
-                                  DataRefImpl Symb) const override {
-    llvm_unreachable("not implemented");
-    return std::error_code();
-  }
-
-  uint32_t getSymbolFlags(DataRefImpl Symb) const override {
-    llvm_unreachable("not implemented");
-    return 0;
-  }
-
-  basic_symbol_iterator symbol_begin() const override {
-    llvm_unreachable("not implemented");
-    return basic_symbol_iterator(BasicSymbolRef());
-  }
-  basic_symbol_iterator symbol_end() const override {
-    llvm_unreachable("not implemented");
-    return basic_symbol_iterator(BasicSymbolRef());
-  }
-
-  const ModuleSummaryIndex &getIndex() const {
-    return const_cast<ModuleSummaryIndexObjectFile *>(this)->getIndex();
-  }
-  ModuleSummaryIndex &getIndex() { return *Index; }
-  std::unique_ptr<ModuleSummaryIndex> takeIndex();
-
-  static inline bool classof(const Binary *v) {
-    return v->isModuleSummaryIndex();
-  }
-
-  /// \brief Finds and returns bitcode embedded in the given object file, or an
-  /// error code if not found.
-  static ErrorOr<MemoryBufferRef> findBitcodeInObject(const ObjectFile &Obj);
-
-  /// \brief Finds and returns bitcode in the given memory buffer (which may
-  /// be either a bitcode file or a native object file with embedded bitcode),
-  /// or an error code if not found.
-  static ErrorOr<MemoryBufferRef>
-  findBitcodeInMemBuffer(MemoryBufferRef Object);
-
-  /// \brief Parse module summary index in the given memory buffer.
-  /// Return new ModuleSummaryIndexObjectFile instance containing parsed module
-  /// summary/index.
-  static Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>>
-  create(MemoryBufferRef Object);
-};
-
-} // end namespace object
-
-/// Parse the module summary index out of an IR file and return the module
-/// summary index object if found, or nullptr if not. If Identifier is
-/// non-empty, it is used as the module ID (module path) in the resulting
-/// index. This can be used when the index is being read from a file
-/// containing minimized bitcode just for the thin link.
-Expected<std::unique_ptr<ModuleSummaryIndex>>
-getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier = "");
-
-} // end namespace llvm
-
-#endif // LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H
diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def
index 46d253bf0ec..1700deadeae 100644
--- a/include/llvm/Support/AArch64TargetParser.def
+++ b/include/llvm/Support/AArch64TargetParser.def
@@ -21,7 +21,7 @@ AARCH64_ARCH("invalid", AK_INVALID, nullptr, nullptr,
 AARCH64_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8_A,
              FK_CRYPTO_NEON_FP_ARMV8,
              (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
-              AArch64::AEK_SIMD | AArch64::AEK_LSE))
+              AArch64::AEK_SIMD))
 AARCH64_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a",
              ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
              (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h
index 21b2474660f..748a62be231 100644
--- a/include/llvm/Support/BinaryStreamArray.h
+++ b/include/llvm/Support/BinaryStreamArray.h
@@ -42,10 +42,12 @@ namespace llvm {
 /// having to specify a second template argument to VarStreamArray (documented
 /// below).
 template <typename T> struct VarStreamArrayExtractor {
+  typedef void Context;
+
   // Method intentionally deleted.  You must provide an explicit specialization
   // with the following method implemented.
-  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
-                   T &Item) const = delete;
+  static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item,
+                       Context *Ctx) = delete;
 };
 
 /// VarStreamArray represents an array of variable length records backed by a
@@ -64,82 +66,87 @@ template <typename T> struct VarStreamArrayExtractor {
 /// If you do not specify an Extractor type, you are expected to specialize
 /// VarStreamArrayExtractor<T> for your ValueType.
 ///
-/// By default an Extractor is default constructed in the class, but in some
-/// cases you might find it useful for an Extractor to maintain state across
-/// extractions.  In this case you can provide your own Extractor through a
-/// secondary constructor.  The following examples show various ways of
-/// creating a VarStreamArray.
+/// The default extractor type is stateless, but by specializing
+/// VarStreamArrayExtractor or defining your own custom extractor type and
+/// adding the appropriate ContextType typedef to the class, you can pass a
+/// context field during construction of the VarStreamArray that will be
+/// passed to each call to extract.
 ///
-///       // Will use VarStreamArrayExtractor<MyType> as the extractor.
-///       VarStreamArray<MyType> MyTypeArray;
-///
-///       // Will use a default-constructed MyExtractor as the extractor.
-///       VarStreamArray<MyType, MyExtractor> MyTypeArray2;
-///
-///       // Will use the specific instance of MyExtractor provided.
-///       // MyExtractor need not be default-constructible in this case.
-///       MyExtractor E(SomeContext);
-///       VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
-///
-template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
+template <typename ValueType, typename ExtractorType>
+class VarStreamArrayIterator;
 
 template <typename ValueType,
-          typename Extractor = VarStreamArrayExtractor<ValueType>>
-
+          typename ExtractorType = VarStreamArrayExtractor<ValueType>>
 class VarStreamArray {
-  friend class VarStreamArrayIterator<ValueType, Extractor>;
-
 public:
-  typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
+  typedef typename ExtractorType::ContextType ContextType;
+  typedef VarStreamArrayIterator<ValueType, ExtractorType> Iterator;
+  friend Iterator;
 
   VarStreamArray() = default;
-  explicit VarStreamArray(const Extractor &E) : E(E) {}
 
-  explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {}
-  VarStreamArray(BinaryStreamRef Stream, const Extractor &E)
-      : Stream(Stream), E(E) {}
+  explicit VarStreamArray(BinaryStreamRef Stream,
+                          ContextType *Context = nullptr)
+      : Stream(Stream), Context(Context) {}
 
-  VarStreamArray(const VarStreamArray<ValueType, Extractor> &Other)
-      : Stream(Other.Stream), E(Other.E) {}
+  VarStreamArray(const VarStreamArray<ValueType, ExtractorType> &Other)
+      : Stream(Other.Stream), Context(Other.Context) {}
 
   Iterator begin(bool *HadError = nullptr) const {
-    return Iterator(*this, E, HadError);
+    if (empty())
+      return end();
+
+    return Iterator(*this, Context, HadError);
   }
 
-  Iterator end() const { return Iterator(E); }
+  Iterator end() const { return Iterator(); }
 
-  const Extractor &getExtractor() const { return E; }
+  bool empty() const { return Stream.getLength() == 0; }
+
+  /// \brief given an offset into the array's underlying stream, return an
+  /// iterator to the record at that offset.  This is considered unsafe
+  /// since the behavior is undefined if \p Offset does not refer to the
+  /// beginning of a valid record.
+  Iterator at(uint32_t Offset) const {
+    return Iterator(*this, Context, Stream.drop_front(Offset), nullptr);
+  }
 
   BinaryStreamRef getUnderlyingStream() const { return Stream; }
 
 private:
   BinaryStreamRef Stream;
-  Extractor E;
+  ContextType *Context = nullptr;
 };
 
-template <typename ValueType, typename Extractor>
+template <typename ValueType, typename ExtractorType>
 class VarStreamArrayIterator
-    : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
-                                  std::forward_iterator_tag, ValueType> {
-  typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
-  typedef VarStreamArray<ValueType, Extractor> ArrayType;
+    : public iterator_facade_base<
+          VarStreamArrayIterator<ValueType, ExtractorType>,
+          std::forward_iterator_tag, ValueType> {
+  typedef typename ExtractorType::ContextType ContextType;
+  typedef VarStreamArrayIterator<ValueType, ExtractorType> IterType;
+  typedef VarStreamArray<ValueType, ExtractorType> ArrayType;
 
 public:
-  VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
-                         bool *HadError = nullptr)
-      : IterRef(Array.Stream), Array(&Array), HadError(HadError), Extract(E) {
+  VarStreamArrayIterator(const ArrayType &Array, ContextType *Context,
+                         BinaryStreamRef Stream, bool *HadError = nullptr)
+      : IterRef(Stream), Context(Context), Array(&Array), HadError(HadError) {
     if (IterRef.getLength() == 0)
       moveToEnd();
     else {
-      auto EC = Extract(IterRef, ThisLen, ThisValue);
+      auto EC = ExtractorType::extract(IterRef, ThisLen, ThisValue, Context);
       if (EC) {
         consumeError(std::move(EC));
         markError();
       }
     }
   }
+
+  VarStreamArrayIterator(const ArrayType &Array, ContextType *Context,
+                         bool *HadError = nullptr)
+      : VarStreamArrayIterator(Array, Context, Array.Stream, HadError) {}
+
   VarStreamArrayIterator() = default;
-  explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
   ~VarStreamArrayIterator() = default;
 
   bool operator==(const IterType &R) const {
@@ -178,7 +185,7 @@ public:
         moveToEnd();
       } else {
         // There is some data after the current record.
-        auto EC = Extract(IterRef, ThisLen, ThisValue);
+        auto EC = ExtractorType::extract(IterRef, ThisLen, ThisValue, Context);
         if (EC) {
           consumeError(std::move(EC));
           markError();
@@ -205,11 +212,11 @@ private:
 
   ValueType ThisValue;
   BinaryStreamRef IterRef;
+  ContextType *Context{nullptr};
   const ArrayType *Array{nullptr};
   uint32_t ThisLen{0};
   bool HasError{false};
   bool *HadError{nullptr};
-  Extractor Extract;
 };
 
 template <typename T> class FixedStreamArrayIterator;
diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h
index d994fa0f49d..f30d82d81b2 100644
--- a/include/llvm/Support/BinaryStreamReader.h
+++ b/include/llvm/Support/BinaryStreamReader.h
@@ -172,11 +172,13 @@ public:
   /// \returns a success error code if the data was successfully read, otherwise
   /// returns an appropriate error code.
   template <typename T, typename U>
-  Error readArray(VarStreamArray<T, U> &Array, uint32_t Size) {
+  Error
+  readArray(VarStreamArray<T, U> &Array, uint32_t Size,
+            typename VarStreamArray<T, U>::ContextType *Context = nullptr) {
     BinaryStreamRef S;
     if (auto EC = readStreamRef(S, Size))
       return EC;
-    Array = VarStreamArray<T, U>(S, Array.getExtractor());
+    Array = VarStreamArray<T, U>(S, Context);
     return Error::success();
   }
 
diff --git a/include/llvm/Support/BinaryStreamWriter.h b/include/llvm/Support/BinaryStreamWriter.h
index 64f26b24543..6734a797ccc 100644
--- a/include/llvm/Support/BinaryStreamWriter.h
+++ b/include/llvm/Support/BinaryStreamWriter.h
@@ -30,6 +30,8 @@ namespace llvm {
 /// although no methods are overridable.
 class BinaryStreamWriter {
 public:
+  // FIXME: We should be able to slice and drop_front etc on Writers / Readers.
+
   BinaryStreamWriter() = default;
   explicit BinaryStreamWriter(WritableBinaryStreamRef Stream);
   virtual ~BinaryStreamWriter() {}
diff --git a/include/llvm/Support/CMakeLists.txt b/include/llvm/Support/CMakeLists.txt
index b4b99370574..c58ccf21630 100644
--- a/include/llvm/Support/CMakeLists.txt
+++ b/include/llvm/Support/CMakeLists.txt
@@ -14,10 +14,15 @@ macro(find_first_existing_vc_file out_var path)
   execute_process(COMMAND ${git_executable} rev-parse --git-dir
     WORKING_DIRECTORY ${path}/cmake
     RESULT_VARIABLE git_result
-    OUTPUT_VARIABLE git_dir)
+    OUTPUT_VARIABLE git_dir
+    ERROR_QUIET)
   if(git_result EQUAL 0)
     string(STRIP "${git_dir}" git_dir)
     set(${out_var} "${git_dir}/logs/HEAD")
+    # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD
+    if (NOT EXISTS "${git_dir}/logs/HEAD")
+      file(WRITE "${git_dir}/logs/HEAD" "")
+    endif()
   else()
     find_first_existing_file(${out_var}
       "${path}/.svn/wc.db"   # SVN 1.7
diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h
index aa9bb8938ad..a8874a10d46 100644
--- a/include/llvm/Support/DynamicLibrary.h
+++ b/include/llvm/Support/DynamicLibrary.h
@@ -58,7 +58,7 @@ namespace sys {
     void *getAddressOfSymbol(const char *symbolName);
 
     /// This function permanently loads the dynamic library at the given path.
-    /// The library will only be unloaded when the program terminates.
+    /// The library will only be unloaded when llvm_shutdown() is called.
     /// This returns a valid DynamicLibrary instance on success and an invalid
     /// instance on failure (see isValid()). \p *errMsg will only be modified
     /// if the library fails to load.
@@ -71,7 +71,8 @@ namespace sys {
     /// Registers an externally loaded library. The library will be unloaded
     /// when the program terminates.
     ///
-    /// It is safe to call this function multiple times for the same library.
+    /// It is safe to call this function multiple times for the same library,
+    /// though ownership is only taken if there was no error.
     ///
     /// \returns An empty \p DynamicLibrary if the library was already loaded.
     static DynamicLibrary addPermanentLibrary(void *handle,
@@ -106,6 +107,8 @@ namespace sys {
     /// libraries.
     /// @brief Add searchable symbol/value pair.
     static void AddSymbol(StringRef symbolName, void *symbolValue);
+
+    class HandleSet;
   };
 
 } // End sys namespace
diff --git a/include/llvm/Support/ELFRelocs/AArch64.def b/include/llvm/Support/ELFRelocs/AArch64.def
index c21df07d2db..4afcd7d1f09 100644
--- a/include/llvm/Support/ELFRelocs/AArch64.def
+++ b/include/llvm/Support/ELFRelocs/AArch64.def
@@ -109,8 +109,8 @@ ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC,      0x22f)
 ELF_RELOC(R_AARCH64_TLSDESC_LD_PREL19,               0x230)
 ELF_RELOC(R_AARCH64_TLSDESC_ADR_PREL21,              0x231)
 ELF_RELOC(R_AARCH64_TLSDESC_ADR_PAGE21,              0x232)
-ELF_RELOC(R_AARCH64_TLSDESC_LD64_LO12_NC,            0x233)
-ELF_RELOC(R_AARCH64_TLSDESC_ADD_LO12_NC,             0x234)
+ELF_RELOC(R_AARCH64_TLSDESC_LD64_LO12,               0x233)
+ELF_RELOC(R_AARCH64_TLSDESC_ADD_LO12,                0x234)
 ELF_RELOC(R_AARCH64_TLSDESC_OFF_G1,                  0x235)
 ELF_RELOC(R_AARCH64_TLSDESC_OFF_G0_NC,               0x236)
 ELF_RELOC(R_AARCH64_TLSDESC_LDR,                     0x237)
@@ -144,21 +144,28 @@ ELF_RELOC(R_AARCH64_P32_ADR_PREL_LO21,               0x00a)
 ELF_RELOC(R_AARCH64_P32_ADR_PREL_PG_HI21,            0x00b)
 ELF_RELOC(R_AARCH64_P32_ADD_ABS_LO12_NC,             0x00c)
 ELF_RELOC(R_AARCH64_P32_LDST8_ABS_LO12_NC,           0x00d)
+ELF_RELOC(R_AARCH64_P32_LDST16_ABS_LO12_NC,          0x00e)
+ELF_RELOC(R_AARCH64_P32_LDST32_ABS_LO12_NC,          0x00f)
+ELF_RELOC(R_AARCH64_P32_LDST64_ABS_LO12_NC,          0x010)
+ELF_RELOC(R_AARCH64_P32_LDST128_ABS_LO12_NC,         0x011)
 ELF_RELOC(R_AARCH64_P32_TSTBR14,                     0x012)
 ELF_RELOC(R_AARCH64_P32_CONDBR19,                    0x013)
 ELF_RELOC(R_AARCH64_P32_JUMP26,                      0x014)
 ELF_RELOC(R_AARCH64_P32_CALL26,                      0x015)
-ELF_RELOC(R_AARCH64_P32_LDST16_ABS_LO12_NC,          0x00e)
-ELF_RELOC(R_AARCH64_P32_LDST32_ABS_LO12_NC,          0x00f)
-ELF_RELOC(R_AARCH64_P32_LDST64_ABS_LO12_NC,          0x010)
 ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0,                0x016)
 ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0_NC,             0x017)
 ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G1,                0x018)
-ELF_RELOC(R_AARCH64_P32_LDST128_ABS_LO12_NC,         0x011)
 ELF_RELOC(R_AARCH64_P32_GOT_LD_PREL19,               0x019)
 ELF_RELOC(R_AARCH64_P32_ADR_GOT_PAGE,                0x01a)
-ELF_RELOC(R_AARCH64_P32_LD64_GOT_LO12_NC,            0x01b)
+ELF_RELOC(R_AARCH64_P32_LD32_GOT_LO12_NC,            0x01b)
 ELF_RELOC(R_AARCH64_P32_LD32_GOTPAGE_LO14,           0x01c)
+ELF_RELOC(R_AARCH64_P32_TLSGD_ADR_PREL21,            0x050)
+ELF_RELOC(R_AARCH64_P32_TLSGD_ADR_PAGE21,            0x051)
+ELF_RELOC(R_AARCH64_P32_TLSGD_ADD_LO12_NC,           0x052)
+ELF_RELOC(R_AARCH64_P32_TLSLD_ADR_PREL21,            0x053)
+ELF_RELOC(R_AARCH64_P32_TLSLD_ADR_PAGE21,            0x054)
+ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_LO12_NC,           0x055)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LD_PREL19,             0x056)
 ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1,        0x057)
 ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0,        0x058)
 ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC,     0x059)
@@ -173,6 +180,8 @@ ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12,    0x061)
 ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC, 0x062)
 ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12,    0x063)
 ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC, 0x064)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12,   0x065)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12_NC,0x066)
 ELF_RELOC(R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21,   0x067)
 ELF_RELOC(R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC, 0x068)
 ELF_RELOC(R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19,    0x069)
@@ -190,12 +199,20 @@ ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12,     0x074)
 ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC,  0x075)
 ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12,     0x076)
 ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC,  0x077)
-ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PAGE21,          0x051)
-ELF_RELOC(R_AARCH64_P32_TLSDESC_LD32_LO12_NC,        0x07d)
-ELF_RELOC(R_AARCH64_P32_TLSDESC_ADD_LO12_NC,         0x034)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12,    0x078)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12_NC, 0x079)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_LD_PREL19,           0x07a)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PREL21,          0x07b)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PAGE21,          0x07c)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_LD32_LO12,           0x07d)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_ADD_LO12,            0x07e)
 ELF_RELOC(R_AARCH64_P32_TLSDESC_CALL,                0x07f)
 ELF_RELOC(R_AARCH64_P32_COPY,                        0x0b4)
 ELF_RELOC(R_AARCH64_P32_GLOB_DAT,                    0x0b5)
 ELF_RELOC(R_AARCH64_P32_JUMP_SLOT,                   0x0b6)
 ELF_RELOC(R_AARCH64_P32_RELATIVE,                    0x0b7)
+ELF_RELOC(R_AARCH64_P32_TLS_DTPREL,                  0x0b8)
+ELF_RELOC(R_AARCH64_P32_TLS_DTPMOD,                  0x0b9)
+ELF_RELOC(R_AARCH64_P32_TLS_TPREL,                   0x0ba)
+ELF_RELOC(R_AARCH64_P32_TLSDESC,                     0x0bb)
 ELF_RELOC(R_AARCH64_P32_IRELATIVE,                   0x0bc)
diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h
index 08d4dedd0ac..292ea9e4b71 100644
--- a/include/llvm/Support/KnownBits.h
+++ b/include/llvm/Support/KnownBits.h
@@ -19,7 +19,7 @@
 
 namespace llvm {
 
-// For now this is a simple wrapper around two APInts.
+// Struct for tracking the known zeros and ones of a value.
 struct KnownBits {
   APInt Zero;
   APInt One;
@@ -36,6 +36,24 @@ struct KnownBits {
            "Zero and One should have the same width!");
     return Zero.getBitWidth();
   }
+
+  /// Returns true if this value is known to be negative.
+  bool isNegative() const { return One.isSignBitSet(); }
+
+  /// Returns true if this value is known to be non-negative.
+  bool isNonNegative() const { return Zero.isSignBitSet(); }
+
+  /// Make this value negative.
+  void makeNegative() {
+    assert(!isNonNegative() && "Can't make a non-negative value negative");
+    One.setSignBit();
+  }
+
+  /// Make this value negative.
+  void makeNonNegative() {
+    assert(!isNegative() && "Can't make a negative value non-negative");
+    Zero.setSignBit();
+  }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h
index ff775f3b7b3..29640db6921 100644
--- a/include/llvm/Support/LEB128.h
+++ b/include/llvm/Support/LEB128.h
@@ -45,8 +45,7 @@ inline void encodeSLEB128(int64_t Value, raw_ostream &OS,
 
 /// Utility function to encode a SLEB128 value to a buffer. Returns
 /// the length in bytes of the encoded value.
-inline unsigned encodeSLEB128(int64_t Value, uint8_t *p,
-                              unsigned Padding = 0) {
+inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned Padding = 0) {
   uint8_t *orig_p = p;
   bool More;
   do {
@@ -111,7 +110,6 @@ inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,
   return (unsigned)(p - orig_p);
 }
 
-
 /// Utility function to decode a ULEB128 value.
 inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
                               const uint8_t *end = nullptr,
@@ -119,19 +117,19 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
   const uint8_t *orig_p = p;
   uint64_t Value = 0;
   unsigned Shift = 0;
-  if(error)
+  if (error)
     *error = nullptr;
   do {
-    if(end && p == end){
-      if(error)
+    if (end && p == end) {
+      if (error)
         *error = "malformed uleb128, extends past end";
       if (n)
         *n = (unsigned)(p - orig_p);
       return 0;
     }
     uint64_t Slice = *p & 0x7f;
-    if(Shift >= 64 || Slice << Shift >> Shift != Slice){
-      if(error)
+    if (Shift >= 64 || Slice << Shift >> Shift != Slice) {
+      if (error)
         *error = "uleb128 too big for uint64";
       if (n)
         *n = (unsigned)(p - orig_p);
@@ -154,15 +152,15 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   unsigned Shift = 0;
   uint8_t Byte;
   do {
-    if(end && p == end){
-      if(error)
+    if (end && p == end) {
+      if (error)
         *error = "malformed sleb128, extends past end";
       if (n)
         *n = (unsigned)(p - orig_p);
       return 0;
     }
     Byte = *p++;
-    Value |= ((Byte & 0x7f) << Shift);
+    Value |= (int64_t(Byte & 0x7f) << Shift);
     Shift += 7;
   } while (Byte >= 128);
   // Sign extend negative numbers.
@@ -173,13 +171,12 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   return Value;
 }
 
-
 /// Utility function to get the size of the ULEB128-encoded value.
 extern unsigned getULEB128Size(uint64_t Value);
 
 /// Utility function to get the size of the SLEB128-encoded value.
 extern unsigned getSLEB128Size(int64_t Value);
 
-}  // namespace llvm
+} // namespace llvm
 
-#endif  // LLVM_SYSTEM_LEB128_H
+#endif // LLVM_SYSTEM_LEB128_H
diff --git a/include/llvm/Support/ScopedPrinter.h b/include/llvm/Support/ScopedPrinter.h
index a2f2e098543..1b665193221 100644
--- a/include/llvm/Support/ScopedPrinter.h
+++ b/include/llvm/Support/ScopedPrinter.h
@@ -295,6 +295,11 @@ public:
     printBinaryImpl(Label, StringRef(), V, false);
   }
 
+  void printBinaryBlock(StringRef Label, ArrayRef<uint8_t> Value,
+                        uint32_t StartOffset) {
+    printBinaryImpl(Label, StringRef(), Value, true, StartOffset);
+  }
+
   void printBinaryBlock(StringRef Label, ArrayRef<uint8_t> Value) {
     printBinaryImpl(Label, StringRef(), Value, true);
   }
@@ -333,7 +338,7 @@ private:
   }
 
   void printBinaryImpl(StringRef Label, StringRef Str, ArrayRef<uint8_t> Value,
-                       bool Block);
+                       bool Block, uint32_t StartOffset = 0);
 
   raw_ostream &OS;
   int IndentLevel;
diff --git a/include/llvm/Support/StringSaver.h b/include/llvm/Support/StringSaver.h
index fcddd4cde5b..e85b2895ce5 100644
--- a/include/llvm/Support/StringSaver.h
+++ b/include/llvm/Support/StringSaver.h
@@ -26,7 +26,7 @@ public:
   StringRef save(const char *S) { return save(StringRef(S)); }
   StringRef save(StringRef S);
   StringRef save(const Twine &S) { return save(StringRef(S.str())); }
-  StringRef save(std::string &S) { return save(StringRef(S)); }
+  StringRef save(const std::string &S) { return save(StringRef(S)); }
 };
 }
 #endif
diff --git a/include/llvm/Support/Wasm.h b/include/llvm/Support/Wasm.h
index 8e6c418c818..a48dfe10b3b 100644
--- a/include/llvm/Support/Wasm.h
+++ b/include/llvm/Support/Wasm.h
@@ -24,6 +24,8 @@ namespace wasm {
 const char WasmMagic[] = {'\0', 'a', 's', 'm'};
 // Wasm binary format version
 const uint32_t WasmVersion = 0x1;
+// Wasm uses a 64k page size
+const uint32_t WasmPageSize = 65536;
 
 struct WasmObjectHeader {
   StringRef Magic;
@@ -106,7 +108,7 @@ struct WasmRelocation {
   uint32_t Type;         // The type of the relocation.
   int32_t Index;         // Index into function to global index space.
   uint64_t Offset;       // Offset from the start of the section.
-  uint64_t Addend;       // A value to add to the symbol.
+  int64_t Addend;        // A value to add to the symbol.
 };
 
 enum : unsigned {
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index b21689e0e13..d7fbca93f59 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -530,6 +530,12 @@ class Predicate<string cond> {
   /// PredicateName - User-level name to use for the predicate. Mainly for use
   /// in diagnostics such as missing feature errors in the asm matcher.
   string PredicateName = "";
+
+  /// Setting this to '1' indicates that the predicate must be recomputed on
+  /// every function change. Most predicates can leave this at '0'.
+  ///
+  /// Ignored by SelectionDAG, it always recomputes the predicate on every use.
+  bit RecomputePerFunction = 0;
 }
 
 /// NoHonorSignDependentRounding - This predicate is true if support for
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 51f11e1a9a2..aa9230044b1 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -69,6 +69,7 @@ class CCValAssign;
 class FastISel;
 class FunctionLoweringInfo;
 class IntrinsicInst;
+struct KnownBits;
 class MachineBasicBlock;
 class MachineFunction;
 class MachineInstr;
@@ -774,6 +775,74 @@ public:
     return (!isTypeLegal(VT) && getOperationAction(Op, VT) == Custom);
   }
 
+  /// Return true if lowering to a jump table is allowed.
+  bool areJTsAllowed(const Function *Fn) const {
+    if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
+      return false;
+
+    return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
+  }
+
+  /// Check whether the range [Low,High] fits in a machine word.
+  bool rangeFitsInWord(const APInt &Low, const APInt &High,
+                       const DataLayout &DL) const {
+    // FIXME: Using the pointer type doesn't seem ideal.
+    uint64_t BW = DL.getPointerSizeInBits();
+    uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
+    return Range <= BW;
+  }
+
+  /// Return true if lowering to a jump table is suitable for a set of case
+  /// clusters which may contain \p NumCases cases, \p Range range of values.
+  /// FIXME: This function check the maximum table size and density, but the
+  /// minimum size is not checked. It would be nice if the the minimum size is
+  /// also combined within this function. Currently, the minimum size check is
+  /// performed in findJumpTable() in SelectionDAGBuiler and
+  /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+  bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
+                              uint64_t Range) const {
+    const bool OptForSize = SI->getParent()->getParent()->optForSize();
+    const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
+    const unsigned MaxJumpTableSize =
+        OptForSize || getMaximumJumpTableSize() == 0
+            ? UINT_MAX
+            : getMaximumJumpTableSize();
+    // Check whether a range of clusters is dense enough for a jump table.
+    if (Range <= MaxJumpTableSize &&
+        (NumCases * 100 >= Range * MinDensity)) {
+      return true;
+    }
+    return false;
+  }
+
+  /// Return true if lowering to a bit test is suitable for a set of case
+  /// clusters which contains \p NumDests unique destinations, \p Low and
+  /// \p High as its lowest and highest case values, and expects \p NumCmps
+  /// case value comparisons. Check if the number of destinations, comparison
+  /// metric, and range are all suitable.
+  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
+                             const APInt &Low, const APInt &High,
+                             const DataLayout &DL) const {
+    // FIXME: I don't think NumCmps is the correct metric: a single case and a
+    // range of cases both require only one branch to lower. Just looking at the
+    // number of clusters and destinations should be enough to decide whether to
+    // build bit tests.
+
+    // To lower a range with bit tests, the range must fit the bitwidth of a
+    // machine word.
+    if (!rangeFitsInWord(Low, High, DL))
+      return false;
+
+    // Decide whether it's profitable to lower this range with bit tests. Each
+    // destination requires a bit test and branch, and there is an overall range
+    // check branch. For a small number of clusters, separate comparisons might
+    // be cheaper, and for many destinations, splitting the range might be
+    // better.
+    return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
+           (NumDests == 3 && NumCmps >= 6);
+  }
+
   /// Return true if the specified operation is illegal on this target or
   /// unlikely to be made legal with custom lowering. This is used to help guide
   /// high-level lowering decisions.
@@ -1148,6 +1217,9 @@ public:
   /// Return lower limit for number of blocks in a jump table.
   unsigned getMinimumJumpTableEntries() const;
 
+  /// Return lower limit of the density in a jump table.
+  unsigned getMinimumJumpTableDensity(bool OptForSize) const;
+
   /// Return upper limit for number of entries in a jump table.
   /// Zero if no limit.
   unsigned getMaximumJumpTableSize() const;
@@ -2025,6 +2097,12 @@ public:
     return LibcallCallingConvs[Call];
   }
 
+  /// Execute target specific actions to finalize target lowering.
+  /// This is used to set extra flags in MachineFrameInformation and freezing
+  /// the set of reserved registers.
+  /// The default implementation just freezes the set of reserved registers.
+  virtual void finalizeLowering(MachineFunction &MF) const;
+
 private:
   const TargetMachine &TM;
 
@@ -2442,7 +2520,7 @@ public:
   ///    with TLO.New will be incorrect when this parameter is true and TLO.Old
   ///    has multiple uses.
   bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
-                            APInt &KnownZero, APInt &KnownOne,
+                            KnownBits &Known,
                             TargetLoweringOpt &TLO,
                             unsigned Depth = 0,
                             bool AssumeSingleUse = false) const;
@@ -2456,8 +2534,7 @@ public:
   /// argument allows us to only collect the known bits that are shared by the
   /// requested vector elements.
   virtual void computeKnownBitsForTargetNode(const SDValue Op,
-                                             APInt &KnownZero,
-                                             APInt &KnownOne,
+                                             KnownBits &Known,
                                              const APInt &DemandedElts,
                                              const SelectionDAG &DAG,
                                              unsigned Depth = 0) const;
@@ -2584,12 +2661,6 @@ public:
     return false;
   }
 
-  /// Return true if the MachineFunction contains a COPY which would imply
-  /// HasCopyImplyingStackAdjustment.
-  virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const {
-    return false;
-  }
-
   /// Perform necessary initialization to handle a subset of CSRs explicitly
   /// via copies. This function is called at the beginning of instruction
   /// selection.
diff --git a/include/llvm/Transforms/Scalar/NaryReassociate.h b/include/llvm/Transforms/Scalar/NaryReassociate.h
index a74bb6cc419..f35707eeb3f 100644
--- a/include/llvm/Transforms/Scalar/NaryReassociate.h
+++ b/include/llvm/Transforms/Scalar/NaryReassociate.h
@@ -167,7 +167,7 @@ private:
   //     foo(a + b);
   //   if (p2)
   //     bar(a + b);
-  DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
+  DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs;
 };
 } // namespace llvm
 
diff --git a/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
new file mode 100644
index 00000000000..d7282ac6a78
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -0,0 +1,53 @@
+//===- SimpleLoopUnswitch.h - Hoist loop-invariant control flow -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H
+#define LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+
+/// This pass transforms loops that contain branches on loop-invariant
+/// conditions to have multiple loops. For example, it turns the left into the
+/// right code:
+///
+///  for (...)                  if (lic)
+///    A                          for (...)
+///    if (lic)                     A; B; C
+///      B                      else
+///    C                          for (...)
+///                                 A; C
+///
+/// This can increase the size of the code exponentially (doubling it every time
+/// a loop is unswitched) so we only unswitch if the resultant code will be
+/// smaller than a threshold.
+///
+/// This pass expects LICM to be run before it to hoist invariant conditions out
+/// of the loop, to make the unswitching opportunity obvious.
+///
+class SimpleLoopUnswitchPass : public PassInfoMixin<SimpleLoopUnswitchPass> {
+public:
+  SimpleLoopUnswitchPass() = default;
+
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+/// Create the legacy pass object for the simple loop unswitcher.
+///
+/// See the documentaion for `SimpleLoopUnswitchPass` for details.
+Pass *createSimpleLoopUnswitchLegacyPass();
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 337305a0a82..0a8903a6ed7 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -74,7 +74,7 @@ struct ClonedCodeInfo {
   /// All cloned call sites that have operand bundles attached are appended to
   /// this vector.  This vector may contain nulls or undefs if some of the
   /// originally inserted callsites were DCE'ed after they were cloned.
-  std::vector<WeakVH> OperandBundleCallSites;
+  std::vector<WeakTrackingVH> OperandBundleCallSites;
 
   ClonedCodeInfo() = default;
 };
@@ -192,7 +192,7 @@ public:
 
   /// InlinedCalls - InlineFunction fills this in with callsites that were
   /// inlined from the callee.  This is only filled in if CG is non-null.
-  SmallVector<WeakVH, 8> InlinedCalls;
+  SmallVector<WeakTrackingVH, 8> InlinedCalls;
 
   /// All of the new call sites inlined into the caller.
   ///
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 4933712fb8a..b5a5f4c2704 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -286,9 +286,6 @@ DbgDeclareInst *FindAllocaDbgDeclare(Value *V);
 /// Finds the llvm.dbg.value intrinsics describing a value.
 void findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V);
 
-/// Constants for \p replaceDbgDeclare and friends.
-enum { NoDeref = false, WithDeref = true };
-
 /// Replaces llvm.dbg.declare instruction when the address it describes
 /// is replaced with a new value. If Deref is true, an additional DW_OP_deref is
 /// prepended to the expression. If Offset is non-zero, a constant displacement
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index f5e843e2e8b..e9793fe4b66 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -84,6 +84,17 @@ void appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values);
 void filterDeadComdatFunctions(
     Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions);
 
+/// \brief Produce a unique identifier for this module by taking the MD5 sum of
+/// the names of the module's strong external symbols.
+///
+/// This identifier is normally guaranteed to be unique, or the program would
+/// fail to link due to multiply defined symbols.
+///
+/// If the module has no strong external symbols (such a module may still have a
+/// semantic effect if it performs global initialization), we cannot produce a
+/// unique identifier for this module, so we return the empty string.
+std::string getUniqueModuleId(Module *M);
+
 } // End llvm namespace
 
 #endif //  LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h
index 6cdeeeb60a6..8d50aeb10d6 100644
--- a/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -46,13 +46,13 @@ public:
 /// simplifyUsersOfIV - Simplify instructions that use this induction variable
 /// by using ScalarEvolution to analyze the IV's recurrence.
 bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
-                       LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead,
+                       LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
                        IVVisitor *V = nullptr);
 
 /// SimplifyLoopIVs - Simplify users of induction variables within this
 /// loop. This does not actually change or add IVs.
 bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
-                     LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead);
+                     LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead);
 
 } // end namespace llvm
 
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 950ad92afcd..e44dc437342 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -23,7 +23,7 @@ namespace llvm {
 
 class Value;
 class Instruction;
-typedef ValueMap<const Value *, WeakVH> ValueToValueMapTy;
+typedef ValueMap<const Value *, WeakTrackingVH> ValueToValueMapTy;
 
 /// This is a class that can be implemented by clients to remap types when
 /// cloning constants and instructions.
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index d669a8e5b61..10338f7937e 100644
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -40,8 +40,8 @@ class BoUpSLP;
 struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   typedef SmallVector<StoreInst *, 8> StoreList;
   typedef MapVector<Value *, StoreList> StoreListMap;
-  typedef SmallVector<WeakVH, 8> WeakVHList;
-  typedef MapVector<Value *, WeakVHList> WeakVHListMap;
+  typedef SmallVector<WeakTrackingVH, 8> WeakTrackingVHList;
+  typedef MapVector<Value *, WeakTrackingVHList> WeakTrackingVHListMap;
 
   ScalarEvolution *SE = nullptr;
   TargetTransformInfo *TTI = nullptr;
@@ -111,7 +111,7 @@ private:
   StoreListMap Stores;
 
   /// The getelementptr instructions in a basic block organized by base pointer.
-  WeakVHListMap GEPs;
+  WeakTrackingVHListMap GEPs;
 };
 }
 
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index 1fae9472448..0468c794e81 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -29,15 +29,16 @@ static cl::opt<bool>
                           cl::desc("Enable verification of assumption cache"),
                           cl::init(false));
 
-SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) {
+SmallVector<WeakTrackingVH, 1> &
+AssumptionCache::getOrInsertAffectedValues(Value *V) {
   // Try using find_as first to avoid creating extra value handles just for the
   // purpose of doing the lookup.
   auto AVI = AffectedValues.find_as(V);
   if (AVI != AffectedValues.end())
     return AVI->second;
 
-  auto AVIP = AffectedValues.insert({
-      AffectedValueCallbackVH(V, this), SmallVector<WeakVH, 1>()});
+  auto AVIP = AffectedValues.insert(
+      {AffectedValueCallbackVH(V, this), SmallVector<WeakTrackingVH, 1>()});
   return AVIP.first->second;
 }
 
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index 75726e84569..06410bf01dd 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -429,7 +429,7 @@ template <typename CFLAA> class CFLGraphBuilder {
 
       if (Inst->getType()->isPointerTy()) {
         auto *Fn = CS.getCalledFunction();
-        if (Fn == nullptr || !Fn->doesNotAlias(0))
+        if (Fn == nullptr || !Fn->doesNotAlias(AttributeList::ReturnIndex))
           // No need to call addNode() since we've added Inst at the
           // beginning of this function and we know it is not a global.
           Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index ea70f5752c6..8058e5b1935 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -204,7 +204,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
     // Get the set of call sites currently in the function.
     for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
       // If this call site is null, then the function pass deleted the call
-      // entirely and the WeakVH nulled it out.  
+      // entirely and the WeakTrackingVH nulled it out.
       if (!I->first ||
           // If we've already seen this call site, then the FunctionPass RAUW'd
           // one call with another, which resulted in two "uses" in the edge
@@ -347,7 +347,8 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
       DevirtualizedCall = true;
     
     // After scanning this function, if we still have entries in callsites, then
-    // they are dangling pointers.  WeakVH should save us for this, so abort if
+    // they are dangling pointers.  WeakTrackingVH should save us for this, so
+    // abort if
     // this happens.
     assert(CallSites.empty() && "Dangling pointers found in call sites map");
     
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 285339deaaf..9f5dc531823 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -181,7 +181,7 @@ void DemandedBits::determineLiveOperandBits(
         // bits, then we must keep the highest input bit.
         if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
             .getBoolValue())
-          AB.setBit(BitWidth-1);
+          AB.setSignBit();
 
         // If the shift is exact, then the low bits are not dead
         // (they must be zero).
@@ -239,7 +239,7 @@ void DemandedBits::determineLiveOperandBits(
     if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
                                       AOut.getBitWidth() - BitWidth))
         .getBoolValue())
-      AB.setBit(BitWidth-1);
+      AB.setSignBit();
     break;
   case Instruction::Select:
     if (OperandNo != 0)
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 788f908bafc..100a591e452 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -54,6 +54,11 @@ static cl::opt<int>
                           cl::init(45),
                           cl::desc("Threshold for inlining cold callsites"));
 
+static cl::opt<bool>
+    EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden,
+                            cl::init(false),
+                            cl::desc("Enable generic switch cost model"));
+
 // We introduce this threshold to help performance of instrumentation based
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
@@ -998,11 +1003,72 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
     if (isa<ConstantInt>(V))
       return true;
 
-  // Otherwise, we need to accumulate a cost proportional to the number of
-  // distinct successor blocks. This fan-out in the CFG cannot be represented
-  // for free even if we can represent the core switch as a jumptable that
-  // takes a single instruction.
-  //
+  if (EnableGenericSwitchCost) {
+    // Assume the most general case where the swith is lowered into
+    // either a jump table, bit test, or a balanced binary tree consisting of
+    // case clusters without merging adjacent clusters with the same
+    // destination. We do not consider the switches that are lowered with a mix
+    // of jump table/bit test/binary search tree. The cost of the switch is
+    // proportional to the size of the tree or the size of jump table range.
+
+    // Exit early for a large switch, assuming one case needs at least one
+    // instruction.
+    // FIXME: This is not true for a bit test, but ignore such case for now to
+    // save compile-time.
+    int64_t CostLowerBound =
+        std::min((int64_t)INT_MAX,
+                 (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
+
+    if (CostLowerBound > Threshold) {
+      Cost = CostLowerBound;
+      return false;
+    }
+
+    unsigned JumpTableSize = 0;
+    unsigned NumCaseCluster =
+        TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+
+    // If suitable for a jump table, consider the cost for the table size and
+    // branch to destination.
+    if (JumpTableSize) {
+      int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
+                       4 * InlineConstants::InstrCost;
+      Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
+      return false;
+    }
+
+    // Considering forming a binary search, we should find the number of nodes
+    // which is same as the number of comparisons when lowered. For a given
+    // number of clusters, n, we can define a recursive function, f(n), to find
+    // the number of nodes in the tree. The recursion is :
+    // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
+    // and f(n) = n, when n <= 3.
+    // This will lead a binary tree where the leaf should be either f(2) or f(3)
+    // when n > 3.  So, the number of comparisons from leaves should be n, while
+    // the number of non-leaf should be :
+    //   2^(log2(n) - 1) - 1
+    //   = 2^log2(n) * 2^-1 - 1
+    //   = n / 2 - 1.
+    // Considering comparisons from leaf and non-leaf nodes, we can estimate the
+    // number of comparisons in a simple closed form :
+    //   n + n / 2 - 1 = n * 3 / 2 - 1
+    if (NumCaseCluster <= 3) {
+      // Suppose a comparison includes one compare and one conditional branch.
+      Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+      return false;
+    }
+    int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
+    uint64_t SwitchCost =
+        ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+    Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
+    return false;
+  }
+
+  // Use a simple switch cost model where we accumulate a cost proportional to
+  // the number of distinct successor blocks. This fan-out in the CFG cannot
+  // be represented for free even if we can represent the core switch as a
+  // jumptable that takes a single instruction.
+  ///
   // NB: We convert large switches which are just used to initialize large phi
   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
   // inlining those. It will prevent inlining in cases where the optimization
@@ -1217,36 +1283,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   // the rest of the function body.
   Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
 
-  // Give out bonuses per argument, as the instructions setting them up will
-  // be gone after inlining.
-  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
-    if (CS.isByValArgument(I)) {
-      // We approximate the number of loads and stores needed by dividing the
-      // size of the byval type by the target's pointer size.
-      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
-      unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
-      unsigned PointerSize = DL.getPointerSizeInBits();
-      // Ceiling division.
-      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+  // Give out bonuses for the callsite, as the instructions setting them up
+  // will be gone after inlining.
+  Cost -= getCallsiteCost(CS, DL);
 
-      // If it generates more than 8 stores it is likely to be expanded as an
-      // inline memcpy so we take that as an upper bound. Otherwise we assume
-      // one load and one store per word copied.
-      // FIXME: The maxStoresPerMemcpy setting from the target should be used
-      // here instead of a magic number of 8, but it's not available via
-      // DataLayout.
-      NumStores = std::min(NumStores, 8U);
-
-      Cost -= 2 * NumStores * InlineConstants::InstrCost;
-    } else {
-      // For non-byval arguments subtract off one instruction per call
-      // argument.
-      Cost -= InlineConstants::InstrCost;
-    }
-  }
-  // The call instruction also disappears after inlining.
-  Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty;
-  
   // If there is only one call of the function, and it has internal linkage,
   // the cost of inlining it drops dramatically.
   bool OnlyOneCallAndLocalLinkage =
@@ -1431,6 +1471,38 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
 }
 
+int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
+  int Cost = 0;
+  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+    if (CS.isByValArgument(I)) {
+      // We approximate the number of loads and stores needed by dividing the
+      // size of the byval type by the target's pointer size.
+      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+      unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
+      unsigned PointerSize = DL.getPointerSizeInBits();
+      // Ceiling division.
+      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+      // If it generates more than 8 stores it is likely to be expanded as an
+      // inline memcpy so we take that as an upper bound. Otherwise we assume
+      // one load and one store per word copied.
+      // FIXME: The maxStoresPerMemcpy setting from the target should be used
+      // here instead of a magic number of 8, but it's not available via
+      // DataLayout.
+      NumStores = std::min(NumStores, 8U);
+
+      Cost += 2 * NumStores * InlineConstants::InstrCost;
+    } else {
+      // For non-byval arguments subtract off one instruction per call
+      // argument.
+      Cost += InlineConstants::InstrCost;
+    }
+  }
+  // The call instruction also disappears after inlining.
+  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
+  return Cost;
+}
+
 InlineCost llvm::getInlineCost(
     CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index e720e3ebecd..2f25a118366 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -583,14 +585,6 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   return nullptr;
 }
 
-Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const DataLayout &DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI},
-                           RecursionLimit);
-}
-
 Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const SimplifyQuery &Query) {
   return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit);
@@ -799,14 +793,6 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   return nullptr;
 }
 
-Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const DataLayout &DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI},
-                           RecursionLimit);
-}
-
 Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const SimplifyQuery &Q) {
   return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
@@ -953,55 +939,22 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyFAddInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q) {
   return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyFSubInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
 
 Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q) {
   return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyFMulInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q) {
   return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                             const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyMulInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -1124,13 +1077,6 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifySDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -1155,13 +1101,6 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyUDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -1207,15 +1146,6 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   return nullptr;
 }
 
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyFDivInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q) {
   return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit);
@@ -1263,13 +1193,6 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifySRemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -1294,13 +1217,6 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyURemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -1327,15 +1243,6 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   return nullptr;
 }
 
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyFRemInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q) {
   return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit);
@@ -1464,14 +1371,6 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   return nullptr;
 }
 
-Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const DataLayout &DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI},
-                           RecursionLimit);
-}
-
 Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const SimplifyQuery &Q) {
   return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
@@ -1493,15 +1392,6 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
   return nullptr;
 }
 
-Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyLShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
                               const SimplifyQuery &Q) {
   return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
@@ -1532,15 +1422,6 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
   return nullptr;
 }
 
-Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyAShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
                               const SimplifyQuery &Q) {
   return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
@@ -1793,13 +1674,6 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                             const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyAndInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -2023,13 +1897,6 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                            const TargetLibraryInfo *TLI,
-                            const DominatorTree *DT, AssumptionCache *AC,
-                            const Instruction *CxtI) {
-  return ::SimplifyOrInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -2075,13 +1942,6 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   return nullptr;
 }
 
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL,
-                             const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyXorInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
   return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit);
 }
@@ -3448,15 +3308,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   return nullptr;
 }
 
-Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyICmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               const SimplifyQuery &Q) {
   return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
@@ -3586,15 +3437,6 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   return nullptr;
 }
 
-Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              FastMathFlags FMF, const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               FastMathFlags FMF, const SimplifyQuery &Q) {
   return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
@@ -3845,9 +3687,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
     return TrueVal;
 
   if (isa<UndefValue>(CondVal)) {  // select undef, X, Y -> X or Y
-    if (isa<Constant>(TrueVal))
-      return TrueVal;
-    return FalseVal;
+    if (isa<Constant>(FalseVal))
+      return FalseVal;
+    return TrueVal;
   }
   if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
     return FalseVal;
@@ -3861,15 +3703,6 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
   return nullptr;
 }
 
-Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
-                                const DataLayout &DL,
-                                const TargetLibraryInfo *TLI,
-                                const DominatorTree *DT, AssumptionCache *AC,
-                                const Instruction *CxtI) {
-  return ::SimplifySelectInst(Cond, TrueVal, FalseVal, {DL, TLI, DT, AC, CxtI},
-                              RecursionLimit);
-}
-
 Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                                 const SimplifyQuery &Q) {
   return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
@@ -3987,14 +3820,6 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
                                         Ops.slice(1));
 }
 
-Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
-                             const DataLayout &DL,
-                             const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyGEPInst(SrcTy, Ops, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
                              const SimplifyQuery &Q) {
   return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit);
@@ -4029,14 +3854,6 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
   return nullptr;
 }
 
-Value *llvm::SimplifyInsertValueInst(
-    Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL,
-    const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
-    const Instruction *CxtI) {
-  return ::SimplifyInsertValueInst(Agg, Val, Idxs, {DL, TLI, DT, AC, CxtI},
-                                   RecursionLimit);
-}
-
 Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
                                      ArrayRef<unsigned> Idxs,
                                      const SimplifyQuery &Q) {
@@ -4068,16 +3885,6 @@ static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
   return nullptr;
 }
 
-Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
-                                      const DataLayout &DL,
-                                      const TargetLibraryInfo *TLI,
-                                      const DominatorTree *DT,
-                                      AssumptionCache *AC,
-                                      const Instruction *CxtI) {
-  return ::SimplifyExtractValueInst(Agg, Idxs, {DL, TLI, DT, AC, CxtI},
-                                    RecursionLimit);
-}
-
 Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
                                       const SimplifyQuery &Q) {
   return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
@@ -4108,13 +3915,6 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
   return nullptr;
 }
 
-Value *llvm::SimplifyExtractElementInst(
-    Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI,
-    const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) {
-  return ::SimplifyExtractElementInst(Vec, Idx, {DL, TLI, DT, AC, CxtI},
-                                      RecursionLimit);
-}
-
 Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx,
                                         const SimplifyQuery &Q) {
   return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
@@ -4187,15 +3987,6 @@ static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
   return nullptr;
 }
 
-Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
-                              const DataLayout &DL,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, AssumptionCache *AC,
-                              const Instruction *CxtI) {
-  return ::SimplifyCastInst(CastOpc, Op, Ty, {DL, TLI, DT, AC, CxtI},
-                            RecursionLimit);
-}
-
 Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
                               const SimplifyQuery &Q) {
   return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
@@ -4258,10 +4049,34 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
 static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
                                         Type *RetTy, const SimplifyQuery &Q,
                                         unsigned MaxRecurse) {
+  if (isa<UndefValue>(Mask))
+    return UndefValue::get(RetTy);
+
   Type *InVecTy = Op0->getType();
   unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
   unsigned InVecNumElts = InVecTy->getVectorNumElements();
 
+  SmallVector<int, 32> Indices;
+  ShuffleVectorInst::getShuffleMask(Mask, Indices);
+  assert(MaskNumElts == Indices.size() &&
+         "Size of Indices not same as number of mask elements?");
+
+  // Canonicalization: If mask does not select elements from an input vector,
+  // replace that input vector with undef.
+  bool MaskSelects0 = false, MaskSelects1 = false;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Indices[i] == -1)
+      continue;
+    if ((unsigned)Indices[i] < InVecNumElts)
+      MaskSelects0 = true;
+    else
+      MaskSelects1 = true;
+  }
+  if (!MaskSelects0)
+    Op0 = UndefValue::get(InVecTy);
+  if (!MaskSelects1)
+    Op1 = UndefValue::get(InVecTy);
+
   auto *Op0Const = dyn_cast<Constant>(Op0);
   auto *Op1Const = dyn_cast<Constant>(Op1);
 
@@ -4269,42 +4084,33 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
   if (Op0Const && Op1Const)
     return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
 
-  // If only one of the operands is constant, constant fold the shuffle if the
-  // mask does not select elements from the variable operand.
-  bool MaskSelects0 = false, MaskSelects1 = false;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    int Idx = ShuffleVectorInst::getMaskValue(Mask, i);
-    if (Idx == -1)
-      continue;
-    if ((unsigned)Idx < InVecNumElts)
-      MaskSelects0 = true;
-    else
-      MaskSelects1 = true;
+  // Canonicalization: if only one input vector is constant, it shall be the
+  // second one.
+  if (Op0Const && !Op1Const) {
+    std::swap(Op0, Op1);
+    for (auto &Idx : Indices) {
+      if (Idx == -1)
+        continue;
+      Idx = Idx < (int)MaskNumElts ? Idx + MaskNumElts : Idx - MaskNumElts;
+    }
+    Mask = ConstantDataVector::get(
+        Mask->getContext(),
+        makeArrayRef(reinterpret_cast<uint32_t *>(Indices.data()),
+                     MaskNumElts));
   }
-  if (!MaskSelects0 && Op1Const)
-    return ConstantFoldShuffleVectorInstruction(UndefValue::get(InVecTy),
-                                                Op1Const, Mask);
-  if (!MaskSelects1 && Op0Const)
-    return ConstantFoldShuffleVectorInstruction(Op0Const,
-                                                UndefValue::get(InVecTy), Mask);
 
   // A shuffle of a splat is always the splat itself. Legal if the shuffle's
   // value type is same as the input vectors' type.
   if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
-    if (!MaskSelects1 && RetTy == InVecTy &&
+    if (isa<UndefValue>(Op1) && RetTy == InVecTy &&
         OpShuf->getMask()->getSplatValue())
       return Op0;
-  if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op1))
-    if (!MaskSelects0 && RetTy == InVecTy &&
-        OpShuf->getMask()->getSplatValue())
-      return Op1;
 
   // Don't fold a shuffle with undef mask elements. This may get folded in a
   // better way using demanded bits or other analysis.
   // TODO: Should we allow this?
-  for (unsigned i = 0; i != MaskNumElts; ++i)
-    if (ShuffleVectorInst::getMaskValue(Mask, i) == -1)
-      return nullptr;
+  if (find(Indices, -1) != Indices.end())
+    return nullptr;
 
   // Check if every element of this shuffle can be mapped back to the
   // corresponding element of a single root vector. If so, we don't need this
@@ -4324,14 +4130,6 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
 }
 
 /// Given operands for a ShuffleVectorInst, fold the result or return null.
-Value *llvm::SimplifyShuffleVectorInst(
-    Value *Op0, Value *Op1, Constant *Mask, Type *RetTy,
-    const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT,
-    AssumptionCache *AC, const Instruction *CxtI) {
-  return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy,
-                                     {DL, TLI, DT, AC, CxtI}, RecursionLimit);
-}
-
 Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
                                        Type *RetTy, const SimplifyQuery &Q) {
   return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
@@ -4406,28 +4204,11 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
   }
 }
 
-Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                           const DataLayout &DL, const TargetLibraryInfo *TLI,
-                           const DominatorTree *DT, AssumptionCache *AC,
-                           const Instruction *CxtI) {
-  return ::SimplifyBinOp(Opcode, LHS, RHS, {DL, TLI, DT, AC, CxtI},
-                         RecursionLimit);
-}
-
 Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                            const SimplifyQuery &Q) {
   return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                             FastMathFlags FMF, const DataLayout &DL,
-                             const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI},
-                           RecursionLimit);
-}
-
 Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                              FastMathFlags FMF, const SimplifyQuery &Q) {
   return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
@@ -4441,14 +4222,6 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
 }
 
-Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                             const DataLayout &DL, const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, AssumptionCache *AC,
-                             const Instruction *CxtI) {
-  return ::SimplifyCmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI},
-                           RecursionLimit);
-}
-
 Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                              const SimplifyQuery &Q) {
   return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
@@ -4672,27 +4445,11 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
   return ConstantFoldCall(F, ConstantArgs, Q.TLI);
 }
 
-Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
-                          User::op_iterator ArgEnd, const DataLayout &DL,
-                          const TargetLibraryInfo *TLI, const DominatorTree *DT,
-                          AssumptionCache *AC, const Instruction *CxtI) {
-  return ::SimplifyCall(V, ArgBegin, ArgEnd, {DL, TLI, DT, AC, CxtI},
-                        RecursionLimit);
-}
-
 Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
                           User::op_iterator ArgEnd, const SimplifyQuery &Q) {
   return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
-                          const DataLayout &DL, const TargetLibraryInfo *TLI,
-                          const DominatorTree *DT, AssumptionCache *AC,
-                          const Instruction *CxtI) {
-  return ::SimplifyCall(V, Args.begin(), Args.end(), {DL, TLI, DT, AC, CxtI},
-                        RecursionLimit);
-}
-
 Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
                           const SimplifyQuery &Q) {
   return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit);
@@ -4700,15 +4457,10 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
 
 /// See if we can compute a simplified version of this instruction.
 /// If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
-                                 const TargetLibraryInfo *TLI,
-                                 const DominatorTree *DT, AssumptionCache *AC,
-                                 OptimizationRemarkEmitter *ORE) {
-  return SimplifyInstruction(I, {DL, TLI, DT, AC, I}, ORE);
-}
 
-Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
+Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
                                  OptimizationRemarkEmitter *ORE) {
+  const SimplifyQuery Q = SQ.CxtI ? SQ : SQ.getWithInstruction(I);
   Value *Result;
 
   switch (I->getOpcode()) {
@@ -4905,7 +4657,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
     I = Worklist[Idx];
 
     // See if this instruction simplifies.
-    SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC);
+    SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC});
     if (!SimpleV)
       continue;
 
@@ -4944,3 +4696,31 @@ bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
   assert(SimpleV && "Must provide a simplified value.");
   return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
 }
+
+namespace llvm {
+const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) {
+  auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+  auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+  auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+  auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>();
+  auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
+  return {F.getParent()->getDataLayout(), TLI, DT, AC};
+}
+
+const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &AR,
+                                         const DataLayout &DL) {
+  return {DL, &AR.TLI, &AR.DT, &AR.AC};
+}
+
+template <class T, class... TArgs>
+const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &AM,
+                                         Function &F) {
+  auto *DT = AM.template getCachedResult<DominatorTreeAnalysis>(F);
+  auto *TLI = AM.template getCachedResult<TargetLibraryAnalysis>(F);
+  auto *AC = AM.template getCachedResult<AssumptionAnalysis>(F);
+  return {F.getParent()->getDataLayout(), TLI, DT, AC};
+}
+template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &,
+                                                  Function &);
+}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index ad01f7f2f21..a98383eaf4a 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -920,7 +920,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
   // value is overdefined.
   if (BB == &BB->getParent()->getEntryBlock()) {
     assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
-    // Bofore giving up, see if we can prove the pointer non-null local to
+    // Before giving up, see if we can prove the pointer non-null local to
     // this particular block.
     if (Val->getType()->isPointerTy() &&
         (isKnownNonNull(Val) || isObjectDereferencedInBlock(Val, BB))) {
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 0f04af54cdc..59813824644 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -699,7 +699,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
 
   // As a last resort, try SimplifyInstruction or constant folding.
   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
+    if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC}))
       return findValueImpl(W, OffsetOk, Visited);
   } else if (auto *C = dyn_cast<Constant>(V)) {
     if (Value *W = ConstantFoldConstant(C, *DL, TLI))
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 84ecd4ab980..682af4dc708 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
 
     // Simplify the GEP to handle 'gep x, 0' -> x etc.
     if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(),
-                                   GEPOps, DL, TLI, DT, AC)) {
+                                   GEPOps, {DL, TLI, DT, AC})) {
       for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
         RemoveInstInputs(GEPOps[i], InstInputs);
 
@@ -276,7 +276,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
         }
 
     // See if the add simplifies away.
-    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) {
+    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) {
       // If we simplified the operands, the LHS is no longer an input, but Res
       // is.
       RemoveInstInputs(LHS, InstInputs);
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 3ac4bf1276e..bd747f7c0b7 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4108,127 +4108,128 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
       break;
     }
   }
-  if (BEValueV && StartValueV) {
-    // While we are analyzing this PHI node, handle its value symbolically.
-    const SCEV *SymbolicName = getUnknown(PN);
-    assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
-           "PHI node already processed?");
-    ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
+  if (!BEValueV || !StartValueV)
+    return nullptr;
 
-    // Using this symbolic name for the PHI, analyze the value coming around
-    // the back-edge.
-    const SCEV *BEValue = getSCEV(BEValueV);
+  // While we are analyzing this PHI node, handle its value symbolically.
+  const SCEV *SymbolicName = getUnknown(PN);
+  assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
+         "PHI node already processed?");
+  ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
 
-    // NOTE: If BEValue is loop invariant, we know that the PHI node just
-    // has a special value for the first iteration of the loop.
+  // Using this symbolic name for the PHI, analyze the value coming around
+  // the back-edge.
+  const SCEV *BEValue = getSCEV(BEValueV);
 
-    // If the value coming around the backedge is an add with the symbolic
-    // value we just inserted, then we found a simple induction variable!
-    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
-      // If there is a single occurrence of the symbolic value, replace it
-      // with a recurrence.
-      unsigned FoundIndex = Add->getNumOperands();
+  // NOTE: If BEValue is loop invariant, we know that the PHI node just
+  // has a special value for the first iteration of the loop.
+
+  // If the value coming around the backedge is an add with the symbolic
+  // value we just inserted, then we found a simple induction variable!
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+    // If there is a single occurrence of the symbolic value, replace it
+    // with a recurrence.
+    unsigned FoundIndex = Add->getNumOperands();
+    for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+      if (Add->getOperand(i) == SymbolicName)
+        if (FoundIndex == e) {
+          FoundIndex = i;
+          break;
+        }
+
+    if (FoundIndex != Add->getNumOperands()) {
+      // Create an add with everything but the specified operand.
+      SmallVector<const SCEV *, 8> Ops;
       for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
-        if (Add->getOperand(i) == SymbolicName)
-          if (FoundIndex == e) {
-            FoundIndex = i;
-            break;
+        if (i != FoundIndex)
+          Ops.push_back(Add->getOperand(i));
+      const SCEV *Accum = getAddExpr(Ops);
+
+      // This is not a valid addrec if the step amount is varying each
+      // loop iteration, but is not itself an addrec in this loop.
+      if (isLoopInvariant(Accum, L) ||
+          (isa<SCEVAddRecExpr>(Accum) &&
+           cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+        SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+
+        if (auto BO = MatchBinaryOp(BEValueV, DT)) {
+          if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
+            if (BO->IsNUW)
+              Flags = setFlags(Flags, SCEV::FlagNUW);
+            if (BO->IsNSW)
+              Flags = setFlags(Flags, SCEV::FlagNSW);
+          }
+        } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
+          // If the increment is an inbounds GEP, then we know the address
+          // space cannot be wrapped around. We cannot make any guarantee
+          // about signed or unsigned overflow because pointers are
+          // unsigned but we may have a negative index from the base
+          // pointer. We can guarantee that no unsigned wrap occurs if the
+          // indices form a positive value.
+          if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
+            Flags = setFlags(Flags, SCEV::FlagNW);
+
+            const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+            if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+              Flags = setFlags(Flags, SCEV::FlagNUW);
           }
 
-      if (FoundIndex != Add->getNumOperands()) {
-        // Create an add with everything but the specified operand.
-        SmallVector<const SCEV *, 8> Ops;
-        for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
-          if (i != FoundIndex)
-            Ops.push_back(Add->getOperand(i));
-        const SCEV *Accum = getAddExpr(Ops);
-
-        // This is not a valid addrec if the step amount is varying each
-        // loop iteration, but is not itself an addrec in this loop.
-        if (isLoopInvariant(Accum, L) ||
-            (isa<SCEVAddRecExpr>(Accum) &&
-             cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
-          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
-
-          if (auto BO = MatchBinaryOp(BEValueV, DT)) {
-            if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
-              if (BO->IsNUW)
-                Flags = setFlags(Flags, SCEV::FlagNUW);
-              if (BO->IsNSW)
-                Flags = setFlags(Flags, SCEV::FlagNSW);
-            }
-          } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
-            // If the increment is an inbounds GEP, then we know the address
-            // space cannot be wrapped around. We cannot make any guarantee
-            // about signed or unsigned overflow because pointers are
-            // unsigned but we may have a negative index from the base
-            // pointer. We can guarantee that no unsigned wrap occurs if the
-            // indices form a positive value.
-            if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
-              Flags = setFlags(Flags, SCEV::FlagNW);
-
-              const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
-              if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
-                Flags = setFlags(Flags, SCEV::FlagNUW);
-            }
-
-            // We cannot transfer nuw and nsw flags from subtraction
-            // operations -- sub nuw X, Y is not the same as add nuw X, -Y
-            // for instance.
-          }
-
-          const SCEV *StartVal = getSCEV(StartValueV);
-          const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
-
-          // Okay, for the entire analysis of this edge we assumed the PHI
-          // to be symbolic.  We now need to go back and purge all of the
-          // entries for the scalars that use the symbolic expression.
-          forgetSymbolicName(PN, SymbolicName);
-          ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
-
-          // We can add Flags to the post-inc expression only if we
-          // know that it us *undefined behavior* for BEValueV to
-          // overflow.
-          if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
-            if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
-              (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
-
-          return PHISCEV;
+          // We cannot transfer nuw and nsw flags from subtraction
+          // operations -- sub nuw X, Y is not the same as add nuw X, -Y
+          // for instance.
         }
-      }
-    } else {
-      // Otherwise, this could be a loop like this:
-      //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
-      // In this case, j = {1,+,1}  and BEValue is j.
-      // Because the other in-value of i (0) fits the evolution of BEValue
-      // i really is an addrec evolution.
-      //
-      // We can generalize this saying that i is the shifted value of BEValue
-      // by one iteration:
-      //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
-      const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
-      const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
-      if (Shifted != getCouldNotCompute() &&
-          Start != getCouldNotCompute()) {
+
         const SCEV *StartVal = getSCEV(StartValueV);
-        if (Start == StartVal) {
-          // Okay, for the entire analysis of this edge we assumed the PHI
-          // to be symbolic.  We now need to go back and purge all of the
-          // entries for the scalars that use the symbolic expression.
-          forgetSymbolicName(PN, SymbolicName);
-          ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
-          return Shifted;
-        }
+        const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
+
+        // Okay, for the entire analysis of this edge we assumed the PHI
+        // to be symbolic.  We now need to go back and purge all of the
+        // entries for the scalars that use the symbolic expression.
+        forgetSymbolicName(PN, SymbolicName);
+        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+
+        // We can add Flags to the post-inc expression only if we
+        // know that it us *undefined behavior* for BEValueV to
+        // overflow.
+        if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
+          if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
+            (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+
+        return PHISCEV;
+      }
+    }
+  } else {
+    // Otherwise, this could be a loop like this:
+    //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
+    // In this case, j = {1,+,1}  and BEValue is j.
+    // Because the other in-value of i (0) fits the evolution of BEValue
+    // i really is an addrec evolution.
+    //
+    // We can generalize this saying that i is the shifted value of BEValue
+    // by one iteration:
+    //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
+    const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
+    const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this);
+    if (Shifted != getCouldNotCompute() &&
+        Start != getCouldNotCompute()) {
+      const SCEV *StartVal = getSCEV(StartValueV);
+      if (Start == StartVal) {
+        // Okay, for the entire analysis of this edge we assumed the PHI
+        // to be symbolic.  We now need to go back and purge all of the
+        // entries for the scalars that use the symbolic expression.
+        forgetSymbolicName(PN, SymbolicName);
+        ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+        return Shifted;
       }
     }
-
-    // Remove the temporary PHI node SCEV that has been inserted while intending
-    // to create an AddRecExpr for this PHI node. We can not keep this temporary
-    // as it will prevent later (possibly simpler) SCEV expressions to be added
-    // to the ValueExprMap.
-    eraseValueFromMap(PN);
   }
 
+  // Remove the temporary PHI node SCEV that has been inserted while intending
+  // to create an AddRecExpr for this PHI node. We can not keep this temporary
+  // as it will prevent later (possibly simpler) SCEV expressions to be added
+  // to the ValueExprMap.
+  eraseValueFromMap(PN);
+
   return nullptr;
 }
 
@@ -4388,7 +4389,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
   // PHI's incoming blocks are in a different loop, in which case doing so
   // risks breaking LCSSA form. Instcombine would normally zap these, but
   // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC))
+  if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
     if (LI.replacementPreservesLCSSAForm(PN, V))
       return getSCEV(V);
 
@@ -5028,7 +5029,8 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
     return false;
 
   // Only proceed if we can prove that I does not yield poison.
-  if (!isKnownNotFullPoison(I)) return false;
+  if (!programUndefinedIfFullPoison(I))
+    return false;
 
   // At this point we know that if I is executed, then it does not wrap
   // according to at least one of NSW or NUW. If I is not executed, then we do
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 6dd10441c4c..86cbd79aa84 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1772,9 +1772,10 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
 ///
 /// This does not depend on any SCEVExpander state but should be used in
 /// the same context that SCEVExpander is used.
-unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
-                                           SmallVectorImpl<WeakVH> &DeadInsts,
-                                           const TargetTransformInfo *TTI) {
+unsigned
+SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts,
+                                  const TargetTransformInfo *TTI) {
   // Find integer phis in order of increasing width.
   SmallVector<PHINode*, 8> Phis;
   for (auto &I : *L->getHeader()) {
@@ -1799,7 +1800,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
   // so narrow phis can reuse them.
   for (PHINode *Phi : Phis) {
     auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
-      if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC))
+      if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
         return V;
       if (!SE.isSCEVable(PN->getType()))
         return nullptr;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index d73b1a12803..26d606cce9b 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -83,6 +83,12 @@ int TargetTransformInfo::getIntrinsicCost(
   return Cost;
 }
 
+unsigned
+TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+                                                      unsigned &JTSize) const {
+  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
+}
+
 int TargetTransformInfo::getUserCost(const User *U) const {
   int Cost = TTIImpl->getUserCost(U);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index af964b6259b..dc151f23267 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -296,12 +296,12 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
     if (NSW) {
       // Adding two non-negative numbers, or subtracting a negative number from
       // a non-negative one, can't wrap into negative.
-      if (LHSKnown.Zero.isSignBitSet() && Known2.Zero.isSignBitSet())
-        KnownOut.Zero.setSignBit();
+      if (LHSKnown.isNonNegative() && Known2.isNonNegative())
+        KnownOut.makeNonNegative();
       // Adding two negative numbers, or subtracting a non-negative number from
       // a negative one, can't wrap into non-negative.
-      else if (LHSKnown.One.isSignBitSet() && Known2.One.isSignBitSet())
-        KnownOut.One.setSignBit();
+      else if (LHSKnown.isNegative() && Known2.isNegative())
+        KnownOut.makeNegative();
     }
   }
 }
@@ -321,10 +321,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
       // The product of a number with itself is non-negative.
       isKnownNonNegative = true;
     } else {
-      bool isKnownNonNegativeOp1 = Known.Zero.isSignBitSet();
-      bool isKnownNonNegativeOp0 = Known2.Zero.isSignBitSet();
-      bool isKnownNegativeOp1 = Known.One.isSignBitSet();
-      bool isKnownNegativeOp0 = Known2.One.isSignBitSet();
+      bool isKnownNonNegativeOp1 = Known.isNonNegative();
+      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
+      bool isKnownNegativeOp1 = Known.isNegative();
+      bool isKnownNegativeOp0 = Known2.isNegative();
       // The product of two numbers with the same sign is non-negative.
       isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
         (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
@@ -360,21 +360,20 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
   // which case we prefer to follow the result of the direct computation,
   // though as the program is invoking undefined behaviour we can choose
   // whatever we like here.
-  if (isKnownNonNegative && !Known.One.isSignBitSet())
-    Known.Zero.setSignBit();
-  else if (isKnownNegative && !Known.Zero.isSignBitSet())
-    Known.One.setSignBit();
+  if (isKnownNonNegative && !Known.isNegative())
+    Known.makeNonNegative();
+  else if (isKnownNegative && !Known.isNonNegative())
+    Known.makeNegative();
 }
 
 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
-                                             APInt &KnownZero,
-                                             APInt &KnownOne) {
-  unsigned BitWidth = KnownZero.getBitWidth();
+                                             KnownBits &Known) {
+  unsigned BitWidth = Known.getBitWidth();
   unsigned NumRanges = Ranges.getNumOperands() / 2;
   assert(NumRanges >= 1);
 
-  KnownZero.setAllBits();
-  KnownOne.setAllBits();
+  Known.Zero.setAllBits();
+  Known.One.setAllBits();
 
   for (unsigned i = 0; i < NumRanges; ++i) {
     ConstantInt *Lower =
@@ -388,8 +387,8 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
         (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
 
     APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
-    KnownOne &= Range.getUnsignedMax() & Mask;
-    KnownZero &= ~Range.getUnsignedMax() & Mask;
+    Known.One &= Range.getUnsignedMax() & Mask;
+    Known.Zero &= ~Range.getUnsignedMax() & Mask;
   }
 }
 
@@ -709,9 +708,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
       KnownBits RHSKnown(BitWidth);
       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 
-      if (RHSKnown.Zero.isSignBitSet()) {
+      if (RHSKnown.isNonNegative()) {
         // We know that the sign bit is zero.
-        Known.Zero.setSignBit();
+        Known.makeNonNegative();
       }
     // assume(v >_s c) where c is at least -1.
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -720,9 +719,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
       KnownBits RHSKnown(BitWidth);
       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 
-      if (RHSKnown.One.isAllOnesValue() || RHSKnown.Zero.isSignBitSet()) {
+      if (RHSKnown.One.isAllOnesValue() || RHSKnown.isNonNegative()) {
         // We know that the sign bit is zero.
-        Known.Zero.setSignBit();
+        Known.makeNonNegative();
       }
     // assume(v <=_s c) where c is negative
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -731,9 +730,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
       KnownBits RHSKnown(BitWidth);
       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 
-      if (RHSKnown.One.isSignBitSet()) {
+      if (RHSKnown.isNegative()) {
         // We know that the sign bit is one.
-        Known.One.setSignBit();
+        Known.makeNegative();
       }
     // assume(v <_s c) where c is non-positive
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -742,9 +741,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
       KnownBits RHSKnown(BitWidth);
       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 
-      if (RHSKnown.Zero.isAllOnesValue() || RHSKnown.One.isSignBitSet()) {
+      if (RHSKnown.Zero.isAllOnesValue() || RHSKnown.isNegative()) {
         // We know that the sign bit is one.
-        Known.One.setSignBit();
+        Known.makeNegative();
       }
     // assume(v <=_u c)
     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -902,7 +901,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
   default: break;
   case Instruction::Load:
     if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
-      computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One);
+      computeKnownBitsFromRangeMetadata(*MD, Known);
     break;
   case Instruction::And: {
     // If either the LHS or the RHS are Zero, the result is zero.
@@ -992,23 +991,23 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
     unsigned MaxHighZeros = 0;
     if (SPF == SPF_SMAX) {
       // If both sides are negative, the result is negative.
-      if (Known.One.isSignBitSet() && Known2.One.isSignBitSet())
+      if (Known.isNegative() && Known2.isNegative())
         // We can derive a lower bound on the result by taking the max of the
         // leading one bits.
         MaxHighOnes = std::max(Known.One.countLeadingOnes(),
                                Known2.One.countLeadingOnes());
       // If either side is non-negative, the result is non-negative.
-      else if (Known.Zero.isSignBitSet() || Known2.Zero.isSignBitSet())
+      else if (Known.isNonNegative() || Known2.isNonNegative())
         MaxHighZeros = 1;
     } else if (SPF == SPF_SMIN) {
       // If both sides are non-negative, the result is non-negative.
-      if (Known.Zero.isSignBitSet() && Known2.Zero.isSignBitSet())
+      if (Known.isNonNegative() && Known2.isNonNegative())
         // We can derive an upper bound on the result by taking the max of the
         // leading zero bits.
         MaxHighZeros = std::max(Known.Zero.countLeadingOnes(),
                                 Known2.Zero.countLeadingOnes());
       // If either side is negative, the result is negative.
-      else if (Known.One.isSignBitSet() || Known2.One.isSignBitSet())
+      else if (Known.isNegative() || Known2.isNegative())
         MaxHighOnes = 1;
     } else if (SPF == SPF_UMAX) {
       // We can derive a lower bound on the result by taking the max of the
@@ -1163,12 +1162,12 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
 
         // If the first operand is non-negative or has all low bits zero, then
         // the upper bits are all zero.
-        if (Known2.Zero.isSignBitSet() || ((Known2.Zero & LowBits) == LowBits))
+        if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
           Known.Zero |= ~LowBits;
 
         // If the first operand is negative and not all low bits are zero, then
         // the upper bits are all one.
-        if (Known2.One.isSignBitSet() && ((Known2.One & LowBits) != 0))
+        if (Known2.isNegative() && LowBits.intersects(Known2.One))
           Known.One |= ~LowBits;
 
         assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
@@ -1180,8 +1179,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
     // remainder is zero.
     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
     // If it's known zero, our sign bit is also zero.
-    if (Known2.Zero.isSignBitSet())
-      Known.Zero.setSignBit();
+    if (Known2.isNonNegative())
+      Known.makeNonNegative();
 
     break;
   case Instruction::URem: {
@@ -1321,25 +1320,25 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
             // (add non-negative, non-negative) --> non-negative
             // (add negative, negative) --> negative
             if (Opcode == Instruction::Add) {
-              if (Known2.Zero.isSignBitSet() && Known3.Zero.isSignBitSet())
-                Known.Zero.setSignBit();
-              else if (Known2.One.isSignBitSet() && Known3.One.isSignBitSet())
-                Known.One.setSignBit();
+              if (Known2.isNonNegative() && Known3.isNonNegative())
+                Known.makeNonNegative();
+              else if (Known2.isNegative() && Known3.isNegative())
+                Known.makeNegative();
             }
 
             // (sub nsw non-negative, negative) --> non-negative
             // (sub nsw negative, non-negative) --> negative
             else if (Opcode == Instruction::Sub && LL == I) {
-              if (Known2.Zero.isSignBitSet() && Known3.One.isSignBitSet())
-                Known.Zero.setSignBit();
-              else if (Known2.One.isSignBitSet() && Known3.Zero.isSignBitSet())
-                Known.One.setSignBit();
+              if (Known2.isNonNegative() && Known3.isNegative())
+                Known.makeNonNegative();
+              else if (Known2.isNegative() && Known3.isNonNegative())
+                Known.makeNegative();
             }
 
             // (mul nsw non-negative, non-negative) --> non-negative
-            else if (Opcode == Instruction::Mul && Known2.Zero.isSignBitSet() &&
-                     Known3.Zero.isSignBitSet())
-              Known.Zero.setSignBit();
+            else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
+                     Known3.isNonNegative())
+              Known.makeNonNegative();
           }
 
           break;
@@ -1384,7 +1383,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
     // and then intersect with known bits based on other properties of the
     // function.
     if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
-      computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One);
+      computeKnownBitsFromRangeMetadata(*MD, Known);
     if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
       computeKnownBits(RV, Known2, Depth + 1, Q);
       Known.Zero |= Known2.Zero;
@@ -1599,8 +1598,8 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
   }
   KnownBits Bits(BitWidth);
   computeKnownBits(V, Bits, Depth, Q);
-  KnownOne = Bits.One.isSignBitSet();
-  KnownZero = Bits.Zero.isSignBitSet();
+  KnownOne = Bits.isNegative();
+  KnownZero = Bits.isNonNegative();
 }
 
 /// Return true if the given value is known to have exactly one
@@ -2221,7 +2220,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
 
         // If we are subtracting one from a positive number, there is no carry
         // out of the result.
-        if (Known.Zero.isSignBitSet())
+        if (Known.isNonNegative())
           return Tmp;
       }
 
@@ -2245,7 +2244,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
 
         // If the input is known to be positive (the sign bit is known clear),
         // the output of the NEG has the same number of sign bits as the input.
-        if (Known.Zero.isSignBitSet())
+        if (Known.isNonNegative())
           return Tmp2;
 
         // Otherwise, we treat this like a SUB.
@@ -2302,10 +2301,10 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
 
   // If we know that the sign bit is either zero or one, determine the number of
   // identical bits in the top of the input value.
-  if (Known.Zero.isSignBitSet())
+  if (Known.isNonNegative())
     return std::max(FirstAnswer, Known.Zero.countLeadingOnes());
 
-  if (Known.One.isSignBitSet())
+  if (Known.isNegative())
     return std::max(FirstAnswer, Known.One.countLeadingOnes());
 
   // computeKnownBits gave us no extra information about the top bits.
@@ -3198,7 +3197,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
       // See if InstructionSimplify knows any relevant tricks.
       if (Instruction *I = dyn_cast<Instruction>(V))
         // TODO: Acquire a DominatorTree and AssumptionCache and use them.
-        if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) {
+        if (Value *Simplified = SimplifyInstruction(I, {DL, I})) {
           V = Simplified;
           continue;
         }
@@ -3319,12 +3318,18 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
                                               LI->getAlignment(), DL, CtxI, DT);
   }
   case Instruction::Call: {
+    auto *CI = cast<const CallInst>(Inst);
+    const Function *Callee = CI->getCalledFunction();
+    if (Callee && Callee->isSpeculatable())
+      return true;
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
       switch (II->getIntrinsicID()) {
       // These synthetic intrinsics have no side-effects and just mark
       // information about their operands.
       // FIXME: There are other no-op synthetic instructions that potentially
       // should be considered at least *safe* to speculate...
+      // FIXME: The speculatable attribute should be added to all these
+      // intrinsics and this case statement should be removed.
       case Intrinsic::dbg_declare:
       case Intrinsic::dbg_value:
         return true;
@@ -3836,7 +3841,7 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
   }
 }
 
-bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
   // We currently only look for uses of poison values within the same basic
   // block, as that makes it easier to guarantee that the uses will be
   // executed given that PoisonI is executed.
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 49a8ce4bed0..a49276099f1 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -601,6 +601,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(hhvm_ccc);
   KEYWORD(cxx_fast_tlscc);
   KEYWORD(amdgpu_vs);
+  KEYWORD(amdgpu_hs);
   KEYWORD(amdgpu_gs);
   KEYWORD(amdgpu_ps);
   KEYWORD(amdgpu_cs);
@@ -648,6 +649,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(returned);
   KEYWORD(returns_twice);
   KEYWORD(signext);
+  KEYWORD(speculatable);
   KEYWORD(sret);
   KEYWORD(ssp);
   KEYWORD(sspreq);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index c7076ed0dd8..97a567565b4 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1095,6 +1095,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
     case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
     case lltok::kw_returns_twice:
       B.addAttribute(Attribute::ReturnsTwice); break;
+    case lltok::kw_speculatable: B.addAttribute(Attribute::Speculatable); break;
     case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
     case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
     case lltok::kw_sspstrong:
@@ -1667,8 +1668,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
 ///   ::= 'hhvm_ccc'
 ///   ::= 'cxx_fast_tlscc'
 ///   ::= 'amdgpu_vs'
-///   ::= 'amdgpu_tcs'
-///   ::= 'amdgpu_tes'
+///   ::= 'amdgpu_hs'
 ///   ::= 'amdgpu_gs'
 ///   ::= 'amdgpu_ps'
 ///   ::= 'amdgpu_cs'
@@ -1710,6 +1710,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
   case lltok::kw_hhvm_ccc:       CC = CallingConv::HHVM_C; break;
   case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
   case lltok::kw_amdgpu_vs:      CC = CallingConv::AMDGPU_VS; break;
+  case lltok::kw_amdgpu_hs:      CC = CallingConv::AMDGPU_HS; break;
   case lltok::kw_amdgpu_gs:      CC = CallingConv::AMDGPU_GS; break;
   case lltok::kw_amdgpu_ps:      CC = CallingConv::AMDGPU_PS; break;
   case lltok::kw_amdgpu_cs:      CC = CallingConv::AMDGPU_CS; break;
@@ -4071,7 +4072,7 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
 ///                     virtuality: DW_VIRTUALTIY_pure_virtual,
 ///                     virtualIndex: 10, thisAdjustment: 4, flags: 11,
 ///                     isOptimized: false, templateParams: !4, declaration: !5,
-///                     variables: !6)
+///                     variables: !6, thrownTypes: !7)
 bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
   auto Loc = Lex.getLoc();
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
@@ -4093,7 +4094,8 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
   OPTIONAL(unit, MDField, );                                                   \
   OPTIONAL(templateParams, MDField, );                                         \
   OPTIONAL(declaration, MDField, );                                            \
-  OPTIONAL(variables, MDField, );
+  OPTIONAL(variables, MDField, );                                              \
+  OPTIONAL(thrownTypes, MDField, );
   PARSE_MD_FIELDS();
 #undef VISIT_MD_FIELDS
 
@@ -4103,12 +4105,12 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) {
         "missing 'distinct', required for !DISubprogram when 'isDefinition'");
 
   Result = GET_OR_DISTINCT(
-      DISubprogram, (Context, scope.Val, name.Val, linkageName.Val, file.Val,
-                     line.Val, type.Val, isLocal.Val, isDefinition.Val,
-                     scopeLine.Val, containingType.Val, virtuality.Val,
-                     virtualIndex.Val, thisAdjustment.Val, flags.Val,
-                     isOptimized.Val, unit.Val, templateParams.Val,
-                     declaration.Val, variables.Val));
+      DISubprogram,
+      (Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val,
+       type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val,
+       containingType.Val, virtuality.Val, virtualIndex.Val, thisAdjustment.Val,
+       flags.Val, isOptimized.Val, unit.Val, templateParams.Val,
+       declaration.Val, variables.Val, thrownTypes.Val));
   return false;
 }
 
@@ -4148,15 +4150,13 @@ bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) {
 bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
   REQUIRED(scope, MDField, );                                                  \
-  OPTIONAL(file, MDField, );                                                   \
   OPTIONAL(name, MDStringField, );                                             \
-  OPTIONAL(line, LineField, );                                                 \
   OPTIONAL(exportSymbols, MDBoolField, );
   PARSE_MD_FIELDS();
 #undef VISIT_MD_FIELDS
 
   Result = GET_OR_DISTINCT(DINamespace,
-  (Context, scope.Val, file.Val, name.Val, line.Val, exportSymbols.Val));
+                           (Context, scope.Val, name.Val, exportSymbols.Val));
   return false;
 }
 
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 33f8e63daa0..6c8ed7da495 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -153,6 +153,7 @@ enum Kind {
   kw_hhvm_ccc,
   kw_cxx_fast_tlscc,
   kw_amdgpu_vs,
+  kw_amdgpu_hs,
   kw_amdgpu_gs,
   kw_amdgpu_ps,
   kw_amdgpu_cs,
@@ -198,6 +199,7 @@ enum Kind {
   kw_returned,
   kw_returns_twice,
   kw_signext,
+  kw_speculatable,
   kw_ssp,
   kw_sspreq,
   kw_sspstrong,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 6d727ce8334..8b6f79a81b9 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -93,6 +93,13 @@ static cl::opt<bool> PrintSummaryGUIDs(
     cl::desc(
         "Print the global id for each value when reading the module summary"));
 
+// FIXME: This flag should either be removed or moved to clang as a driver flag.
+static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile(
+    "ignore-empty-index-file", llvm::cl::ZeroOrMore,
+    llvm::cl::desc(
+        "Ignore an empty index file and perform non-ThinLTO compilation"),
+    llvm::cl::init(false));
+
 namespace {
 
 enum {
@@ -706,11 +713,20 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
   /// Original source file name recorded in a bitcode record.
   std::string SourceFileName;
 
+  /// The string identifier given to this module by the client, normally the
+  /// path to the bitcode file.
+  StringRef ModulePath;
+
+  /// For per-module summary indexes, the unique numerical identifier given to
+  /// this module by the client.
+  unsigned ModuleId;
+
 public:
   ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab,
-                                  ModuleSummaryIndex &TheIndex);
+                                  ModuleSummaryIndex &TheIndex,
+                                  StringRef ModulePath, unsigned ModuleId);
 
-  Error parseModule(StringRef ModulePath);
+  Error parseModule();
 
 private:
   void setValueGUID(uint64_t ValueID, StringRef ValueName,
@@ -723,11 +739,13 @@ private:
   std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record,
                                                     bool IsOldProfileFormat,
                                                     bool HasProfile);
-  Error parseEntireSummary(StringRef ModulePath);
+  Error parseEntireSummary();
   Error parseModuleStringTable();
 
   std::pair<GlobalValue::GUID, GlobalValue::GUID>
   getGUIDFromValueId(unsigned ValueId);
+
+  ModulePathStringTableTy::iterator addThisModulePath();
 };
 
 } // end anonymous namespace
@@ -1119,6 +1137,7 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
   case Attribute::SwiftSelf:       return 1ULL << 51;
   case Attribute::SwiftError:      return 1ULL << 52;
   case Attribute::WriteOnly:       return 1ULL << 53;
+  case Attribute::Speculatable:    return 1ULL << 54;
   case Attribute::Dereferenceable:
     llvm_unreachable("dereferenceable attribute not supported in raw format");
     break;
@@ -1315,6 +1334,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::ReturnsTwice;
   case bitc::ATTR_KIND_S_EXT:
     return Attribute::SExt;
+  case bitc::ATTR_KIND_SPECULATABLE:
+    return Attribute::Speculatable;
   case bitc::ATTR_KIND_STACK_ALIGNMENT:
     return Attribute::StackAlignment;
   case bitc::ATTR_KIND_STACK_PROTECT:
@@ -4666,8 +4687,15 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
 }
 
 ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
-    BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex)
-    : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex) {}
+    BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex,
+    StringRef ModulePath, unsigned ModuleId)
+    : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
+      ModulePath(ModulePath), ModuleId(ModuleId) {}
+
+ModulePathStringTableTy::iterator
+ModuleSummaryIndexBitcodeReader::addThisModulePath() {
+  return TheIndex.addModulePath(ModulePath, ModuleId);
+}
 
 std::pair<GlobalValue::GUID, GlobalValue::GUID>
 ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) {
@@ -4777,7 +4805,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
 // Parse just the blocks needed for building the index out of the module.
 // At the end of this routine the module Index is populated with a map
 // from global value id to GlobalValueSummary objects.
-Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
+Error ModuleSummaryIndexBitcodeReader::parseModule() {
   if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
     return error("Invalid record");
 
@@ -4828,7 +4856,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
           SeenValueSymbolTable = true;
         }
         SeenGlobalValSummary = true;
-        if (Error Err = parseEntireSummary(ModulePath))
+        if (Error Err = parseEntireSummary())
           return Err;
         break;
       case bitc::MODULE_STRTAB_BLOCK_ID:
@@ -4861,12 +4889,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
         case bitc::MODULE_CODE_HASH: {
           if (Record.size() != 5)
             return error("Invalid hash length " + Twine(Record.size()).str());
-          if (TheIndex.modulePaths().empty())
-            // We always seed the index with the module.
-            TheIndex.addModulePath(ModulePath, 0);
-          if (TheIndex.modulePaths().size() != 1)
-            return error("Don't expect multiple modules defined?");
-          auto &Hash = TheIndex.modulePaths().begin()->second.second;
+          auto &Hash = addThisModulePath()->second.second;
           int Pos = 0;
           for (auto &Val : Record) {
             assert(!(Val >> 32) && "Unexpected high bits set");
@@ -4941,8 +4964,7 @@ std::vector<FunctionSummary::EdgeTy> ModuleSummaryIndexBitcodeReader::makeCallLi
 
 // Eagerly parse the entire summary block. This populates the GlobalValueSummary
 // objects in the index.
-Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
-    StringRef ModulePath) {
+Error ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
   if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID))
     return error("Invalid record");
   SmallVector<uint64_t, 64> Record;
@@ -4966,7 +4988,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
   // "OriginalName" attachement.
   GlobalValueSummary *LastSeenSummary = nullptr;
   GlobalValue::GUID LastSeenGUID = 0;
-  bool Combined = false;
 
   // We can expect to see any number of type ID information records before
   // each function summary records; these variables store the information
@@ -4985,16 +5006,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
     case BitstreamEntry::Error:
       return error("Malformed block");
     case BitstreamEntry::EndBlock:
-      // For a per-module index, remove any entries that still have empty
-      // summaries. The VST parsing creates entries eagerly for all symbols,
-      // but not all have associated summaries (e.g. it doesn't know how to
-      // distinguish between VST_CODE_ENTRY for function declarations vs global
-      // variables with initializers that end up with a summary). Remove those
-      // entries now so that we don't need to rely on the combined index merger
-      // to clean them up (especially since that may not run for the first
-      // module's index if we merge into that).
-      if (!Combined)
-        TheIndex.removeEmptySummaryEntries();
       return Error::success();
     case BitstreamEntry::Record:
       // The interesting case.
@@ -5058,7 +5069,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
       PendingTypeTestAssumeConstVCalls.clear();
       PendingTypeCheckedLoadConstVCalls.clear();
       auto GUID = getGUIDFromValueId(ValueID);
-      FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first());
+      FS->setModulePath(addThisModulePath()->first());
       FS->setOriginalName(GUID.second);
       TheIndex.addGlobalValueSummary(GUID.first, std::move(FS));
       break;
@@ -5078,13 +5089,14 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
       // string table section in the per-module index, we create a single
       // module path string table entry with an empty (0) ID to take
       // ownership.
-      AS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first());
+      AS->setModulePath(addThisModulePath()->first());
 
       GlobalValue::GUID AliaseeGUID = getGUIDFromValueId(AliaseeID).first;
-      auto *AliaseeSummary = TheIndex.getGlobalValueSummary(AliaseeGUID);
-      if (!AliaseeSummary)
+      auto AliaseeInModule =
+          TheIndex.findSummaryInModule(AliaseeGUID, ModulePath);
+      if (!AliaseeInModule)
         return error("Alias expects aliasee summary to be parsed");
-      AS->setAliasee(AliaseeSummary);
+      AS->setAliasee(AliaseeInModule);
 
       auto GUID = getGUIDFromValueId(ValueID);
       AS->setOriginalName(GUID.second);
@@ -5099,7 +5111,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
       std::vector<ValueInfo> Refs =
           makeRefList(ArrayRef<uint64_t>(Record).slice(2));
       auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
-      FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first());
+      FS->setModulePath(addThisModulePath()->first());
       auto GUID = getGUIDFromValueId(ValueID);
       FS->setOriginalName(GUID.second);
       TheIndex.addGlobalValueSummary(GUID.first, std::move(FS));
@@ -5143,7 +5155,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
       LastSeenGUID = GUID;
       FS->setModulePath(ModuleIdMap[ModuleId]);
       TheIndex.addGlobalValueSummary(GUID, std::move(FS));
-      Combined = true;
       break;
     }
     // FS_COMBINED_ALIAS: [valueid, modid, flags, valueid]
@@ -5169,7 +5180,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
       GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
       LastSeenGUID = GUID;
       TheIndex.addGlobalValueSummary(GUID, std::move(AS));
-      Combined = true;
       break;
     }
     // FS_COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid]
@@ -5186,7 +5196,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
       GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
       LastSeenGUID = GUID;
       TheIndex.addGlobalValueSummary(GUID, std::move(FS));
-      Combined = true;
       break;
     }
     // FS_COMBINED_ORIGINAL_NAME: [original_name]
@@ -5486,15 +5495,27 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
   return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting);
 }
 
+// Parse the specified bitcode buffer and merge the index into CombinedIndex.
+Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex,
+                                 unsigned ModuleId) {
+  BitstreamCursor Stream(Buffer);
+  Stream.JumpToBit(ModuleBit);
+
+  ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
+                                    ModuleIdentifier, ModuleId);
+  return R.parseModule();
+}
+
 // Parse the specified bitcode buffer, returning the function info index.
 Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
   BitstreamCursor Stream(Buffer);
   Stream.JumpToBit(ModuleBit);
 
   auto Index = llvm::make_unique<ModuleSummaryIndex>();
-  ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index);
+  ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index,
+                                    ModuleIdentifier, 0);
 
-  if (Error Err = R.parseModule(ModuleIdentifier))
+  if (Error Err = R.parseModule())
     return std::move(Err);
 
   return std::move(Index);
@@ -5604,6 +5625,16 @@ Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) {
   return readIdentificationCode(*StreamOrErr);
 }
 
+Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer,
+                                   ModuleSummaryIndex &CombinedIndex,
+                                   unsigned ModuleId) {
+  Expected<BitcodeModule> BM = getSingleModule(Buffer);
+  if (!BM)
+    return BM.takeError();
+
+  return BM->readSummary(CombinedIndex, ModuleId);
+}
+
 Expected<std::unique_ptr<ModuleSummaryIndex>>
 llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) {
   Expected<BitcodeModule> BM = getSingleModule(Buffer);
@@ -5620,3 +5651,14 @@ Expected<bool> llvm::hasGlobalValueSummary(MemoryBufferRef Buffer) {
 
   return BM->hasSummary();
 }
+
+Expected<std::unique_ptr<ModuleSummaryIndex>>
+llvm::getModuleSummaryIndexForFile(StringRef Path) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+      MemoryBuffer::getFileOrSTDIN(Path);
+  if (!FileOrErr)
+    return errorCodeToError(FileOrErr.getError());
+  if (IgnoreEmptyThinLTOIndexFile && !(*FileOrErr)->getBufferSize())
+    return nullptr;
+  return getModuleSummaryIndex(**FileOrErr);
+}
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index d089684a052..42135e5949c 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -474,8 +474,8 @@ class MetadataLoader::MetadataLoaderImpl {
     for (auto CU_SP : CUSubprograms)
       if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
         for (auto &Op : SPs->operands())
-          if (auto *SP = dyn_cast_or_null<MDNode>(Op))
-            SP->replaceOperandWith(7, CU_SP.first);
+          if (auto *SP = dyn_cast_or_null<DISubprogram>(Op))
+            SP->replaceUnit(CU_SP.first);
     CUSubprograms.clear();
   }
 
@@ -1298,7 +1298,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     break;
   }
   case bitc::METADATA_SUBPROGRAM: {
-    if (Record.size() < 18 || Record.size() > 20)
+    if (Record.size() < 18 || Record.size() > 21)
       return error("Invalid record");
 
     IsDistinct =
@@ -1314,29 +1314,31 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     unsigned Offset = Record.size() >= 19 ? 1 : 0;
     bool HasFn = Offset && !HasUnit;
     bool HasThisAdj = Record.size() >= 20;
+    bool HasThrownTypes = Record.size() >= 21;
     DISubprogram *SP = GET_OR_DISTINCT(
-        DISubprogram, (Context,
-                       getDITypeRefOrNull(Record[1]),          // scope
-                       getMDString(Record[2]),                 // name
-                       getMDString(Record[3]),                 // linkageName
-                       getMDOrNull(Record[4]),                 // file
-                       Record[5],                              // line
-                       getMDOrNull(Record[6]),                 // type
-                       Record[7],                              // isLocal
-                       Record[8],                              // isDefinition
-                       Record[9],                              // scopeLine
-                       getDITypeRefOrNull(Record[10]),         // containingType
-                       Record[11],                             // virtuality
-                       Record[12],                             // virtualIndex
-                       HasThisAdj ? Record[19] : 0,            // thisAdjustment
-                       static_cast<DINode::DIFlags>(Record[13] // flags
-                                                    ),
-                       Record[14],                       // isOptimized
-                       HasUnit ? CUorFn : nullptr,       // unit
-                       getMDOrNull(Record[15 + Offset]), // templateParams
-                       getMDOrNull(Record[16 + Offset]), // declaration
-                       getMDOrNull(Record[17 + Offset])  // variables
-                       ));
+        DISubprogram,
+        (Context,
+         getDITypeRefOrNull(Record[1]),                     // scope
+         getMDString(Record[2]),                            // name
+         getMDString(Record[3]),                            // linkageName
+         getMDOrNull(Record[4]),                            // file
+         Record[5],                                         // line
+         getMDOrNull(Record[6]),                            // type
+         Record[7],                                         // isLocal
+         Record[8],                                         // isDefinition
+         Record[9],                                         // scopeLine
+         getDITypeRefOrNull(Record[10]),                    // containingType
+         Record[11],                                        // virtuality
+         Record[12],                                        // virtualIndex
+         HasThisAdj ? Record[19] : 0,                       // thisAdjustment
+         static_cast<DINode::DIFlags>(Record[13]),          // flags
+         Record[14],                                        // isOptimized
+         HasUnit ? CUorFn : nullptr,                        // unit
+         getMDOrNull(Record[15 + Offset]),                  // templateParams
+         getMDOrNull(Record[16 + Offset]),                  // declaration
+         getMDOrNull(Record[17 + Offset]),                  // variables
+         HasThrownTypes ? getMDOrNull(Record[20]) : nullptr // thrownTypes
+         ));
     MetadataList.assignValue(SP, NextMetadataNo);
     NextMetadataNo++;
 
@@ -1381,16 +1383,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     break;
   }
   case bitc::METADATA_NAMESPACE: {
-    if (Record.size() != 5)
+    // Newer versions of DINamespace dropped file and line.
+    MDString *Name;
+    if (Record.size() == 3)
+      Name = getMDString(Record[2]);
+    else if (Record.size() == 5)
+      Name = getMDString(Record[3]);
+    else
       return error("Invalid record");
 
     IsDistinct = Record[0] & 1;
     bool ExportSymbols = Record[0] & 2;
     MetadataList.assignValue(
         GET_OR_DISTINCT(DINamespace,
-                        (Context, getMDOrNull(Record[1]),
-                         getMDOrNull(Record[2]), getMDString(Record[3]),
-                         Record[4], ExportSymbols)),
+                        (Context, getMDOrNull(Record[1]), Name, ExportSymbols)),
         NextMetadataNo);
     NextMetadataNo++;
     break;
diff --git a/lib/Bitcode/Reader/ValueList.cpp b/lib/Bitcode/Reader/ValueList.cpp
index 7152a51cea6..d1a2a11bbfa 100644
--- a/lib/Bitcode/Reader/ValueList.cpp
+++ b/lib/Bitcode/Reader/ValueList.cpp
@@ -58,7 +58,7 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
   if (Idx >= size())
     resize(Idx + 1);
 
-  WeakVH &OldV = ValuePtrs[Idx];
+  WeakTrackingVH &OldV = ValuePtrs[Idx];
   if (!OldV) {
     OldV = V;
     return;
diff --git a/lib/Bitcode/Reader/ValueList.h b/lib/Bitcode/Reader/ValueList.h
index 3119d7735e2..72775a3cf3b 100644
--- a/lib/Bitcode/Reader/ValueList.h
+++ b/lib/Bitcode/Reader/ValueList.h
@@ -20,7 +20,7 @@ namespace llvm {
 class Constant;
 
 class BitcodeReaderValueList {
-  std::vector<WeakVH> ValuePtrs;
+  std::vector<WeakTrackingVH> ValuePtrs;
 
   /// As we resolve forward-referenced constants, we add information about them
   /// to this vector.  This allows us to resolve them in bulk instead of
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index e5aba03c8dc..8aa7d0daf07 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -688,6 +688,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_RETURNS_TWICE;
   case Attribute::SExt:
     return bitc::ATTR_KIND_S_EXT;
+  case Attribute::Speculatable:
+    return bitc::ATTR_KIND_SPECULATABLE;
   case Attribute::StackAlignment:
     return bitc::ATTR_KIND_STACK_ALIGNMENT;
   case Attribute::StackProtect:
@@ -1608,6 +1610,7 @@ void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N,
   Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
   Record.push_back(VE.getMetadataOrNullID(N->getVariables().get()));
   Record.push_back(N->getThisAdjustment());
+  Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get()));
 
   Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
   Record.clear();
@@ -1643,9 +1646,7 @@ void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N,
                                            unsigned Abbrev) {
   Record.push_back(N->isDistinct() | N->getExportSymbols() << 1);
   Record.push_back(VE.getMetadataOrNullID(N->getScope()));
-  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
   Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
-  Record.push_back(N->getLine());
 
   Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev);
   Record.clear();
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d99065b1b67..b11e30c359b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -820,7 +820,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
 
   const DILocalVariable *V = MI->getDebugVariable();
   if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
-    StringRef Name = SP->getDisplayName();
+    StringRef Name = SP->getName();
     if (!Name.empty())
       OS << Name << ":";
   }
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 2571f686965..786b11618d7 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -17,6 +17,7 @@
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeDatabase.h"
 #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
@@ -237,7 +238,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
 
   // The display name includes function template arguments. Drop them to match
   // MSVC.
-  StringRef DisplayName = SP->getDisplayName().split('<').first;
+  StringRef DisplayName = SP->getName().split('<').first;
 
   const DIScope *Scope = SP->getScope().resolve();
   TypeIndex TI;
@@ -392,7 +393,7 @@ void CodeViewDebug::endModule() {
   // subprograms.
   switchToDebugSectionForSymbol(nullptr);
 
-  MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols);
+  MCSymbol *CompilerInfo = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
   emitCompilerInformation();
   endCVSubsection(CompilerInfo);
 
@@ -416,7 +417,7 @@ void CodeViewDebug::endModule() {
 
   // Emit UDT records for any types used by global variables.
   if (!GlobalUDTs.empty()) {
-    MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+    MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
     emitDebugInfoForUDTs(GlobalUDTs);
     endCVSubsection(SymbolsEnd);
   }
@@ -644,7 +645,8 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
     return;
 
   OS.AddComment("Inlinee lines subsection");
-  MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines);
+  MCSymbol *InlineEnd =
+      beginCVSubsection(ModuleDebugFragmentKind::InlineeLines);
 
   // We don't provide any extra file info.
   // FIXME: Find out if debuggers use this info.
@@ -657,7 +659,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
 
     OS.AddBlankLine();
     unsigned FileId = maybeRecordFile(SP->getFile());
-    OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " +
+    OS.AddComment("Inlined function " + SP->getName() + " starts at " +
                   SP->getFilename() + Twine(':') + Twine(SP->getLine()));
     OS.AddBlankLine();
     // The filechecksum table uses 8 byte entries for now, and file ids start at
@@ -759,9 +761,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
 
   // If we have a display name, build the fully qualified name by walking the
   // chain of scopes.
-  if (!SP->getDisplayName().empty())
+  if (!SP->getName().empty())
     FuncName =
-        getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
+        getFullyQualifiedName(SP->getScope().resolve(), SP->getName());
 
   // If our DISubprogram name is empty, use the mangled name.
   if (FuncName.empty())
@@ -769,7 +771,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
 
   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
   OS.AddComment("Symbol subsection for " + Twine(FuncName));
-  MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+  MCSymbol *SymbolsEnd = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
   {
     MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
              *ProcRecordEnd = MMI->getContext().createTempSymbol();
@@ -2114,7 +2116,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
   maybeRecordLocation(DL, Asm->MF);
 }
 
-MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) {
+MCSymbol *CodeViewDebug::beginCVSubsection(ModuleDebugFragmentKind Kind) {
   MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
            *EndLabel = MMI->getContext().createTempSymbol();
   OS.EmitIntValue(unsigned(Kind), 4);
@@ -2174,7 +2176,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
         if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
           if (!EndLabel) {
             OS.AddComment("Symbol subsection for globals");
-            EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+            EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
           }
           // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
           emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
@@ -2192,7 +2194,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
           OS.AddComment("Symbol subsection for " +
                         Twine(GlobalValue::getRealLinkageName(GV->getName())));
           switchToDebugSectionForSymbol(GVSym);
-          EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+          EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
           // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
           emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
           endCVSubsection(EndLabel);
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 343384c5177..46b2daa1e00 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -216,7 +216,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   /// Opens a subsection of the given kind in a .debug$S codeview section.
   /// Returns an end label for use with endCVSubsection when the subsection is
   /// finished.
-  MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind);
+  MCSymbol *beginCVSubsection(codeview::ModuleDebugFragmentKind Kind);
 
   void endCVSubsection(MCSymbol *EndLabel);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 16fb20dd7e2..8d25def7772 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -375,10 +375,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
   addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
 }
 
-void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) {
-  addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory());
-}
-
 /* Byref variables, in Blocks, are declared by the programmer as "SomeType
    VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
    gives the variable VarName either the struct, or a pointer to the struct, as
@@ -662,6 +658,14 @@ void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
   }
 }
 
+/// Add thrown types.
+void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) {
+  for (const auto *Ty : ThrownTypes) {
+    DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die);
+    addType(TT, cast<DIType>(Ty));
+  }
+}
+
 DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
   if (!Context || isa<DIFile>(Context))
     return &getUnitDie();
@@ -1077,7 +1081,6 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
     Name = "(anonymous namespace)";
   DD->addAccelNamespace(Name, NDie);
   addGlobalName(Name, NDie, NS->getScope());
-  addSourceLine(NDie, NS);
   if (NS->getExportSymbols())
     addFlag(NDie, dwarf::DW_AT_export_symbols);
   return &NDie;
@@ -1249,6 +1252,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
     constructSubprogramArguments(SPDie, Args);
   }
 
+  addThrownTypes(SPDie, SP->getThrownTypes());
+
   if (SP->isArtificial())
     addFlag(SPDie, dwarf::DW_AT_artificial);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index e84df465088..8fc841703e2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -210,7 +210,6 @@ public:
   void addSourceLine(DIE &Die, const DIGlobalVariable *G);
   void addSourceLine(DIE &Die, const DISubprogram *SP);
   void addSourceLine(DIE &Die, const DIType *Ty);
-  void addSourceLine(DIE &Die, const DINamespace *NS);
   void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
 
   /// Add constant value entry in variable DIE.
@@ -230,6 +229,9 @@ public:
   /// Add template parameters in buffer.
   void addTemplateParams(DIE &Buffer, DINodeArray TParams);
 
+  /// Add thrown types.
+  void addThrownTypes(DIE &Die, DINodeArray ThrownTypes);
+
   // FIXME: Should be reformulated in terms of addComplexAddress.
   /// Start with the address based on the location provided, and generate the
   /// DWARF information necessary to find the actual Block variable (navigating
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 0912d9f68af..26da748fa24 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -65,6 +65,7 @@ add_llvm_library(LLVMCodeGen
   MachineCSE.cpp
   MachineDominanceFrontier.cpp
   MachineDominators.cpp
+  MachineFrameInfo.cpp
   MachineFunction.cpp
   MachineFunctionPass.cpp
   MachineFunctionPrinterPass.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c862cfd28ad..c6c93811a0f 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -2226,10 +2226,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
       ConstantInt *RetVal =
           lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
       // Substituting this can cause recursive simplifications, which can
-      // invalidate our iterator.  Use a WeakVH to hold onto it in case this
+      // invalidate our iterator.  Use a WeakTrackingVH to hold onto it in case
+      // this
       // happens.
       Value *CurValue = &*CurInstIterator;
-      WeakVH IterHandle(CurValue);
+      WeakTrackingVH IterHandle(CurValue);
 
       replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
 
@@ -4442,9 +4443,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   // using it.
   if (Repl->use_empty()) {
     // This can cause recursive deletion, which can invalidate our iterator.
-    // Use a WeakVH to hold onto it in case this happens.
+    // Use a WeakTrackingVH to hold onto it in case this happens.
     Value *CurValue = &*CurInstIterator;
-    WeakVH IterHandle(CurValue);
+    WeakTrackingVH IterHandle(CurValue);
     BasicBlock *BB = CurInstIterator->getParent();
 
     RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
@@ -5959,7 +5960,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
     // trivial PHI, go ahead and zap it here.
-    if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
+    if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
       P->replaceAllUsesWith(V);
       P->eraseFromParent();
       ++NumPHIsElim;
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 7b1b2d64fcc..65f58e5686e 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -213,10 +213,8 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
 
 
 VLIWPacketizerList::~VLIWPacketizerList() {
-  if (VLIWScheduler)
-    delete VLIWScheduler;
-  if (ResourceTracker)
-    delete ResourceTracker;
+  delete VLIWScheduler;
+  delete ResourceTracker;
 }
 
 
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index 035a2ac78ed..ebfe6cb3b73 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -83,8 +83,8 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
     unsigned FrameAlign;
-    if (FuncInfo.getParamAlignment(OpIdx))
-      FrameAlign = FuncInfo.getParamAlignment(OpIdx);
+    if (FuncInfo.getParamAlignment(OpIdx - 1))
+      FrameAlign = FuncInfo.getParamAlignment(OpIdx - 1);
     else
       FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
     Arg.Flags.setByValAlign(FrameAlign);
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 5fb8dfc95d3..75be7a55bd2 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1199,9 +1199,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 
   finishPendingPhis();
 
-  // Now that the MachineFrameInfo has been configured, no further changes to
-  // the reserved registers are possible.
-  MRI->freezeReservedRegs(*MF);
+  auto &TLI = *MF->getSubtarget().getTargetLowering();
+  TLI.finalizeLowering(*MF);
 
   // Merge the argument lowering and constants block with its single
   // successor, the LLVM-IR entry block.  We want the basic block to
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 942680b6fff..c67da8629a3 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -58,10 +58,11 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
     MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
                                        Reg, OpI));
 
-    // Tie uses to defs as indicated in MCInstrDesc.
+    // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
+    // done.
     if (MO.isUse()) {
       int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
-      if (DefIdx != -1)
+      if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx))
         I.tieOperands(DefIdx, OpI);
     }
   }
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index a2773cccc5d..bd04acd049d 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -541,7 +541,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
     MFI.ensureMaxAlignment(YamlMFI.MaxAlignment);
   MFI.setAdjustsStack(YamlMFI.AdjustsStack);
   MFI.setHasCalls(YamlMFI.HasCalls);
-  MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+  if (YamlMFI.MaxCallFrameSize != ~0u)
+    MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
   MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
   MFI.setHasVAStart(YamlMFI.HasVAStart);
   MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index b6624b88fe2..d017b21f0a5 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -286,7 +286,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
   YamlMFI.MaxAlignment = MFI.getMaxAlignment();
   YamlMFI.AdjustsStack = MFI.adjustsStack();
   YamlMFI.HasCalls = MFI.hasCalls();
-  YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize();
+  YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed()
+    ? MFI.getMaxCallFrameSize() : ~0u;
   YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
   YamlMFI.HasVAStart = MFI.hasVAStart();
   YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
new file mode 100644
index 00000000000..7de8434df80
--- /dev/null
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -0,0 +1,218 @@
+//===-- MachineFrameInfo.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements MachineFrameInfo that manages the stack frame.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+
+#define DEBUG_TYPE "codegen"
+
+using namespace llvm;
+
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+  if (!StackRealignable)
+    assert(Align <= StackAlignment &&
+           "For targets without stack realignment, Align is out of limit!");
+  if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+                                           unsigned StackAlign) {
+  if (!ShouldClamp || Align <= StackAlign)
+    return Align;
+  DEBUG(dbgs() << "Warning: requested alignment " << Align
+               << " exceeds the stack alignment " << StackAlign
+               << " when stack realignment is off" << '\n');
+  return StackAlign;
+}
+
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+                      bool isSS, const AllocaInst *Alloca) {
+  assert(Size != 0 && "Cannot allocate zero size stack objects!");
+  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+  Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
+                                !isSS));
+  int Index = (int)Objects.size() - NumFixedObjects - 1;
+  assert(Index >= 0 && "Bad frame index!");
+  ensureMaxAlignment(Alignment);
+  return Index;
+}
+
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+                                             unsigned Alignment) {
+  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+  CreateStackObject(Size, Alignment, true);
+  int Index = (int)Objects.size() - NumFixedObjects - 1;
+  ensureMaxAlignment(Alignment);
+  return Index;
+}
+
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
+                                                const AllocaInst *Alloca) {
+  HasVarSizedObjects = true;
+  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+  Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
+  ensureMaxAlignment(Alignment);
+  return (int)Objects.size()-NumFixedObjects-1;
+}
+
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+                                        bool Immutable, bool isAliased) {
+  assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+  // The alignment of the frame index can be determined from its offset from
+  // the incoming frame position.  If the frame object is at offset 32 and
+  // the stack is guaranteed to be 16-byte aligned, then we know that the
+  // object is 16-byte aligned. Note that unlike the non-fixed case, if the
+  // stack needs realignment, we can't assume that the stack will in fact be
+  // aligned.
+  unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+  Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
+  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+                                              /*isSS*/   false,
+                                              /*Alloca*/ nullptr, isAliased));
+  return -++NumFixedObjects;
+}
+
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+                                                  int64_t SPOffset,
+                                                  bool Immutable) {
+  unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+  Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
+  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+                                              /*isSS*/ true,
+                                              /*Alloca*/ nullptr,
+                                              /*isAliased*/ false));
+  return -++NumFixedObjects;
+}
+
+BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  BitVector BV(TRI->getNumRegs());
+
+  // Before CSI is calculated, no registers are considered pristine. They can be
+  // freely used and PEI will make sure they are saved.
+  if (!isCalleeSavedInfoValid())
+    return BV;
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
+       ++CSR)
+    BV.set(*CSR);
+
+  // Saved CSRs are not pristine.
+  for (auto &I : getCalleeSavedInfo())
+    for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
+      BV.reset(*S);
+
+  return BV;
+}
+
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+  unsigned MaxAlign = getMaxAlignment();
+  int Offset = 0;
+
+  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+  // It really should be refactored to share code. Until then, changes
+  // should keep in mind that there's tight coupling between the two.
+
+  for (int i = getObjectIndexBegin(); i != 0; ++i) {
+    int FixedOff = -getObjectOffset(i);
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+  for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+    if (isDeadObjectIndex(i))
+      continue;
+    Offset += getObjectSize(i);
+    unsigned Align = getObjectAlignment(i);
+    // Adjust to alignment boundary
+    Offset = (Offset+Align-1)/Align*Align;
+
+    MaxAlign = std::max(Align, MaxAlign);
+  }
+
+  if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+    Offset += getMaxCallFrameSize();
+
+  // Round up the size to a multiple of the alignment.  If the function has
+  // any calls or alloca's, align to the target's StackAlignment value to
+  // ensure that the callee's frame or the alloca data is suitably aligned;
+  // otherwise, for leaf functions, align to the TransientStackAlignment
+  // value.
+  unsigned StackAlign;
+  if (adjustsStack() || hasVarSizedObjects() ||
+      (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+    StackAlign = TFI->getStackAlignment();
+  else
+    StackAlign = TFI->getTransientStackAlignment();
+
+  // If the frame pointer is eliminated, all frame offsets will be relative to
+  // SP not FP. Align to MaxAlign so this works.
+  StackAlign = std::max(StackAlign, MaxAlign);
+  unsigned AlignMask = StackAlign - 1;
+  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+  return (unsigned)Offset;
+}
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+  if (Objects.empty()) return;
+
+  const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
+  int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+  OS << "Frame Objects:\n";
+
+  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+    const StackObject &SO = Objects[i];
+    OS << "  fi#" << (int)(i-NumFixedObjects) << ": ";
+    if (SO.Size == ~0ULL) {
+      OS << "dead\n";
+      continue;
+    }
+    if (SO.Size == 0)
+      OS << "variable sized";
+    else
+      OS << "size=" << SO.Size;
+    OS << ", align=" << SO.Alignment;
+
+    if (i < NumFixedObjects)
+      OS << ", fixed";
+    if (i < NumFixedObjects || SO.SPOffset != -1) {
+      int64_t Off = SO.SPOffset - ValOffset;
+      OS << ", at location [SP";
+      if (Off > 0)
+        OS << "+" << Off;
+      else if (Off < 0)
+        OS << Off;
+      OS << "]";
+    }
+    OS << "\n";
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const {
+  print(MF, dbgs());
+}
+#endif
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index c9767a25e90..ac4ccb81b88 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -756,214 +756,6 @@ void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) {
 
 /// \}
 
-//===----------------------------------------------------------------------===//
-//  MachineFrameInfo implementation
-//===----------------------------------------------------------------------===//
-
-/// Make sure the function is at least Align bytes aligned.
-void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
-  if (!StackRealignable)
-    assert(Align <= StackAlignment &&
-           "For targets without stack realignment, Align is out of limit!");
-  if (MaxAlignment < Align) MaxAlignment = Align;
-}
-
-/// Clamp the alignment if requested and emit a warning.
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
-                                           unsigned StackAlign) {
-  if (!ShouldClamp || Align <= StackAlign)
-    return Align;
-  DEBUG(dbgs() << "Warning: requested alignment " << Align
-               << " exceeds the stack alignment " << StackAlign
-               << " when stack realignment is off" << '\n');
-  return StackAlign;
-}
-
-/// Create a new statically sized stack object, returning a nonnegative
-/// identifier to represent it.
-int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
-                      bool isSS, const AllocaInst *Alloca) {
-  assert(Size != 0 && "Cannot allocate zero size stack objects!");
-  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
-  Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
-                                !isSS));
-  int Index = (int)Objects.size() - NumFixedObjects - 1;
-  assert(Index >= 0 && "Bad frame index!");
-  ensureMaxAlignment(Alignment);
-  return Index;
-}
-
-/// Create a new statically sized stack object that represents a spill slot,
-/// returning a nonnegative identifier to represent it.
-int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
-                                             unsigned Alignment) {
-  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
-  CreateStackObject(Size, Alignment, true);
-  int Index = (int)Objects.size() - NumFixedObjects - 1;
-  ensureMaxAlignment(Alignment);
-  return Index;
-}
-
-/// Notify the MachineFrameInfo object that a variable sized object has been
-/// created. This must be created whenever a variable sized object is created,
-/// whether or not the index returned is actually used.
-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
-                                                const AllocaInst *Alloca) {
-  HasVarSizedObjects = true;
-  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
-  Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
-  ensureMaxAlignment(Alignment);
-  return (int)Objects.size()-NumFixedObjects-1;
-}
-
-/// Create a new object at a fixed location on the stack.
-/// All fixed objects should be created before other objects are created for
-/// efficiency. By default, fixed objects are immutable. This returns an
-/// index with a negative value.
-int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
-                                        bool Immutable, bool isAliased) {
-  assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
-  // The alignment of the frame index can be determined from its offset from
-  // the incoming frame position.  If the frame object is at offset 32 and
-  // the stack is guaranteed to be 16-byte aligned, then we know that the
-  // object is 16-byte aligned. Note that unlike the non-fixed case, if the
-  // stack needs realignment, we can't assume that the stack will in fact be
-  // aligned.
-  unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
-  Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
-  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
-                                              /*isSS*/   false,
-                                              /*Alloca*/ nullptr, isAliased));
-  return -++NumFixedObjects;
-}
-
-/// Create a spill slot at a fixed location on the stack.
-/// Returns an index with a negative value.
-int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
-                                                  int64_t SPOffset,
-                                                  bool Immutable) {
-  unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
-  Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
-  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
-                                              /*isSS*/ true,
-                                              /*Alloca*/ nullptr,
-                                              /*isAliased*/ false));
-  return -++NumFixedObjects;
-}
-
-BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  BitVector BV(TRI->getNumRegs());
-
-  // Before CSI is calculated, no registers are considered pristine. They can be
-  // freely used and PEI will make sure they are saved.
-  if (!isCalleeSavedInfoValid())
-    return BV;
-
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
-       ++CSR)
-    BV.set(*CSR);
-
-  // Saved CSRs are not pristine.
-  for (auto &I : getCalleeSavedInfo())
-    for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
-      BV.reset(*S);
-
-  return BV;
-}
-
-unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-  unsigned MaxAlign = getMaxAlignment();
-  int Offset = 0;
-
-  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
-  // It really should be refactored to share code. Until then, changes
-  // should keep in mind that there's tight coupling between the two.
-
-  for (int i = getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -getObjectOffset(i);
-    if (FixedOff > Offset) Offset = FixedOff;
-  }
-  for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
-    if (isDeadObjectIndex(i))
-      continue;
-    Offset += getObjectSize(i);
-    unsigned Align = getObjectAlignment(i);
-    // Adjust to alignment boundary
-    Offset = (Offset+Align-1)/Align*Align;
-
-    MaxAlign = std::max(Align, MaxAlign);
-  }
-
-  if (adjustsStack() && TFI->hasReservedCallFrame(MF))
-    Offset += getMaxCallFrameSize();
-
-  // Round up the size to a multiple of the alignment.  If the function has
-  // any calls or alloca's, align to the target's StackAlignment value to
-  // ensure that the callee's frame or the alloca data is suitably aligned;
-  // otherwise, for leaf functions, align to the TransientStackAlignment
-  // value.
-  unsigned StackAlign;
-  if (adjustsStack() || hasVarSizedObjects() ||
-      (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
-    StackAlign = TFI->getStackAlignment();
-  else
-    StackAlign = TFI->getTransientStackAlignment();
-
-  // If the frame pointer is eliminated, all frame offsets will be relative to
-  // SP not FP. Align to MaxAlign so this works.
-  StackAlign = std::max(StackAlign, MaxAlign);
-  unsigned AlignMask = StackAlign - 1;
-  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
-  return (unsigned)Offset;
-}
-
-void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
-  if (Objects.empty()) return;
-
-  const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
-  int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
-
-  OS << "Frame Objects:\n";
-
-  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
-    const StackObject &SO = Objects[i];
-    OS << "  fi#" << (int)(i-NumFixedObjects) << ": ";
-    if (SO.Size == ~0ULL) {
-      OS << "dead\n";
-      continue;
-    }
-    if (SO.Size == 0)
-      OS << "variable sized";
-    else
-      OS << "size=" << SO.Size;
-    OS << ", align=" << SO.Alignment;
-
-    if (i < NumFixedObjects)
-      OS << ", fixed";
-    if (i < NumFixedObjects || SO.SPOffset != -1) {
-      int64_t Off = SO.SPOffset - ValOffset;
-      OS << ", at location [SP";
-      if (Off > 0)
-        OS << "+" << Off;
-      else if (Off < 0)
-        OS << Off;
-      OS << "]";
-    }
-    OS << "\n";
-  }
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const {
-  print(MF, dbgs());
-}
-#endif
-
 //===----------------------------------------------------------------------===//
 //  MachineJumpTableInfo implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 1faf6292a9c..d665201a5d1 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -2350,7 +2350,7 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
                                           const MachineInstr &Orig,
                                           int FrameIndex) {
   const MDNode *Var = Orig.getDebugVariable();
-  auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression());
+  const auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression());
   bool IsIndirect = Orig.isIndirectDebugValue();
   uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0;
   DebugLoc DL = Orig.getDebugLoc();
@@ -2359,13 +2359,8 @@ MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
   // If the DBG_VALUE already was a memory location, add an extra
   // DW_OP_deref. Otherwise just turning this from a register into a
   // memory/indirect location is sufficient.
-  if (IsIndirect) {
-    SmallVector<uint64_t, 8> Ops;
-    Ops.push_back(dwarf::DW_OP_deref);
-    if (Expr)
-      Ops.append(Expr->elements_begin(), Expr->elements_end());
-    Expr = DIExpression::get(Expr->getContext(), Ops);
-  }
+  if (IsIndirect)
+    Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
   return BuildMI(BB, I, DL, Orig.getDesc())
       .addFrameIndex(FrameIndex)
       .addImm(Offset)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1251ae6262b..dc0276d5766 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
@@ -236,10 +237,13 @@ namespace {
     SDValue visitSUB(SDNode *N);
     SDValue visitADDC(SDNode *N);
     SDValue visitUADDO(SDNode *N);
+    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
     SDValue visitSUBC(SDNode *N);
     SDValue visitUSUBO(SDNode *N);
     SDValue visitADDE(SDNode *N);
+    SDValue visitADDCARRY(SDNode *N);
     SDValue visitSUBE(SDNode *N);
+    SDValue visitSUBCARRY(SDNode *N);
     SDValue visitMUL(SDNode *N);
     SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
@@ -369,14 +373,14 @@ namespace {
     SDValue BuildSDIVPow2(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
-    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
-    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
-    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
-    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
+    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
+    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
+    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
-                                SDNodeFlags *Flags, bool Reciprocal);
+                                SDNodeFlags Flags, bool Reciprocal);
     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
-                                SDNodeFlags *Flags, bool Reciprocal);
+                                SDNodeFlags Flags, bool Reciprocal);
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -396,6 +400,7 @@ namespace {
     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                   ArrayRef<int> VectorMask, SDValue VecIn1,
                                   SDValue VecIn2, unsigned LeftIdx);
+    SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
 
     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
 
@@ -644,7 +649,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
   case ISD::FSUB:
     // We can't turn -(A-B) into B-A when we honor signed zeros.
     if (!Options->NoSignedZerosFPMath &&
-        !Op.getNode()->getFlags()->hasNoSignedZeros())
+        !Op.getNode()->getFlags().hasNoSignedZeros())
       return 0;
 
     // fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -682,7 +687,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 
   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 
-  const SDNodeFlags *Flags = Op.getNode()->getFlags();
+  const SDNodeFlags Flags = Op.getNode()->getFlags();
 
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Unknown code");
@@ -965,8 +970,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 /// things it uses can be simplified by bit propagation. If so, return true.
 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
-  APInt KnownZero, KnownOne;
-  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+  KnownBits Known;
+  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
     return false;
 
   // Revisit the node.
@@ -1412,7 +1417,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::SUBC:               return visitSUBC(N);
   case ISD::USUBO:              return visitUSUBO(N);
   case ISD::ADDE:               return visitADDE(N);
+  case ISD::ADDCARRY:           return visitADDCARRY(N);
   case ISD::SUBE:               return visitSUBE(N);
+  case ISD::SUBCARRY:           return visitSUBCARRY(N);
   case ISD::MUL:                return visitMUL(N);
   case ISD::SDIV:               return visitSDIV(N);
   case ISD::UDIV:               return visitUDIV(N);
@@ -1866,14 +1873,31 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
   if (isNullConstant(N1))
     return N0;
 
-  // fold ((c1-A)+c2) -> (c1+c2)-A
   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
-    if (N0.getOpcode() == ISD::SUB)
-      if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
-        return DAG.getNode(ISD::SUB, DL, VT,
-                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
-                           N0.getOperand(1));
+    // fold ((c1-A)+c2) -> (c1+c2)-A
+    if (N0.getOpcode() == ISD::SUB &&
+        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
+      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
+      return DAG.getNode(ISD::SUB, DL, VT,
+                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+                         N0.getOperand(1));
+    }
+
+    // add (sext i1 X), 1 -> zext (not i1 X)
+    // We don't transform this pattern:
+    //   add (zext i1 X), -1 -> sext (not i1 X)
+    // because most (?) targets generate better code for the zext form.
+    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+        isOneConstantOrOneSplatConstant(N1)) {
+      SDValue X = N0.getOperand(0);
+      if ((!LegalOperations ||
+           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+          X.getScalarValueSizeInBits() == 1) {
+        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
       }
+    }
   }
 
   if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -1992,6 +2016,11 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
     }
   }
 
+  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
+    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
+                       N0, N1.getOperand(0), N1.getOperand(2));
+
   return SDValue();
 }
 
@@ -2055,6 +2084,26 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                      DAG.getConstant(0, DL, CarryVT));
 
+  if (SDValue Combined = visitUADDOLike(N0, N1, N))
+    return Combined;
+
+  if (SDValue Combined = visitUADDOLike(N1, N0, N))
+    return Combined;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
+  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+  // If Y + 1 cannot overflow.
+  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
+    SDValue Y = N1.getOperand(0);
+    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
+    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
+      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
+                         N1.getOperand(2));
+  }
+
   return SDValue();
 }
 
@@ -2077,6 +2126,25 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue CarryIn = N->getOperand(2);
+
+  // canonicalize constant to RHS
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
+                       N1, N0, CarryIn);
+
+  // fold (addcarry x, y, false) -> (uaddo x, y)
+  if (isNullConstant(CarryIn))
+    return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N0, N1);
+
+  return SDValue();
+}
+
 // Since it may not be valid to emit a fold to zero for vector initializers
 // check if we can before folding.
 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
@@ -2143,13 +2211,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
 
     // 0 - X --> 0 if the sub is NUW.
-    if (N->getFlags()->hasNoUnsignedWrap())
+    if (N->getFlags().hasNoUnsignedWrap())
       return N0;
 
     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
       // N1 must be 0 because negating the minimum signed value is undefined.
-      if (N->getFlags()->hasNoSignedWrap())
+      if (N->getFlags().hasNoSignedWrap())
         return N0;
 
       // 0 - X --> X if X is 0 or the minimum signed value.
@@ -2309,6 +2377,18 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue CarryIn = N->getOperand(2);
+
+  // fold (subcarry x, y, false) -> (usubo x, y)
+  if (isNullConstant(CarryIn))
+    return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2589,9 +2669,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   // better results in that case. The target-specific lowering should learn how
   // to handle exact sdivs efficiently.
   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
-      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
-      (N1C->getAPIntValue().isPowerOf2() ||
-       (-N1C->getAPIntValue()).isPowerOf2())) {
+      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
+                                    (-N1C->getAPIntValue()).isPowerOf2())) {
     // Target-specific implementation of sdiv x, pow2.
     if (SDValue Res = BuildSDIVPow2(N))
       return Res;
@@ -3766,7 +3845,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   EVT VT = N->getValueType(0);
   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
     return SDValue();
-  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
     return SDValue();
 
   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
@@ -3880,8 +3959,15 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
 
   SDValue N0 = N.getOperand(0);
   unsigned Opc0 = N0.getOpcode();
+  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
+    return false;
 
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  ConstantSDNode *N1C = nullptr;
+  // SHL or SRL: look upstream for AND mask operand
+  if (Opc == ISD::AND)
+    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  else if (Opc0 == ISD::AND)
+    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   if (!N1C)
     return false;
 
@@ -3952,7 +4038,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   EVT VT = N->getValueType(0);
   if (VT != MVT::i32)
     return SDValue();
-  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
     return SDValue();
 
   // Look for either
@@ -3967,18 +4053,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   if (N1.getOpcode() == ISD::OR &&
       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
     // (or (or (and), (and)), (or (and), (and)))
-    SDValue N000 = N00.getOperand(0);
-    if (!isBSwapHWordElement(N000, Parts))
+    if (!isBSwapHWordElement(N00, Parts))
       return SDValue();
 
-    SDValue N001 = N00.getOperand(1);
-    if (!isBSwapHWordElement(N001, Parts))
+    if (!isBSwapHWordElement(N01, Parts))
       return SDValue();
-    SDValue N010 = N01.getOperand(0);
-    if (!isBSwapHWordElement(N010, Parts))
+    SDValue N10 = N1.getOperand(0);
+    if (!isBSwapHWordElement(N10, Parts))
       return SDValue();
-    SDValue N011 = N01.getOperand(1);
-    if (!isBSwapHWordElement(N011, Parts))
+    SDValue N11 = N1.getOperand(1);
+    if (!isBSwapHWordElement(N11, Parts))
       return SDValue();
   } else {
     // (or (or (or (and), (and)), (and)), (and))
@@ -5322,7 +5406,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
-      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
+      N0->getFlags().hasExact()) {
     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
       uint64_t C1 = N0C1->getZExtValue();
       uint64_t C2 = N1C->getZExtValue();
@@ -5347,7 +5431,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
         SDValue Shift;
         if (c2 > c1) {
-          Mask = Mask.shl(c2 - c1);
+          Mask <<= c2 - c1;
           SDLoc DL(N);
           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
@@ -5680,20 +5764,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
   if (N1C && N0.getOpcode() == ISD::CTLZ &&
       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
-    APInt KnownZero, KnownOne;
-    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
+    KnownBits Known;
+    DAG.computeKnownBits(N0.getOperand(0), Known);
 
     // If any of the input bits are KnownOne, then the input couldn't be all
     // zeros, thus the result of the srl will always be zero.
-    if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
+    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
 
     // If all of the bits input the to ctlz node are known to be zero, then
     // the result of the ctlz is "32" and the result of the shift is one.
-    APInt UnknownBits = ~KnownZero;
+    APInt UnknownBits = ~Known.Zero;
     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
 
     // Otherwise, check to see if there is exactly one bit input to the ctlz.
-    if ((UnknownBits & (UnknownBits - 1)) == 0) {
+    if (UnknownBits.isPowerOf2()) {
       // Okay, we know that only that the single bit specified by UnknownBits
       // could be set on input to the CTLZ node. If this bit is set, the SRL
       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
@@ -6889,6 +6973,51 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
   return SDValue(N, 0); // Return N so it doesn't get rechecked!
 }
 
+/// If we're narrowing or widening the result of a vector select and the final
+/// size is the same size as a setcc (compare) feeding the select, then try to
+/// apply the cast operation to the select's operands because matching vector
+/// sizes for a select condition and other operands should be more efficient.
+SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
+  unsigned CastOpcode = Cast->getOpcode();
+  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
+          CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
+          CastOpcode == ISD::FP_ROUND) &&
+         "Unexpected opcode for vector select narrowing/widening");
+
+  // We only do this transform before legal ops because the pattern may be
+  // obfuscated by target-specific operations after legalization. Do not create
+  // an illegal select op, however, because that may be difficult to lower.
+  EVT VT = Cast->getValueType(0);
+  if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+    return SDValue();
+
+  SDValue VSel = Cast->getOperand(0);
+  if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
+      VSel.getOperand(0).getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Does the setcc have the same vector size as the casted select?
+  SDValue SetCC = VSel.getOperand(0);
+  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
+  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
+    return SDValue();
+
+  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
+  SDValue A = VSel.getOperand(1);
+  SDValue B = VSel.getOperand(2);
+  SDValue CastA, CastB;
+  SDLoc DL(Cast);
+  if (CastOpcode == ISD::FP_ROUND) {
+    // FP_ROUND (fptrunc) has an extra flag operand to pass along.
+    CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
+    CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
+  } else {
+    CastA = DAG.getNode(CastOpcode, DL, VT, A);
+    CastB = DAG.getNode(CastOpcode, DL, VT, B);
+  }
+  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
+}
+
 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
@@ -7112,19 +7241,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       DAG.SignBitIsZero(N0))
     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
 
+  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+    return NewVSel;
+
   return SDValue();
 }
 
 // isTruncateOf - If N is a truncate of some other value, return true, record
-// the value being truncated in Op and which of Op's bits are zero in KnownZero.
-// This function computes KnownZero to avoid a duplicated call to
+// the value being truncated in Op and which of Op's bits are zero/one in Known.
+// This function computes KnownBits to avoid a duplicated call to
 // computeKnownBits in the caller.
 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
-                         APInt &KnownZero) {
-  APInt KnownOne;
+                         KnownBits &Known) {
   if (N->getOpcode() == ISD::TRUNCATE) {
     Op = N->getOperand(0);
-    DAG.computeKnownBits(Op, KnownZero, KnownOne);
+    DAG.computeKnownBits(Op, Known);
     return true;
   }
 
@@ -7143,9 +7274,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
   else
     return false;
 
-  DAG.computeKnownBits(Op, KnownZero, KnownOne);
+  DAG.computeKnownBits(Op, Known);
 
-  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+  if (!(Known.Zero | 1).isAllOnesValue())
     return false;
 
   return true;
@@ -7170,8 +7301,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   // This is valid when the truncated bits of x are already zero.
   // FIXME: We should extend this to work for vectors too.
   SDValue Op;
-  APInt KnownZero;
-  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+  KnownBits Known;
+  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
     APInt TruncatedBits =
       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
       APInt(Op.getValueSizeInBits(), 0) :
@@ -7179,7 +7310,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                         N0.getValueSizeInBits(),
                         std::min(Op.getValueSizeInBits(),
                                  VT.getSizeInBits()));
-    if (TruncatedBits == (KnownZero & TruncatedBits)) {
+    if (TruncatedBits.isSubsetOf(Known.Zero)) {
       if (VT.bitsGT(Op.getValueType()))
         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
       if (VT.bitsLT(Op.getValueType()))
@@ -7446,6 +7577,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                        ShAmt);
   }
 
+  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+    return NewVSel;
+
   return SDValue();
 }
 
@@ -7802,7 +7936,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                                PtrType, LN0->getBasePtr(),
                                DAG.getConstant(PtrOff, DL, PtrType),
-                               &Flags);
+                               Flags);
   AddToWorklist(NewPtr.getNode());
 
   SDValue Load;
@@ -8228,17 +8362,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     return SDValue(N, 0);
 
   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
   // When the adde's carry is not used.
-  if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() &&
-      !N0.getNode()->hasAnyUseOfValue(1) &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) {
+  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
+      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
+      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
     SDLoc SL(N);
     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
-    return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue),
-                       X, Y, N0.getOperand(2));
+    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
+    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
   }
 
+  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+    return NewVSel;
+
   return SDValue();
 }
 
@@ -8701,7 +8839,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
 }
 
 static bool isContractable(SDNode *N) {
-  SDNodeFlags F = cast<BinaryWithFlagsSDNode>(N)->Flags;
+  SDNodeFlags F = N->getFlags();
   return F.hasAllowContract() || F.hasUnsafeAlgebra();
 }
 
@@ -9287,7 +9425,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
-  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+  const SDNodeFlags Flags = N->getFlags();
 
   // fold vector ops
   if (VT.isVector())
@@ -9318,7 +9456,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
 
   // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
+  if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
     // fold (fadd A, 0) -> A
     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
       if (N1C->isZero())
@@ -9441,7 +9579,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
-  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+  const SDNodeFlags Flags = N->getFlags();
 
   // fold vector ops
   if (VT.isVector())
@@ -9461,7 +9599,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
 
   // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.NoSignedZerosFPMath  || N->getFlags()->hasNoSignedZeros()) {
+  if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
     // (fsub 0, B) -> -B
     if (N0CFP && N0CFP->isZero()) {
       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -9512,7 +9650,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
-  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+  const SDNodeFlags Flags = N->getFlags();
 
   // fold vector ops
   if (VT.isVector()) {
@@ -9656,7 +9794,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
       return DAG.getNode(ISD::FMUL, DL, VT, N0,
                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
-                                     &Flags), &Flags);
+                                     Flags), Flags);
     }
 
     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
@@ -9666,7 +9804,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
       return DAG.getNode(ISD::FMA, DL, VT,
                          N0.getOperand(0),
                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
-                                     &Flags),
+                                     Flags),
                          N2);
     }
   }
@@ -9692,16 +9830,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
     if (N1CFP && N0 == N2) {
       return DAG.getNode(ISD::FMUL, DL, VT, N0,
                          DAG.getNode(ISD::FADD, DL, VT, N1,
-                                     DAG.getConstantFP(1.0, DL, VT), &Flags),
-                         &Flags);
+                                     DAG.getConstantFP(1.0, DL, VT), Flags),
+                         Flags);
     }
 
     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
       return DAG.getNode(ISD::FMUL, DL, VT, N0,
                          DAG.getNode(ISD::FADD, DL, VT, N1,
-                                     DAG.getConstantFP(-1.0, DL, VT), &Flags),
-                         &Flags);
+                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
+                         Flags);
     }
   }
 
@@ -9717,8 +9855,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
-  const SDNodeFlags *Flags = N->getFlags();
-  if (!UnsafeMath && !Flags->hasAllowReciprocal())
+  const SDNodeFlags Flags = N->getFlags();
+  if (!UnsafeMath && !Flags.hasAllowReciprocal())
     return SDValue();
 
   // Skip if current node is a reciprocal.
@@ -9741,7 +9879,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
       // This division is eligible for optimization only if global unsafe math
       // is enabled or if this division allows reciprocal formation.
-      if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
         Users.insert(U);
     }
   }
@@ -9780,7 +9918,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
-  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+  SDNodeFlags Flags = N->getFlags();
 
   // fold vector ops
   if (VT.isVector())
@@ -9894,8 +10032,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
 
   // fold (frem c1, c2) -> fmod(c1,c2)
   if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
-                       &cast<BinaryWithFlagsSDNode>(N)->Flags);
+    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
 
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
@@ -9915,7 +10052,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
   // For now, create a Flags object for use with all unsafe math transforms.
   SDNodeFlags Flags;
   Flags.setUnsafeAlgebra(true);
-  return buildSqrtEstimate(N0, &Flags);
+  return buildSqrtEstimate(N0, Flags);
 }
 
 /// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -10190,6 +10327,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
                        Tmp, N0.getOperand(1));
   }
 
+  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+    return NewVSel;
+
   return SDValue();
 }
 
@@ -10256,6 +10396,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   }
 
+  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+    return NewVSel;
+
   return SDValue();
 }
 
@@ -10341,10 +10484,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
       if (Level >= AfterLegalizeDAG &&
           (TLI.isFPImmLegal(CVal, VT) ||
            TLI.isOperationLegal(ISD::ConstantFP, VT)))
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
-                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
-                                       N0.getOperand(1)),
-                           &cast<BinaryWithFlagsSDNode>(N0)->Flags);
+        return DAG.getNode(
+            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
+            N0->getFlags());
     }
   }
 
@@ -15832,7 +15975,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
 ///     =>
 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
 ///     does not require additional intermediate precision]
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
   if (Level >= AfterLegalizeDAG)
     return SDValue();
 
@@ -15887,7 +16030,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
 /// As a result, we precompute A/2 prior to the iteration loop.
 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                          unsigned Iterations,
-                                         SDNodeFlags *Flags, bool Reciprocal) {
+                                         SDNodeFlags Flags, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
@@ -15931,7 +16074,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                          unsigned Iterations,
-                                         SDNodeFlags *Flags, bool Reciprocal) {
+                                         SDNodeFlags Flags, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -15976,7 +16119,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
 /// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                            bool Reciprocal) {
   if (Level >= AfterLegalizeDAG)
     return SDValue();
@@ -16029,11 +16172,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
   return SDValue();
 }
 
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
   return buildSqrtEstimateImpl(Op, Flags, true);
 }
 
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
   return buildSqrtEstimateImpl(Op, Flags, false);
 }
 
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 377a5237f15..a0135dc40b8 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -400,10 +400,10 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
   if (!LOI->IsValid)
     return nullptr;
 
-  if (BitWidth > LOI->KnownZero.getBitWidth()) {
+  if (BitWidth > LOI->Known.getBitWidth()) {
     LOI->NumSignBits = 1;
-    LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
-    LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+    LOI->Known.Zero = LOI->Known.Zero.zextOrTrunc(BitWidth);
+    LOI->Known.One = LOI->Known.One.zextOrTrunc(BitWidth);
   }
 
   return LOI;
@@ -436,17 +436,15 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   Value *V = PN->getIncomingValue(0);
   if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
     DestLOI.NumSignBits = 1;
-    APInt Zero(BitWidth, 0);
-    DestLOI.KnownZero = Zero;
-    DestLOI.KnownOne = Zero;
+    DestLOI.Known = KnownBits(BitWidth);
     return;
   }
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     APInt Val = CI->getValue().zextOrTrunc(BitWidth);
     DestLOI.NumSignBits = Val.getNumSignBits();
-    DestLOI.KnownZero = ~Val;
-    DestLOI.KnownOne = Val;
+    DestLOI.Known.Zero = ~Val;
+    DestLOI.Known.One = Val;
   } else {
     assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
                                 "CopyToReg node was created.");
@@ -463,25 +461,23 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
     DestLOI = *SrcLOI;
   }
 
-  assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
-         DestLOI.KnownOne.getBitWidth() == BitWidth &&
+  assert(DestLOI.Known.Zero.getBitWidth() == BitWidth &&
+         DestLOI.Known.One.getBitWidth() == BitWidth &&
          "Masks should have the same bit width as the type.");
 
   for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
     Value *V = PN->getIncomingValue(i);
     if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
       DestLOI.NumSignBits = 1;
-      APInt Zero(BitWidth, 0);
-      DestLOI.KnownZero = Zero;
-      DestLOI.KnownOne = Zero;
+      DestLOI.Known = KnownBits(BitWidth);
       return;
     }
 
     if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
       APInt Val = CI->getValue().zextOrTrunc(BitWidth);
       DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
-      DestLOI.KnownZero &= ~Val;
-      DestLOI.KnownOne &= Val;
+      DestLOI.Known.Zero &= ~Val;
+      DestLOI.Known.One &= Val;
       continue;
     }
 
@@ -498,8 +494,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
       return;
     }
     DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
-    DestLOI.KnownZero &= SrcLOI->KnownZero;
-    DestLOI.KnownOne &= SrcLOI->KnownOne;
+    DestLOI.Known.Zero &= SrcLOI->Known.Zero;
+    DestLOI.Known.One &= SrcLOI->Known.One;
   }
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fdebb8bd00d..2654b3ad7a6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2589,7 +2589,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
           DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
 
     APInt Shift(Sz, 1);
-    Shift = Shift.shl(J);
+    Shift <<= J;
     Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
     Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
   }
@@ -3253,7 +3253,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     EVT VT = Node->getValueType(0);
     if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
         TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
-      const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
+      const SDNodeFlags Flags = Node->getFlags();
       Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
       Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
       Results.push_back(Tmp1);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9ed70c9b4db..92b0d2ae401 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -21,6 +21,7 @@
 #include "LegalizeTypes.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -134,6 +135,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SMULO:
   case ISD::UMULO:       Res = PromoteIntRes_XMULO(N, ResNo); break;
 
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY:    Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+
   case ISD::ATOMIC_LOAD:
     Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
 
@@ -510,9 +514,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
   // Simply change the return type of the boolean result.
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
   EVT ValueVTs[] = { N->getValueType(0), NVT };
-  SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
+  unsigned NumOps = N->getNumOperands();
+  assert(NumOps <= 3 && "Too many operands");
+  if (NumOps == 3)
+    Ops[2] = N->getOperand(2);
+
   SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
-                            DAG.getVTList(ValueVTs), Ops);
+                            DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps));
 
   // Modified the sum result - switch anything that used the old sum to use
   // the new one.
@@ -762,6 +771,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
   return Res;
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
+  if (ResNo == 1)
+    return PromoteIntRes_Overflow(N);
+  llvm_unreachable("Not implemented");
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
   // Promote the overflow bit trivially.
   if (ResNo == 1)
@@ -924,6 +939,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::SRL:
   case ISD::ROTL:
   case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -1276,6 +1294,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
                                 N->getOperand(0).getValueType().getScalarType());
 }
 
+SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Carry = N->getOperand(2);
+  SDLoc DL(N);
+
+  auto VT = getSetCCResultType(LHS.getValueType());
+  TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT);
+  switch (BoolType) {
+  case TargetLoweringBase::UndefinedBooleanContent:
+    Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT);
+    break;
+  case TargetLoweringBase::ZeroOrOneBooleanContent:
+    Carry = DAG.getZExtOrTrunc(Carry, DL, VT);
+    break;
+  case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+    Carry = DAG.getSExtOrTrunc(Carry, DL, VT);
+    break;
+  }
+
+  return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
+}
 
 //===----------------------------------------------------------------------===//
 //  Integer Result Expansion
@@ -1395,6 +1437,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::ADDE:
   case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
 
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
@@ -1525,11 +1570,11 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
 
   APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
-  APInt KnownZero, KnownOne;
-  DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne);
+  KnownBits Known;
+  DAG.computeKnownBits(N->getOperand(1), Known);
 
   // If we don't know anything about the high bits, exit.
-  if (((KnownZero|KnownOne) & HighBitMask) == 0)
+  if (((Known.Zero|Known.One) & HighBitMask) == 0)
     return false;
 
   // Get the incoming operand to be shifted.
@@ -1538,7 +1583,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
 
   // If we know that any of the high bits of the shift amount are one, then we
   // can do this as a couple of simple shifts.
-  if (KnownOne.intersects(HighBitMask)) {
+  if (Known.One.intersects(HighBitMask)) {
     // Mask out the high bit, which we know is set.
     Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
                       DAG.getConstant(~HighBitMask, dl, ShTy));
@@ -1563,7 +1608,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
 
   // If we know that all of the high bits of the shift amount are zero, then we
   // can do this as a couple of simple shifts.
-  if ((KnownZero & HighBitMask) == HighBitMask) {
+  if (HighBitMask.isSubsetOf(Known.Zero)) {
     // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
     // shift if x is zero.  We can use XOR here because x is known to be smaller
     // than 32.
@@ -1738,6 +1783,23 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
   SDValue LoOps[2] = { LHSL, RHSL };
   SDValue HiOps[3] = { LHSH, RHSH };
 
+  bool HasOpCarry = TLI.isOperationLegalOrCustom(
+      N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+      TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+  if (HasOpCarry) {
+    SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+    if (N->getOpcode() == ISD::ADD) {
+      Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
+    } else {
+      Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+      HiOps[2] = Lo.getValue(1);
+      Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
+    }
+    return;
+  }
+
   // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
   // them.  TODO: Teach operation legalization how to expand unsupported
   // ADDC/ADDE/SUBC/SUBE.  The problem is that these operations generate
@@ -1767,7 +1829,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
                                    ISD::UADDO : ISD::USUBO,
                                  TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
   if (hasOVF) {
-    SDVTList VTList = DAG.getVTList(NVT, NVT);
+    EVT OvfVT = getSetCCResultType(NVT);
+    SDVTList VTList = DAG.getVTList(NVT, OvfVT);
     TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
     int RevOpc;
     if (N->getOpcode() == ISD::ADD) {
@@ -1783,12 +1846,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
 
     switch (BoolType) {
     case TargetLoweringBase::UndefinedBooleanContent:
-      OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF);
+      OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
       LLVM_FALLTHROUGH;
     case TargetLoweringBase::ZeroOrOneBooleanContent:
+      OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
       Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
       break;
     case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+      OVF = DAG.getSExtOrTrunc(OVF, dl, NVT);
       Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
     }
     return;
@@ -1866,6 +1931,71 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
   ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
 }
 
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDLoc dl(N);
+
+  SDValue Ovf;
+
+  bool HasOpCarry = TLI.isOperationLegalOrCustom(
+      N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+      TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+  if (HasOpCarry) {
+    // Expand the subcomponents.
+    SDValue LHSL, LHSH, RHSL, RHSH;
+    GetExpandedInteger(LHS, LHSL, LHSH);
+    GetExpandedInteger(RHS, RHSL, RHSH);
+    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+    SDValue LoOps[2] = { LHSL, RHSL };
+    SDValue HiOps[3] = { LHSH, RHSH };
+
+    unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
+    Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+    HiOps[2] = Lo.getValue(1);
+    Hi = DAG.getNode(Opc, dl, VTList, HiOps);
+
+    Ovf = Hi.getValue(1);
+  } else {
+    // Expand the result by simply replacing it with the equivalent
+    // non-overflow-checking operation.
+    auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+    SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS);
+    SplitInteger(Sum, Lo, Hi);
+
+    // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+    // overflows iff a - b > a.
+    auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+    Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+  }
+
+  // Legalized the flag result - switch anything that used the old flag to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Ovf);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  // Expand the subcomponents.
+  SDValue LHSL, LHSH, RHSL, RHSH;
+  SDLoc dl(N);
+  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+  SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+  SDValue HiOps[3] = { LHSH, RHSH, SDValue() };
+
+  Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+  HiOps[2] = Lo.getValue(1);
+  Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+  // Legalized the flag result - switch anything that used the old flag to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
 void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
                                                SDValue &Lo, SDValue &Hi) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2532,29 +2662,6 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
   Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
 }
 
-void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
-                                             SDValue &Lo, SDValue &Hi) {
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  SDLoc dl(N);
-
-  // Expand the result by simply replacing it with the equivalent
-  // non-overflow-checking operation.
-  SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
-                            ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
-                            LHS, RHS);
-  SplitInteger(Sum, Lo, Hi);
-
-  // Calculate the overflow: addition overflows iff a + b < a, and subtraction
-  // overflows iff a - b > a.
-  SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
-                             N->getOpcode () == ISD::UADDO ?
-                             ISD::SETULT : ISD::SETUGT);
-
-  // Use the calculated overflow everywhere.
-  ReplaceValueWith(SDValue(N, 1), Ofl);
-}
-
 void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
                                           SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index af55a22972a..cde4331cc42 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -279,6 +279,7 @@ private:
   SDValue PromoteIntRes_SRL(SDNode *N);
   SDValue PromoteIntRes_TRUNCATE(SDNode *N);
   SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_UNDEF(SDNode *N);
   SDValue PromoteIntRes_VAARG(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
@@ -311,6 +312,7 @@ private:
   SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+  SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
 
   void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
 
@@ -350,6 +352,7 @@ private:
   void ExpandIntRes_ADDSUB            (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_ADDSUBC           (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_ADDSUBE           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_ADDSUBCARRY       (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_BITREVERSE        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_BSWAP             (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_MUL               (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4a3160297d6..97a7fab6efd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -523,16 +523,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
   return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
 }
 
-/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
-/// so just return the element, ignoring the index.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
-  EVT VT = N->getValueType(0);
-  SDValue Res = GetScalarizedVector(N->getOperand(0));
-  if (Res.getValueType() != VT)
-    Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
-  return Res;
-}
-
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue Res = GetScalarizedVector(N->getOperand(0));
+  if (Res.getValueType() != VT)
+    Res = VT.isFloatingPoint()
+              ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
+              : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
+  return Res;
+}
 
 /// If the input condition is a vector that needs to be scalarized, it must be
 /// <1 x i1>, so just convert to a normal ISD::SELECT
@@ -730,7 +731,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
   GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
   SDLoc dl(N);
 
-  const SDNodeFlags *Flags = N->getFlags();
+  const SDNodeFlags Flags = N->getFlags();
   unsigned Opcode = N->getOpcode();
   Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
   Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
@@ -2219,7 +2220,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
   EVT WidenEltVT = WidenVT.getVectorElementType();
   EVT VT = WidenVT;
   unsigned NumElts =  VT.getVectorNumElements();
-  const SDNodeFlags *Flags = N->getFlags();
+  const SDNodeFlags Flags = N->getFlags();
   while (!TLI.isTypeLegal(VT) && NumElts != 1) {
     NumElts = NumElts / 2;
     VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
@@ -2367,7 +2368,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
 
   unsigned Opcode = N->getOpcode();
   unsigned InVTNumElts = InVT.getVectorNumElements();
-  const SDNodeFlags *Flags = N->getFlags();
+  const SDNodeFlags Flags = N->getFlags();
   if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
     InOp = GetWidenedVector(N->getOperand(0));
     InVT = InOp.getValueType();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 439f67f1e15..9d949a2bbfa 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -811,8 +811,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
   AddNodeIDCustom(ID, N);
   SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
   if (Node)
-    if (const SDNodeFlags *Flags = N->getFlags())
-      Node->intersectFlagsWith(Flags);
+    Node->intersectFlagsWith(N->getFlags());
   return Node;
 }
 
@@ -832,8 +831,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
   AddNodeIDCustom(ID, N);
   SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
   if (Node)
-    if (const SDNodeFlags *Flags = N->getFlags())
-      Node->intersectFlagsWith(Flags);
+    Node->intersectFlagsWith(N->getFlags());
   return Node;
 }
 
@@ -852,8 +850,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
   AddNodeIDCustom(ID, N);
   SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
   if (Node)
-    if (const SDNodeFlags *Flags = N->getFlags())
-      Node->intersectFlagsWith(Flags);
+    Node->intersectFlagsWith(N->getFlags());
   return Node;
 }
 
@@ -901,29 +898,6 @@ void SelectionDAG::allnodes_clear() {
 #endif
 }
 
-SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL,
-                                      SDVTList VTs, SDValue N1, SDValue N2,
-                                      const SDNodeFlags *Flags) {
-  SDValue Ops[] = {N1, N2};
-
-  if (isBinOpWithFlags(Opcode)) {
-    // If no flags were passed in, use a default flags object.
-    SDNodeFlags F;
-    if (Flags == nullptr)
-      Flags = &F;
-
-    auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(),
-                                                DL.getDebugLoc(), VTs, *Flags);
-    createOperands(FN, Ops);
-
-    return FN;
-  }
-
-  auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
-  createOperands(N, Ops);
-  return N;
-}
-
 SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
                                           void *&InsertPos) {
   SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
@@ -985,6 +959,12 @@ void SelectionDAG::clear() {
   DbgInfo->clear();
 }
 
+SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
+  return VT.bitsGT(Op.getValueType())
+             ? getNode(ISD::FP_EXTEND, DL, VT, Op)
+             : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
+}
+
 SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
   return VT.bitsGT(Op.getValueType()) ?
     getNode(ISD::ANY_EXTEND, DL, VT, Op) :
@@ -1967,9 +1947,9 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
 /// for bits that V cannot have.
 bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
                                      unsigned Depth) const {
-  APInt KnownZero, KnownOne;
-  computeKnownBits(Op, KnownZero, KnownOne, Depth);
-  return (KnownZero & Mask) == Mask;
+  KnownBits Known;
+  computeKnownBits(Op, Known, Depth);
+  return Mask.isSubsetOf(Known.Zero);
 }
 
 /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
@@ -1985,31 +1965,30 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
 }
 
 /// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
-/// those that are shared by every vector element.
-void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
-                                    APInt &KnownOne, unsigned Depth) const {
+/// them in Known. For vectors, the known bits are those that are shared by
+/// every vector element.
+void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
+                                    unsigned Depth) const {
   EVT VT = Op.getValueType();
   APInt DemandedElts = VT.isVector()
                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
                            : APInt(1, 1);
-  computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
+  computeKnownBits(Op, Known, DemandedElts, Depth);
 }
 
 /// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
-/// us to only collect the known bits that are shared by the requested vector
-/// elements.
-void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
-                                    APInt &KnownOne, const APInt &DemandedElts,
+/// them in Known. The DemandedElts argument allows us to only collect the known
+/// bits that are shared by the requested vector elements.
+void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
+                                    const APInt &DemandedElts,
                                     unsigned Depth) const {
   unsigned BitWidth = Op.getScalarValueSizeInBits();
 
-  KnownZero = KnownOne = APInt(BitWidth, 0);   // Don't know anything.
+  Known = KnownBits(BitWidth);   // Don't know anything.
   if (Depth == 6)
     return;  // Limit search depth.
 
-  APInt KnownZero2, KnownOne2;
+  KnownBits Known2;
   unsigned NumElts = DemandedElts.getBitWidth();
 
   if (!DemandedElts)
@@ -2019,35 +1998,35 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
   switch (Opcode) {
   case ISD::Constant:
     // We know all of the bits for a constant!
-    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
-    KnownZero = ~KnownOne;
+    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+    Known.Zero = ~Known.One;
     break;
   case ISD::BUILD_VECTOR:
     // Collect the known bits that are shared by every demanded vector element.
     assert(NumElts == Op.getValueType().getVectorNumElements() &&
            "Unexpected vector size");
-    KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+    Known.Zero.setAllBits(); Known.One.setAllBits();
     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
       if (!DemandedElts[i])
         continue;
 
       SDValue SrcOp = Op.getOperand(i);
-      computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
+      computeKnownBits(SrcOp, Known2, Depth + 1);
 
       // BUILD_VECTOR can implicitly truncate sources, we must handle this.
       if (SrcOp.getValueSizeInBits() != BitWidth) {
         assert(SrcOp.getValueSizeInBits() > BitWidth &&
                "Expected BUILD_VECTOR implicit truncation");
-        KnownOne2 = KnownOne2.trunc(BitWidth);
-        KnownZero2 = KnownZero2.trunc(BitWidth);
+        Known2.One = Known2.One.trunc(BitWidth);
+        Known2.Zero = Known2.Zero.trunc(BitWidth);
       }
 
       // Known bits are the values that are shared by every demanded element.
-      KnownOne &= KnownOne2;
-      KnownZero &= KnownZero2;
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
 
       // If we don't know any bits, early out.
-      if (!KnownOne && !KnownZero)
+      if (!Known.One && !Known.Zero)
         break;
     }
     break;
@@ -2055,7 +2034,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // Collect the known bits that are shared by every vector element referenced
     // by the shuffle.
     APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
-    KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+    Known.Zero.setAllBits(); Known.One.setAllBits();
     const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
     assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
     for (unsigned i = 0; i != NumElts; ++i) {
@@ -2066,8 +2045,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       if (M < 0) {
         // For UNDEF elements, we don't know anything about the common state of
         // the shuffle result.
-        KnownOne.clearAllBits();
-        KnownZero.clearAllBits();
+        Known.One.clearAllBits();
+        Known.Zero.clearAllBits();
         DemandedLHS.clearAllBits();
         DemandedRHS.clearAllBits();
         break;
@@ -2081,24 +2060,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // Known bits are the values that are shared by every demanded element.
     if (!!DemandedLHS) {
       SDValue LHS = Op.getOperand(0);
-      computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1);
-      KnownOne &= KnownOne2;
-      KnownZero &= KnownZero2;
+      computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
     }
     // If we don't know any bits, early out.
-    if (!KnownOne && !KnownZero)
+    if (!Known.One && !Known.Zero)
       break;
     if (!!DemandedRHS) {
       SDValue RHS = Op.getOperand(1);
-      computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1);
-      KnownOne &= KnownOne2;
-      KnownZero &= KnownZero2;
+      computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
     }
     break;
   }
   case ISD::CONCAT_VECTORS: {
     // Split DemandedElts and test each of the demanded subvectors.
-    KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+    Known.Zero.setAllBits(); Known.One.setAllBits();
     EVT SubVectorVT = Op.getOperand(0).getValueType();
     unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
     unsigned NumSubVectors = Op.getNumOperands();
@@ -2107,12 +2086,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       DemandedSub = DemandedSub.trunc(NumSubVectorElts);
       if (!!DemandedSub) {
         SDValue Sub = Op.getOperand(i);
-        computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1);
-        KnownOne &= KnownOne2;
-        KnownZero &= KnownZero2;
+        computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
       }
       // If we don't know any bits, early out.
-      if (!KnownOne && !KnownZero)
+      if (!Known.One && !Known.Zero)
         break;
     }
     break;
@@ -2127,9 +2106,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       // Offset the demanded elts by the subvector index.
       uint64_t Idx = SubIdx->getZExtValue();
       APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
-      computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1);
+      computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
     } else {
-      computeKnownBits(Src, KnownZero, KnownOne, Depth + 1);
+      computeKnownBits(Src, Known, Depth + 1);
     }
     break;
   }
@@ -2143,7 +2122,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
 
     // Fast handling of 'identity' bitcasts.
     if (BitWidth == SubBitWidth) {
-      computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+      computeKnownBits(N0, Known, DemandedElts, Depth + 1);
       break;
     }
 
@@ -2167,10 +2146,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
           SubDemandedElts.setBit(i * SubScale);
 
       for (unsigned i = 0; i != SubScale; ++i) {
-        computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i),
+        computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
                          Depth + 1);
-        KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i);
-        KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i);
+        Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
+        Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
       }
     }
 
@@ -2187,16 +2166,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
         if (DemandedElts[i])
           SubDemandedElts.setBit(i / SubScale);
 
-      computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1);
+      computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
 
-      KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+      Known.Zero.setAllBits(); Known.One.setAllBits();
       for (unsigned i = 0; i != NumElts; ++i)
         if (DemandedElts[i]) {
           unsigned Offset = (i % SubScale) * BitWidth;
-          KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth);
-          KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth);
+          Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
+          Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
           // If we don't know any bits, early out.
-          if (!KnownOne && !KnownZero)
+          if (!Known.One && !Known.Zero)
             break;
         }
     }
@@ -2204,101 +2183,91 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
   }
   case ISD::AND:
     // If either the LHS or the RHS are Zero, the result is zero.
-    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
 
     // Output known-1 bits are only known if set in both the LHS & RHS.
-    KnownOne &= KnownOne2;
+    Known.One &= Known2.One;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
-    KnownZero |= KnownZero2;
+    Known.Zero |= Known2.Zero;
     break;
   case ISD::OR:
-    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
 
     // Output known-0 bits are only known if clear in both the LHS & RHS.
-    KnownZero &= KnownZero2;
+    Known.Zero &= Known2.Zero;
     // Output known-1 are known to be set if set in either the LHS | RHS.
-    KnownOne |= KnownOne2;
+    Known.One |= Known2.One;
     break;
   case ISD::XOR: {
-    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
 
     // Output known-0 bits are known if clear or set in both the LHS & RHS.
-    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
     // Output known-1 are known to be set if set in only one of the LHS, RHS.
-    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
-    KnownZero = KnownZeroOut;
+    Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+    Known.Zero = KnownZeroOut;
     break;
   }
   case ISD::MUL: {
-    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
 
     // If low bits are zero in either operand, output low known-0 bits.
     // Also compute a conservative estimate for high known-0 bits.
     // More trickiness is possible, but this is sufficient for the
     // interesting case of alignment computation.
-    KnownOne.clearAllBits();
-    unsigned TrailZ = KnownZero.countTrailingOnes() +
-                      KnownZero2.countTrailingOnes();
-    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
-                               KnownZero2.countLeadingOnes(),
+    Known.One.clearAllBits();
+    unsigned TrailZ = Known.Zero.countTrailingOnes() +
+                      Known2.Zero.countTrailingOnes();
+    unsigned LeadZ =  std::max(Known.Zero.countLeadingOnes() +
+                               Known2.Zero.countLeadingOnes(),
                                BitWidth) - BitWidth;
 
-    KnownZero.clearAllBits();
-    KnownZero.setLowBits(std::min(TrailZ, BitWidth));
-    KnownZero.setHighBits(std::min(LeadZ, BitWidth));
+    Known.Zero.clearAllBits();
+    Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
+    Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
     break;
   }
   case ISD::UDIV: {
     // For the purposes of computing leading zeros we can conservatively
     // treat a udiv as a logical right shift by the power of 2 known to
     // be less than the denominator.
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
-    unsigned LeadZ = KnownZero2.countLeadingOnes();
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    unsigned LeadZ = Known2.Zero.countLeadingOnes();
 
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
-    unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+    unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros();
     if (RHSUnknownLeadingOnes != BitWidth)
       LeadZ = std::min(BitWidth,
                        LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
 
-    KnownZero.setHighBits(LeadZ);
+    Known.Zero.setHighBits(LeadZ);
     break;
   }
   case ISD::SELECT:
-    computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(2), Known, Depth+1);
     // If we don't know any bits, early out.
-    if (!KnownOne && !KnownZero)
+    if (!Known.One && !Known.Zero)
       break;
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(1), Known2, Depth+1);
 
     // Only known if known in both the LHS and RHS.
-    KnownOne &= KnownOne2;
-    KnownZero &= KnownZero2;
+    Known.One &= Known2.One;
+    Known.Zero &= Known2.Zero;
     break;
   case ISD::SELECT_CC:
-    computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(3), Known, Depth+1);
     // If we don't know any bits, early out.
-    if (!KnownOne && !KnownZero)
+    if (!Known.One && !Known.Zero)
       break;
-    computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(2), Known2, Depth+1);
 
     // Only known if known in both the LHS and RHS.
-    KnownOne &= KnownOne2;
-    KnownZero &= KnownZero2;
+    Known.One &= Known2.One;
+    Known.Zero &= Known2.Zero;
     break;
   case ISD::SMULO:
   case ISD::UMULO:
@@ -2311,49 +2280,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
             TargetLowering::ZeroOrOneBooleanContent &&
         BitWidth > 1)
-      KnownZero.setBitsFrom(1);
+      Known.Zero.setBitsFrom(1);
     break;
   case ISD::SETCC:
     // If we know the result of a setcc has the top bits zero, use this info.
     if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
             TargetLowering::ZeroOrOneBooleanContent &&
         BitWidth > 1)
-      KnownZero.setBitsFrom(1);
+      Known.Zero.setBitsFrom(1);
     break;
   case ISD::SHL:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
-      computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                       Depth + 1);
-      KnownZero = KnownZero << *ShAmt;
-      KnownOne = KnownOne << *ShAmt;
+      computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+      Known.Zero <<= *ShAmt;
+      Known.One <<= *ShAmt;
       // Low bits are known zero.
-      KnownZero.setLowBits(ShAmt->getZExtValue());
+      Known.Zero.setLowBits(ShAmt->getZExtValue());
     }
     break;
   case ISD::SRL:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
-      computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                       Depth + 1);
-      KnownZero.lshrInPlace(*ShAmt);
-      KnownOne.lshrInPlace(*ShAmt);
+      computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+      Known.Zero.lshrInPlace(*ShAmt);
+      Known.One.lshrInPlace(*ShAmt);
       // High bits are known zero.
-      KnownZero.setHighBits(ShAmt->getZExtValue());
+      Known.Zero.setHighBits(ShAmt->getZExtValue());
     }
     break;
   case ISD::SRA:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
-      computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                       Depth + 1);
-      KnownZero.lshrInPlace(*ShAmt);
-      KnownOne.lshrInPlace(*ShAmt);
+      computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+      Known.Zero.lshrInPlace(*ShAmt);
+      Known.One.lshrInPlace(*ShAmt);
       // If we know the value of the sign bit, then we know it is copied across
       // the high bits by the shift amount.
       APInt SignMask = APInt::getSignMask(BitWidth);
       SignMask.lshrInPlace(*ShAmt);  // Adjust to where it is now in the mask.
-      if (KnownZero.intersects(SignMask)) {
-        KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
-      } else if (KnownOne.intersects(SignMask)) {
-        KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
+      if (Known.Zero.intersects(SignMask)) {
+        Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
+      } else if (Known.One.intersects(SignMask)) {
+        Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
       }
     }
     break;
@@ -2374,31 +2340,44 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     if (NewBits.getBoolValue())
       InputDemandedBits |= InSignMask;
 
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    KnownOne &= InputDemandedBits;
-    KnownZero &= InputDemandedBits;
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    Known.One &= InputDemandedBits;
+    Known.Zero &= InputDemandedBits;
 
     // If the sign bit of the input is known set or clear, then we know the
     // top bits of the result.
-    if (KnownZero.intersects(InSignMask)) {        // Input sign bit known clear
-      KnownZero |= NewBits;
-      KnownOne  &= ~NewBits;
-    } else if (KnownOne.intersects(InSignMask)) {  // Input sign bit known set
-      KnownOne  |= NewBits;
-      KnownZero &= ~NewBits;
+    if (Known.Zero.intersects(InSignMask)) {        // Input sign bit known clear
+      Known.Zero |= NewBits;
+      Known.One  &= ~NewBits;
+    } else if (Known.One.intersects(InSignMask)) {  // Input sign bit known set
+      Known.One  |= NewBits;
+      Known.Zero &= ~NewBits;
     } else {                              // Input sign bit unknown
-      KnownZero &= ~NewBits;
-      KnownOne  &= ~NewBits;
+      Known.Zero &= ~NewBits;
+      Known.One  &= ~NewBits;
     }
     break;
   }
   case ISD::CTTZ:
-  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::CTTZ_ZERO_UNDEF: {
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    // If we have a known 1, its position is our upper bound.
+    unsigned PossibleTZ = Known2.One.countTrailingZeros();
+    unsigned LowBits = Log2_32(PossibleTZ) + 1;
+    Known.Zero.setBitsFrom(LowBits);
+    break;
+  }
   case ISD::CTLZ:
-  case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTLZ_ZERO_UNDEF: {
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    // If we have a known 1, its position is our upper bound.
+    unsigned PossibleLZ = Known2.One.countLeadingZeros();
+    unsigned LowBits = Log2_32(PossibleLZ) + 1;
+    Known.Zero.setBitsFrom(LowBits);
+    break;
+  }
   case ISD::CTPOP: {
-    KnownZero.setBitsFrom(Log2_32(BitWidth)+1);
+    Known.Zero.setBitsFrom(Log2_32(BitWidth)+1);
     break;
   }
   case ISD::LOAD: {
@@ -2407,36 +2386,35 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
       EVT VT = LD->getMemoryVT();
       unsigned MemBits = VT.getScalarSizeInBits();
-      KnownZero.setBitsFrom(MemBits);
+      Known.Zero.setBitsFrom(MemBits);
     } else if (const MDNode *Ranges = LD->getRanges()) {
       if (LD->getExtensionType() == ISD::NON_EXTLOAD)
-        computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
+        computeKnownBitsFromRangeMetadata(*Ranges, Known);
     }
     break;
   }
   case ISD::ZERO_EXTEND_VECTOR_INREG: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarSizeInBits();
-    KnownZero = KnownZero.trunc(InBits);
-    KnownOne = KnownOne.trunc(InBits);
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,
+    Known.Zero = Known.Zero.trunc(InBits);
+    Known.One = Known.One.trunc(InBits);
+    computeKnownBits(Op.getOperand(0), Known,
                      DemandedElts.zext(InVT.getVectorNumElements()),
                      Depth + 1);
-    KnownZero = KnownZero.zext(BitWidth);
-    KnownOne = KnownOne.zext(BitWidth);
-    KnownZero.setBitsFrom(InBits);
+    Known.Zero = Known.Zero.zext(BitWidth);
+    Known.One = Known.One.zext(BitWidth);
+    Known.Zero.setBitsFrom(InBits);
     break;
   }
   case ISD::ZERO_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarSizeInBits();
-    KnownZero = KnownZero.trunc(InBits);
-    KnownOne = KnownOne.trunc(InBits);
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    KnownZero = KnownZero.zext(BitWidth);
-    KnownOne = KnownOne.zext(BitWidth);
-    KnownZero.setBitsFrom(InBits);
+    Known.Zero = Known.Zero.trunc(InBits);
+    Known.One = Known.One.trunc(InBits);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    Known.Zero = Known.Zero.zext(BitWidth);
+    Known.One = Known.One.zext(BitWidth);
+    Known.Zero.setBitsFrom(InBits);
     break;
   }
   // TODO ISD::SIGN_EXTEND_VECTOR_INREG
@@ -2444,49 +2422,47 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarSizeInBits();
 
-    KnownZero = KnownZero.trunc(InBits);
-    KnownOne = KnownOne.trunc(InBits);
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
+    Known.Zero = Known.Zero.trunc(InBits);
+    Known.One = Known.One.trunc(InBits);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
 
     // If the sign bit is known to be zero or one, then sext will extend
     // it to the top bits, else it will just zext.
-    KnownZero = KnownZero.sext(BitWidth);
-    KnownOne = KnownOne.sext(BitWidth);
+    Known.Zero = Known.Zero.sext(BitWidth);
+    Known.One = Known.One.sext(BitWidth);
     break;
   }
   case ISD::ANY_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarSizeInBits();
-    KnownZero = KnownZero.trunc(InBits);
-    KnownOne = KnownOne.trunc(InBits);
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-    KnownZero = KnownZero.zext(BitWidth);
-    KnownOne = KnownOne.zext(BitWidth);
+    Known.Zero = Known.Zero.trunc(InBits);
+    Known.One = Known.One.trunc(InBits);
+    computeKnownBits(Op.getOperand(0), Known, Depth+1);
+    Known.Zero = Known.Zero.zext(BitWidth);
+    Known.One = Known.One.zext(BitWidth);
     break;
   }
   case ISD::TRUNCATE: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarSizeInBits();
-    KnownZero = KnownZero.zext(InBits);
-    KnownOne = KnownOne.zext(InBits);
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    KnownZero = KnownZero.trunc(BitWidth);
-    KnownOne = KnownOne.trunc(BitWidth);
+    Known.Zero = Known.Zero.zext(InBits);
+    Known.One = Known.One.zext(InBits);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    Known.Zero = Known.Zero.trunc(BitWidth);
+    Known.One = Known.One.trunc(BitWidth);
     break;
   }
   case ISD::AssertZext: {
     EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-    KnownZero |= (~InMask);
-    KnownOne  &= (~KnownZero);
+    computeKnownBits(Op.getOperand(0), Known, Depth+1);
+    Known.Zero |= (~InMask);
+    Known.One  &= (~Known.Zero);
     break;
   }
   case ISD::FGETSIGN:
     // All bits are zero except the low bit.
-    KnownZero.setBitsFrom(1);
+    Known.Zero.setBitsFrom(1);
     break;
   case ISD::USUBO:
   case ISD::SSUBO:
@@ -2495,7 +2471,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
               TargetLowering::ZeroOrOneBooleanContent &&
           BitWidth > 1)
-        KnownZero.setBitsFrom(1);
+        Known.Zero.setBitsFrom(1);
       break;
     }
     LLVM_FALLTHROUGH;
@@ -2509,16 +2485,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
         unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
         // NLZ can't be BitWidth with no sign bit
         APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
-        computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+        computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
                          Depth + 1);
 
         // If all of the MaskV bits are known to be zero, then we know the
         // output top bits are zero, because we now know that the output is
         // from [0-C].
-        if ((KnownZero2 & MaskV) == MaskV) {
+        if ((Known2.Zero & MaskV) == MaskV) {
           unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
           // Top bits known zero.
-          KnownZero.setHighBits(NLZ2);
+          Known.Zero.setHighBits(NLZ2);
         }
       }
     }
@@ -2526,27 +2502,26 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // If low bits are know to be zero in both operands, then we know they are
     // going to be 0 in the result. Both addition and complement operations
     // preserve the low zero bits.
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
-    unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    unsigned KnownZeroLow = Known2.Zero.countTrailingOnes();
     if (KnownZeroLow == 0)
       break;
 
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
     KnownZeroLow = std::min(KnownZeroLow,
-                            KnownZero2.countTrailingOnes());
-    KnownZero.setBits(0, KnownZeroLow);
+                            Known2.Zero.countTrailingOnes());
+    Known.Zero.setLowBits(KnownZeroLow);
     break;
   }
   case ISD::UADDO:
   case ISD::SADDO:
+  case ISD::ADDCARRY:
     if (Op.getResNo() == 1) {
       // If we know the result of a setcc has the top bits zero, use this info.
       if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
               TargetLowering::ZeroOrOneBooleanContent &&
           BitWidth > 1)
-        KnownZero.setBitsFrom(1);
+        Known.Zero.setBitsFrom(1);
       break;
     }
     LLVM_FALLTHROUGH;
@@ -2560,31 +2535,30 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // known to be clear. For example, if one input has the top 10 bits clear
     // and the other has the top 8 bits clear, we know the top 7 bits of the
     // output must be clear.
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
-    unsigned KnownZeroHigh = KnownZero2.countLeadingOnes();
-    unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    unsigned KnownZeroHigh = Known2.Zero.countLeadingOnes();
+    unsigned KnownZeroLow = Known2.Zero.countTrailingOnes();
 
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
                      Depth + 1);
     KnownZeroHigh = std::min(KnownZeroHigh,
-                             KnownZero2.countLeadingOnes());
+                             Known2.Zero.countLeadingOnes());
     KnownZeroLow = std::min(KnownZeroLow,
-                            KnownZero2.countTrailingOnes());
+                            Known2.Zero.countTrailingOnes());
 
-    if (Opcode == ISD::ADDE) {
-      // With ADDE, a carry bit may be added in, so we can only use this
-      // information if we know (at least) that the low two bits are clear.
-      // We then return to the caller that the low bit is unknown but that
-      // other bits are known zero.
+    if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
+      // With ADDE and ADDCARRY, a carry bit may be added in, so we can only
+      // use this information if we know (at least) that the low two bits are
+      // clear. We then return to the caller that the low bit is unknown but
+      // that other bits are known zero.
       if (KnownZeroLow >= 2)
-        KnownZero.setBits(1, KnownZeroLow);
+        Known.Zero.setBits(1, KnownZeroLow);
       break;
     }
 
-    KnownZero.setLowBits(KnownZeroLow);
+    Known.Zero.setLowBits(KnownZeroLow);
     if (KnownZeroHigh > 1)
-      KnownZero.setHighBits(KnownZeroHigh - 1);
+      Known.Zero.setHighBits(KnownZeroHigh - 1);
     break;
   }
   case ISD::SREM:
@@ -2592,23 +2566,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       const APInt &RA = Rem->getAPIntValue().abs();
       if (RA.isPowerOf2()) {
         APInt LowBits = RA - 1;
-        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                         Depth + 1);
+        computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
 
         // The low bits of the first operand are unchanged by the srem.
-        KnownZero = KnownZero2 & LowBits;
-        KnownOne = KnownOne2 & LowBits;
+        Known.Zero = Known2.Zero & LowBits;
+        Known.One = Known2.One & LowBits;
 
         // If the first operand is non-negative or has all low bits zero, then
         // the upper bits are all zero.
-        if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
-          KnownZero |= ~LowBits;
+        if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits))
+          Known.Zero |= ~LowBits;
 
         // If the first operand is negative and not all low bits are zero, then
         // the upper bits are all one.
-        if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
-          KnownOne |= ~LowBits;
-        assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+        if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0))
+          Known.One |= ~LowBits;
+        assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?");
       }
     }
     break;
@@ -2617,42 +2590,39 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
       const APInt &RA = Rem->getAPIntValue();
       if (RA.isPowerOf2()) {
         APInt LowBits = (RA - 1);
-        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                         Depth + 1);
+        computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
 
         // The upper bits are all zero, the lower ones are unchanged.
-        KnownZero = KnownZero2 | ~LowBits;
-        KnownOne = KnownOne2 & LowBits;
+        Known.Zero = Known2.Zero | ~LowBits;
+        Known.One = Known2.One & LowBits;
         break;
       }
     }
 
     // Since the result is less than or equal to either operand, any leading
     // zero bits in either operand must also exist in the result.
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
 
-    uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
-                                KnownZero2.countLeadingOnes());
-    KnownOne.clearAllBits();
-    KnownZero.clearAllBits();
-    KnownZero.setHighBits(Leaders);
+    uint32_t Leaders = std::max(Known.Zero.countLeadingOnes(),
+                                Known2.Zero.countLeadingOnes());
+    Known.One.clearAllBits();
+    Known.Zero.clearAllBits();
+    Known.Zero.setHighBits(Leaders);
     break;
   }
   case ISD::EXTRACT_ELEMENT: {
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), Known, Depth+1);
     const unsigned Index = Op.getConstantOperandVal(1);
     const unsigned BitWidth = Op.getValueSizeInBits();
 
     // Remove low part of known bits mask
-    KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
-    KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+    Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth);
+    Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth);
 
     // Remove high part of known bit mask
-    KnownZero = KnownZero.trunc(BitWidth);
-    KnownOne = KnownOne.trunc(BitWidth);
+    Known.Zero = Known.Zero.trunc(BitWidth);
+    Known.One = Known.One.trunc(BitWidth);
     break;
   }
   case ISD::EXTRACT_VECTOR_ELT: {
@@ -2665,22 +2635,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
     // anything about the extended bits.
     if (BitWidth > EltBitWidth) {
-      KnownZero = KnownZero.trunc(EltBitWidth);
-      KnownOne = KnownOne.trunc(EltBitWidth);
+      Known.Zero = Known.Zero.trunc(EltBitWidth);
+      Known.One = Known.One.trunc(EltBitWidth);
     }
     ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
     if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
       // If we know the element index, just demand that vector element.
       unsigned Idx = ConstEltNo->getZExtValue();
       APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
-      computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1);
+      computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
     } else {
       // Unknown element index, so ignore DemandedElts and demand them all.
-      computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
+      computeKnownBits(InVec, Known, Depth + 1);
     }
     if (BitWidth > EltBitWidth) {
-      KnownZero = KnownZero.zext(BitWidth);
-      KnownOne = KnownOne.zext(BitWidth);
+      Known.Zero = Known.Zero.zext(BitWidth);
+      Known.One = Known.One.zext(BitWidth);
     }
     break;
   }
@@ -2693,117 +2663,110 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
       // If we know the element index, split the demand between the
       // source vector and the inserted element.
-      KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+      Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth);
       unsigned EltIdx = CEltNo->getZExtValue();
 
       // If we demand the inserted element then add its common known bits.
       if (DemandedElts[EltIdx]) {
-        computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
-        KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
-        KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());;
+        computeKnownBits(InVal, Known2, Depth + 1);
+        Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
+        Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());;
       }
 
       // If we demand the source vector then add its common known bits, ensuring
       // that we don't demand the inserted element.
       APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
       if (!!VectorElts) {
-        computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1);
-        KnownOne &= KnownOne2;
-        KnownZero &= KnownZero2;
+        computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
       }
     } else {
       // Unknown element index, so ignore DemandedElts and demand them all.
-      computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
-      computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
-      KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
-      KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());;
+      computeKnownBits(InVec, Known, Depth + 1);
+      computeKnownBits(InVal, Known2, Depth + 1);
+      Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
+      Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());;
     }
     break;
   }
   case ISD::BITREVERSE: {
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
-    KnownZero = KnownZero2.reverseBits();
-    KnownOne = KnownOne2.reverseBits();
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    Known.Zero = Known2.Zero.reverseBits();
+    Known.One = Known2.One.reverseBits();
     break;
   }
   case ISD::BSWAP: {
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
-    KnownZero = KnownZero2.byteSwap();
-    KnownOne = KnownOne2.byteSwap();
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    Known.Zero = Known2.Zero.byteSwap();
+    Known.One = Known2.One.byteSwap();
     break;
   }
   case ISD::ABS: {
-    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
 
     // If the source's MSB is zero then we know the rest of the bits already.
-    if (KnownZero2[BitWidth - 1]) {
-      KnownZero = KnownZero2;
-      KnownOne = KnownOne2;
+    if (Known2.isNonNegative()) {
+      Known.Zero = Known2.Zero;
+      Known.One = Known2.One;
       break;
     }
 
     // We only know that the absolute values's MSB will be zero iff there is
     // a set bit that isn't the sign bit (otherwise it could be INT_MIN).
-    KnownOne2.clearBit(BitWidth - 1);
-    if (KnownOne2.getBoolValue()) {
-      KnownZero = APInt::getSignMask(BitWidth);
+    Known2.One.clearSignBit();
+    if (Known2.One.getBoolValue()) {
+      Known.Zero = APInt::getSignMask(BitWidth);
       break;
     }
     break;
   }
   case ISD::UMIN: {
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
 
     // UMIN - we know that the result will have the maximum of the
     // known zero leading bits of the inputs.
-    unsigned LeadZero = KnownZero.countLeadingOnes();
-    LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes());
+    unsigned LeadZero = Known.Zero.countLeadingOnes();
+    LeadZero = std::max(LeadZero, Known2.Zero.countLeadingOnes());
 
-    KnownZero &= KnownZero2;
-    KnownOne &= KnownOne2;
-    KnownZero.setHighBits(LeadZero);
+    Known.Zero &= Known2.Zero;
+    Known.One &= Known2.One;
+    Known.Zero.setHighBits(LeadZero);
     break;
   }
   case ISD::UMAX: {
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
-                     Depth + 1);
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts,
                      Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
 
     // UMAX - we know that the result will have the maximum of the
     // known one leading bits of the inputs.
-    unsigned LeadOne = KnownOne.countLeadingOnes();
-    LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes());
+    unsigned LeadOne = Known.One.countLeadingOnes();
+    LeadOne = std::max(LeadOne, Known2.One.countLeadingOnes());
 
-    KnownZero &= KnownZero2;
-    KnownOne &= KnownOne2;
-    KnownOne.setHighBits(LeadOne);
+    Known.Zero &= Known2.Zero;
+    Known.One &= Known2.One;
+    Known.One.setHighBits(LeadOne);
     break;
   }
   case ISD::SMIN:
   case ISD::SMAX: {
-    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts,
                      Depth + 1);
     // If we don't know any bits, early out.
-    if (!KnownOne && !KnownZero)
+    if (!Known.One && !Known.Zero)
       break;
-    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
-                     Depth + 1);
-    KnownZero &= KnownZero2;
-    KnownOne &= KnownOne2;
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+    Known.Zero &= Known2.Zero;
+    Known.One &= Known2.One;
     break;
   }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
     if (unsigned Align = InferPtrAlignment(Op)) {
       // The low bits are known zero if the pointer is aligned.
-      KnownZero.setLowBits(Log2_32(Align));
+      Known.Zero.setLowBits(Log2_32(Align));
       break;
     }
     break;
@@ -2816,12 +2779,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
   case ISD::INTRINSIC_W_CHAIN:
   case ISD::INTRINSIC_VOID:
     // Allow the target to implement this method for its nodes.
-    TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, DemandedElts,
-                                       *this, Depth);
+    TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
     break;
   }
 
-  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
 }
 
 SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
@@ -2830,28 +2792,28 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
   if (isNullConstant(N1))
     return OFK_Never;
 
-  APInt N1Zero, N1One;
-  computeKnownBits(N1, N1Zero, N1One);
-  if (N1Zero.getBoolValue()) {
-    APInt N0Zero, N0One;
-    computeKnownBits(N0, N0Zero, N0One);
+  KnownBits N1Known;
+  computeKnownBits(N1, N1Known);
+  if (N1Known.Zero.getBoolValue()) {
+    KnownBits N0Known;
+    computeKnownBits(N0, N0Known);
 
     bool overflow;
-    (void)(~N0Zero).uadd_ov(~N1Zero, overflow);
+    (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
     if (!overflow)
       return OFK_Never;
   }
 
   // mulhi + 1 never overflow
   if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
-      (~N1Zero & 0x01) == ~N1Zero)
+      (~N1Known.Zero & 0x01) == ~N1Known.Zero)
     return OFK_Never;
 
   if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
-    APInt N0Zero, N0One;
-    computeKnownBits(N0, N0Zero, N0One);
+    KnownBits N0Known;
+    computeKnownBits(N0, N0Known);
 
-    if ((~N0Zero & 0x01) == ~N0Zero)
+    if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
       return OFK_Never;
   }
 
@@ -2895,10 +2857,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
   // to handle some common cases.
 
   // Fall back to computeKnownBits to catch other known cases.
-  APInt KnownZero, KnownOne;
-  computeKnownBits(Val, KnownZero, KnownOne);
-  return (KnownZero.countPopulation() == BitWidth - 1) &&
-         (KnownOne.countPopulation() == 1);
+  KnownBits Known;
+  computeKnownBits(Val, Known);
+  return (Known.Zero.countPopulation() == BitWidth - 1) &&
+         (Known.One.countPopulation() == 1);
 }
 
 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
@@ -2971,7 +2933,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return std::max(Tmp, Tmp2);
 
   case ISD::SRA:
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
     // SRA X, C   -> adds C sign bits.
     if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
       APInt ShiftVal = C->getAPIntValue();
@@ -3068,17 +3030,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     // Special case decrementing a value (ADD X, -1):
     if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
       if (CRHS->isAllOnesValue()) {
-        APInt KnownZero, KnownOne;
-        computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+        KnownBits Known;
+        computeKnownBits(Op.getOperand(0), Known, Depth+1);
 
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
-        if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+        if ((Known.Zero | 1).isAllOnesValue())
           return VTBits;
 
         // If we are subtracting one from a positive number, there is no carry
         // out of the result.
-        if (KnownZero.isNegative())
+        if (Known.isNonNegative())
           return Tmp;
       }
 
@@ -3093,16 +3055,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     // Handle NEG.
     if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
       if (CLHS->isNullValue()) {
-        APInt KnownZero, KnownOne;
-        computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+        KnownBits Known;
+        computeKnownBits(Op.getOperand(1), Known, Depth+1);
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
-        if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+        if ((Known.Zero | 1).isAllOnesValue())
           return VTBits;
 
         // If the input is known to be positive (the sign bit is known clear),
         // the output of the NEG has the same number of sign bits as the input.
-        if (KnownZero.isNegative())
+        if (Known.isNonNegative())
           return Tmp2;
 
         // Otherwise, we treat this like a SUB.
@@ -3134,6 +3096,44 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     // result. Otherwise it gives either negative or > bitwidth result
     return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
   }
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue InVec = Op.getOperand(0);
+    SDValue InVal = Op.getOperand(1);
+    SDValue EltNo = Op.getOperand(2);
+    unsigned NumElts = InVec.getValueType().getVectorNumElements();
+
+    ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+    if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+      // If we know the element index, split the demand between the
+      // source vector and the inserted element.
+      unsigned EltIdx = CEltNo->getZExtValue();
+
+      // If we demand the inserted element then get its sign bits.
+      Tmp = UINT_MAX;
+      if (DemandedElts[EltIdx]) {
+        // TODO - handle implicit truncation of inserted elements.
+        if (InVal.getScalarValueSizeInBits() != VTBits)
+          break;
+        Tmp = ComputeNumSignBits(InVal, Depth + 1);
+      }
+
+      // If we demand the source vector then get its sign bits, and determine
+      // the minimum.
+      APInt VectorElts = DemandedElts;
+      VectorElts.clearBit(EltIdx);
+      if (!!VectorElts) {
+        Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
+        Tmp = std::min(Tmp, Tmp2);
+      }
+    } else {
+      // Unknown element index, so ignore DemandedElts and demand them all.
+      Tmp = ComputeNumSignBits(InVec, Depth + 1);
+      Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
+      Tmp = std::min(Tmp, Tmp2);
+    }
+    assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+    return Tmp;
+  }
   case ISD::EXTRACT_VECTOR_ELT: {
     SDValue InVec = Op.getOperand(0);
     SDValue EltNo = Op.getOperand(1);
@@ -3199,14 +3199,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
 
   // Finally, if we can prove that the top bits of the result are 0's or 1's,
   // use this information.
-  APInt KnownZero, KnownOne;
-  computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
+  KnownBits Known;
+  computeKnownBits(Op, Known, DemandedElts, Depth);
 
   APInt Mask;
-  if (KnownZero.isNegative()) {        // sign bit is 0
-    Mask = KnownZero;
-  } else if (KnownOne.isNegative()) {  // sign bit is 1;
-    Mask = KnownOne;
+  if (Known.isNonNegative()) {        // sign bit is 0
+    Mask = Known.Zero;
+  } else if (Known.isNegative()) {  // sign bit is 1;
+    Mask = Known.One;
   } else {
     // Nothing known.
     return FirstAnswer;
@@ -3239,8 +3239,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
   if (getTarget().Options.NoNaNsFPMath)
     return true;
 
-  if (const BinaryWithFlagsSDNode *BF = dyn_cast<BinaryWithFlagsSDNode>(Op))
-    return BF->Flags.hasNoNaNs();
+  if (Op->getFlags().hasNoNaNs())
+    return true;
 
   // If the value is a constant, we can obviously see if it is a NaN or not.
   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
@@ -3284,11 +3284,10 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
 bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
   assert(A.getValueType() == B.getValueType() &&
          "Values must have the same type");
-  APInt AZero, AOne;
-  APInt BZero, BOne;
-  computeKnownBits(A, AZero, AOne);
-  computeKnownBits(B, BZero, BOne);
-  return (AZero | BZero).isAllOnesValue();
+  KnownBits AKnown, BKnown;
+  computeKnownBits(A, AKnown);
+  computeKnownBits(B, BKnown);
+  return (AKnown.Zero | BKnown.Zero).isAllOnesValue();
 }
 
 static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
@@ -3357,7 +3356,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
-                              SDValue Operand) {
+                              SDValue Operand, const SDNodeFlags Flags) {
   // Constant fold unary operations with an integer constant operand. Even
   // opaque constant will be folded, because the folding of unary operations
   // doesn't create new constants with different values. Nevertheless, the
@@ -3683,8 +3682,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
       // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
       return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
-                       Operand.getNode()->getOperand(0),
-                       &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
+                     Operand.getNode()->getOperand(0),
+                     Operand.getNode()->getFlags());
     if (OpOpcode == ISD::FNEG)  // --X -> X
       return Operand.getNode()->getOperand(0);
     break;
@@ -3701,10 +3700,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops);
     void *IP = nullptr;
-    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+      E->intersectFlagsWith(Flags);
       return SDValue(E, 0);
+    }
 
     N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    N->setFlags(Flags);
     createOperands(N, Ops);
     CSEMap.InsertNode(N, IP);
   } else {
@@ -3883,7 +3885,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
 SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
                                                    const SDLoc &DL, EVT VT,
                                                    ArrayRef<SDValue> Ops,
-                                                   const SDNodeFlags *Flags) {
+                                                   const SDNodeFlags Flags) {
   // If the opcode is a target-specific ISD node, there's nothing we can
   // do here and the operand rules may not line up with the below, so
   // bail early.
@@ -3975,8 +3977,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
-                              SDValue N1, SDValue N2,
-                              const SDNodeFlags *Flags) {
+                              SDValue N1, SDValue N2, const SDNodeFlags Flags) {
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
@@ -4161,7 +4162,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
       unsigned FromBits = EVT.getScalarSizeInBits();
       Val <<= Val.getBitWidth() - FromBits;
-      Val = Val.ashr(Val.getBitWidth() - FromBits);
+      Val.ashrInPlace(Val.getBitWidth() - FromBits);
       return getConstant(Val, DL, ConstantVT);
     };
 
@@ -4443,21 +4444,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   // Memoize this node if possible.
   SDNode *N;
   SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = {N1, N2};
   if (VT != MVT::Glue) {
-    SDValue Ops[] = {N1, N2};
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops);
     void *IP = nullptr;
     if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
-      if (Flags)
-        E->intersectFlagsWith(Flags);
+      E->intersectFlagsWith(Flags);
       return SDValue(E, 0);
     }
 
-    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
+    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    N->setFlags(Flags);
+    createOperands(N, Ops);
     CSEMap.InsertNode(N, IP);
   } else {
-    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
+    N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    createOperands(N, Ops);
   }
 
   InsertNode(N);
@@ -5979,7 +5982,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
-                              ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
+                              ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
   unsigned NumOps = Ops.size();
   switch (NumOps) {
   case 0: return getNode(Opcode, DL, VT);
@@ -6641,14 +6644,13 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
 /// else return NULL.
 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
                                       ArrayRef<SDValue> Ops,
-                                      const SDNodeFlags *Flags) {
+                                      const SDNodeFlags Flags) {
   if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops);
     void *IP = nullptr;
     if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
-      if (Flags)
-        E->intersectFlagsWith(Flags);
+      E->intersectFlagsWith(Flags);
       return E;
     }
   }
@@ -7392,15 +7394,8 @@ bool SDNode::hasPredecessor(const SDNode *N) const {
   return hasPredecessorHelper(N, Visited, Worklist);
 }
 
-const SDNodeFlags *SDNode::getFlags() const {
-  if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
-    return &FlagsNode->Flags;
-  return nullptr;
-}
-
-void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) {
-  if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
-    FlagsNode->Flags.intersectWith(Flags);
+void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
+  this->Flags.intersectWith(Flags);
 }
 
 SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6a737ed84ea..ba9e11798f1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -83,20 +83,6 @@ LimitFPPrecision("limit-float-precision",
                           "for some float libcalls"),
                  cl::location(LimitFloatPrecision),
                  cl::init(0));
-
-/// Minimum jump table density for normal functions.
-static cl::opt<unsigned>
-JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
-                 cl::desc("Minimum density for building a jump table in "
-                          "a normal function"));
-
-/// Minimum jump table density for -Os or -Oz functions.
-static cl::opt<unsigned>
-OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden,
-                        cl::desc("Minimum density for building a jump table in "
-                                 "an optsize function"));
-
-
 // Limit the width of DAG chains. This is important in general to prevent
 // DAG-based analysis from blowing up. For example, alias analysis and
 // load clustering may not complete in reasonable time. It is difficult to
@@ -364,7 +350,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
 
   EVT ValueSVT = ValueVT.getVectorElementType();
   if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
-    Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
+                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
 
   return DAG.getBuildVector(ValueVT, DL, Val);
 }
@@ -557,10 +544,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
       Val = DAG.getNode(
           ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
           DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
-      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
     }
 
+    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
     Parts[0] = Val;
     return;
   }
@@ -675,7 +661,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
 
       unsigned RegSize = RegisterVT.getSizeInBits();
       unsigned NumSignBits = LOI->NumSignBits;
-      unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+      unsigned NumZeroBits = LOI->Known.Zero.countLeadingOnes();
 
       if (NumZeroBits == RegSize) {
         // The current value is a zero.
@@ -1349,7 +1335,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                 RetPtr.getValueType(), RetPtr,
                                 DAG.getIntPtrConstant(Offsets[i],
                                                       getCurSDLoc()),
-                                &Flags);
+                                Flags);
       Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
                                SDValue(RetOp.getNode(), RetOp.getResNo() + i),
                                // FIXME: better loc info would be nice.
@@ -2589,7 +2575,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
 
   SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
-                                     Op1, Op2, &Flags);
+                                     Op1, Op2, Flags);
   setValue(&I, BinNodeValue);
 }
 
@@ -2642,7 +2628,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
   Flags.setNoSignedWrap(nsw);
   Flags.setNoUnsignedWrap(nuw);
   SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
-                            &Flags);
+                            Flags);
   setValue(&I, Res);
 }
 
@@ -2654,7 +2640,7 @@ void SelectionDAGBuilder::visitSDiv(const User &I) {
   Flags.setExact(isa<PossiblyExactOperator>(&I) &&
                  cast<PossiblyExactOperator>(&I)->isExact());
   setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
-                           Op2, &Flags));
+                           Op2, Flags));
 }
 
 void SelectionDAGBuilder::visitICmp(const User &I) {
@@ -3266,7 +3252,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
           Flags.setNoUnsignedWrap(true);
 
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
-                        DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
+                        DAG.getConstant(Offset, dl, N.getValueType()), Flags);
       }
     } else {
       MVT PtrTy =
@@ -3296,7 +3282,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
         if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
           Flags.setNoUnsignedWrap(true);
 
-        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
+        N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
         continue;
       }
 
@@ -3374,7 +3360,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
   Flags.setNoUnsignedWrap(true);
   AllocSize = DAG.getNode(ISD::ADD, dl,
                           AllocSize.getValueType(), AllocSize,
-                          DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
+                          DAG.getIntPtrConstant(StackAlign - 1, dl), Flags);
 
   // Mask out the low bits for alignment purposes.
   AllocSize = DAG.getNode(ISD::AND, dl,
@@ -3478,7 +3464,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
     SDValue A = DAG.getNode(ISD::ADD, dl,
                             PtrVT, Ptr,
                             DAG.getConstant(Offsets[i], dl, PtrVT),
-                            &Flags);
+                            Flags);
     auto MMOFlags = MachineMemOperand::MONone;
     if (isVolatile)
       MMOFlags |= MachineMemOperand::MOVolatile;
@@ -3633,7 +3619,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
       ChainI = 0;
     }
     SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
-                              DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
+                              DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
     SDValue St = DAG.getStore(
         Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
         MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
@@ -7897,7 +7883,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     for (unsigned i = 0; i < NumValues; ++i) {
       SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
                                     CLI.DAG.getConstant(Offsets[i], CLI.DL,
-                                                        PtrVT), &Flags);
+                                                        PtrVT), Flags);
       SDValue L = CLI.DAG.getLoad(
           RetTys[i], CLI.DL, CLI.Chain, Add,
           MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
@@ -8187,15 +8173,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
   findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
 
   // Set up the incoming argument description vector.
-  unsigned Idx = 0;
   for (const Argument &Arg : F.args()) {
-    ++Idx;
+    unsigned ArgNo = Arg.getArgNo();
     SmallVector<EVT, 4> ValueVTs;
     ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
     bool isArgValueUsed = !Arg.use_empty();
     unsigned PartBase = 0;
     Type *FinalType = Arg.getType();
-    if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+    if (Arg.hasAttribute(Attribute::ByVal))
       FinalType = cast<PointerType>(FinalType)->getElementType();
     bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
         FinalType, F.getCallingConv(), F.isVarArg());
@@ -8206,11 +8191,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       ISD::ArgFlagsTy Flags;
       unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
 
-      if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+      if (Arg.hasAttribute(Attribute::ZExt))
         Flags.setZExt();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+      if (Arg.hasAttribute(Attribute::SExt))
         Flags.setSExt();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
+      if (Arg.hasAttribute(Attribute::InReg)) {
         // If we are using vectorcall calling convention, a structure that is
         // passed InReg - is surely an HVA
         if (F.getCallingConv() == CallingConv::X86_VectorCall &&
@@ -8223,15 +8208,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         // Set InReg Flag
         Flags.setInReg();
       }
-      if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
+      if (Arg.hasAttribute(Attribute::StructRet))
         Flags.setSRet();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
+      if (Arg.hasAttribute(Attribute::SwiftSelf))
         Flags.setSwiftSelf();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError))
+      if (Arg.hasAttribute(Attribute::SwiftError))
         Flags.setSwiftError();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+      if (Arg.hasAttribute(Attribute::ByVal))
         Flags.setByVal();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+      if (Arg.hasAttribute(Attribute::InAlloca)) {
         Flags.setInAlloca();
         // Set the byval flag for CCAssignFn callbacks that don't know about
         // inalloca.  This way we can know how many bytes we should've allocated
@@ -8242,7 +8227,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       }
       if (F.getCallingConv() == CallingConv::X86_INTR) {
         // IA Interrupt passes frame (1st parameter) by value in the stack.
-        if (Idx == 1)
+        if (ArgNo == 0)
           Flags.setByVal();
       }
       if (Flags.isByVal() || Flags.isInAlloca()) {
@@ -8252,13 +8237,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         // For ByVal, alignment should be passed from FE.  BE will guess if
         // this info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
-        if (F.getParamAlignment(Idx))
-          FrameAlign = F.getParamAlignment(Idx);
+        if (Arg.getParamAlignment())
+          FrameAlign = Arg.getParamAlignment();
         else
           FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
         Flags.setByValAlign(FrameAlign);
       }
-      if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
+      if (Arg.hasAttribute(Attribute::Nest))
         Flags.setNest();
       if (NeedsRegBlock)
         Flags.setInConsecutiveRegs();
@@ -8270,7 +8255,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
-                              Idx-1, PartBase+i*RegisterVT.getStoreSize());
+                              ArgNo, PartBase+i*RegisterVT.getStoreSize());
         if (NumRegs > 1 && i == 0)
           MyFlags.Flags.setSplit();
         // if it isn't first piece, alignment must be 1
@@ -8311,7 +8296,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
 
   // Set up the argument values.
   unsigned i = 0;
-  Idx = 0;
   if (!FuncInfo->CanLowerReturn) {
     // Create a virtual register for the sret pointer, and put in a copy
     // from the sret argument into it.
@@ -8333,14 +8317,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     DAG.setRoot(NewRoot);
 
     // i indexes lowered arguments.  Bump it past the hidden sret argument.
-    // Idx indexes LLVM arguments.  Don't touch it.
     ++i;
   }
 
   SmallVector<SDValue, 4> Chains;
   DenseMap<int, int> ArgCopyElisionFrameIndexMap;
   for (const Argument &Arg : F.args()) {
-    ++Idx;
     SmallVector<SDValue, 4> ArgValues;
     SmallVector<EVT, 4> ValueVTs;
     ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
@@ -8362,7 +8344,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     // debugging information.
     bool isSwiftErrorArg =
         TLI->supportSwiftError() &&
-        F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
+        Arg.hasAttribute(Attribute::SwiftError);
     if (!ArgHasUses && !isSwiftErrorArg) {
       SDB->setUnusedArgValue(&Arg, InVals[i]);
 
@@ -8382,9 +8364,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       // function.
       if (ArgHasUses || isSwiftErrorArg) {
         Optional<ISD::NodeType> AssertOp;
-        if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+        if (Arg.hasAttribute(Attribute::SExt))
           AssertOp = ISD::AssertSext;
-        else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+        else if (Arg.hasAttribute(Attribute::ZExt))
           AssertOp = ISD::AssertZext;
 
         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
@@ -8589,13 +8571,10 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
     HasTailCall = true;
 }
 
-bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
-                                  const SmallVectorImpl<unsigned> &TotalCases,
-                                  unsigned First, unsigned Last,
-                                  unsigned Density) const {
+uint64_t
+SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters,
+                                       unsigned First, unsigned Last) const {
   assert(Last >= First);
-  assert(TotalCases[Last] >= TotalCases[First]);
-
   const APInt &LowCase = Clusters[First].Low->getValue();
   const APInt &HighCase = Clusters[Last].High->getValue();
   assert(LowCase.getBitWidth() == HighCase.getBitWidth());
@@ -8604,26 +8583,17 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
   // comparison to lower. We should discriminate against such consecutive ranges
   // in jump tables.
 
-  uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100);
-  uint64_t Range = Diff + 1;
-
-  uint64_t NumCases =
-      TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
-
-  assert(NumCases < UINT64_MAX / 100);
-  assert(Range >= NumCases);
-
-  return NumCases * 100 >= Range * Density;
+  return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
 }
 
-static inline bool areJTsAllowed(const TargetLowering &TLI,
-                                 const SwitchInst *SI) {
-  const Function *Fn = SI->getParent()->getParent();
-  if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
-    return false;
-
-  return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
-         TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
+uint64_t SelectionDAGBuilder::getJumpTableNumCases(
+    const SmallVectorImpl<unsigned> &TotalCases, unsigned First,
+    unsigned Last) const {
+  assert(Last >= First);
+  assert(TotalCases[Last] >= TotalCases[First]);
+  uint64_t NumCases =
+      TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
+  return NumCases;
 }
 
 bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
@@ -8662,10 +8632,11 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
     JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
   }
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned NumDests = JTProbs.size();
-  if (isSuitableForBitTests(NumDests, NumCmps,
-                            Clusters[First].Low->getValue(),
-                            Clusters[Last].High->getValue())) {
+  if (TLI.isSuitableForBitTests(
+          NumDests, NumCmps, Clusters[First].Low->getValue(),
+          Clusters[Last].High->getValue(), DAG.getDataLayout())) {
     // Clusters[First..Last] should be lowered as bit tests instead.
     return false;
   }
@@ -8686,7 +8657,6 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
   }
   JumpTableMBB->normalizeSuccProbs();
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
                      ->createJumpTableIndex(Table);
 
@@ -8715,17 +8685,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
 #endif
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (!areJTsAllowed(TLI, SI))
+  if (!TLI.areJTsAllowed(SI->getParent()->getParent()))
     return;
 
-  const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize();
-
   const int64_t N = Clusters.size();
   const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
   const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
-  const unsigned MaxJumpTableSize =
-                   OptForSize || TLI.getMaximumJumpTableSize() == 0
-                   ? UINT_MAX : TLI.getMaximumJumpTableSize();
 
   if (N < 2 || N < MinJumpTableEntries)
     return;
@@ -8740,15 +8705,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
       TotalCases[i] += TotalCases[i - 1];
   }
 
-  const unsigned MinDensity =
-    OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
-
   // Cheap case: the whole range may be suitable for jump table.
-  unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
-                            Clusters[0].Low->getValue())
-                           .getLimitedValue(UINT_MAX - 1) + 1;
-  if (JumpTableSize <= MaxJumpTableSize &&
-      isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
+  uint64_t Range = getJumpTableRange(Clusters,0, N - 1);
+  uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+  assert(NumCases < UINT64_MAX / 100);
+  assert(Range >= NumCases);
+  if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
     CaseCluster JTCluster;
     if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
       Clusters[0] = JTCluster;
@@ -8801,11 +8763,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
     // Search for a solution that results in fewer partitions.
     for (int64_t j = N - 1; j > i; j--) {
       // Try building a partition from Clusters[i..j].
-      JumpTableSize = (Clusters[j].High->getValue() -
-                       Clusters[i].Low->getValue())
-                      .getLimitedValue(UINT_MAX - 1) + 1;
-      if (JumpTableSize <= MaxJumpTableSize &&
-          isDense(Clusters, TotalCases, i, j, MinDensity)) {
+      uint64_t Range = getJumpTableRange(Clusters, i, j);
+      uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
+      assert(NumCases < UINT64_MAX / 100);
+      assert(Range >= NumCases);
+      if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
         unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
         unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
         int64_t NumEntries = j - i + 1;
@@ -8849,36 +8811,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
   Clusters.resize(DstIndex);
 }
 
-bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
-  // FIXME: Using the pointer type doesn't seem ideal.
-  uint64_t BW = DAG.getDataLayout().getPointerSizeInBits();
-  uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
-  return Range <= BW;
-}
-
-bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
-                                                unsigned NumCmps,
-                                                const APInt &Low,
-                                                const APInt &High) {
-  // FIXME: I don't think NumCmps is the correct metric: a single case and a
-  // range of cases both require only one branch to lower. Just looking at the
-  // number of clusters and destinations should be enough to decide whether to
-  // build bit tests.
-
-  // To lower a range with bit tests, the range must fit the bitwidth of a
-  // machine word.
-  if (!rangeFitsInWord(Low, High))
-    return false;
-
-  // Decide whether it's profitable to lower this range with bit tests. Each
-  // destination requires a bit test and branch, and there is an overall range
-  // check branch. For a small number of clusters, separate comparisons might be
-  // cheaper, and for many destinations, splitting the range might be better.
-  return (NumDests == 1 && NumCmps >= 3) ||
-         (NumDests == 2 && NumCmps >= 5) ||
-         (NumDests == 3 && NumCmps >= 6);
-}
-
 bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
                                         unsigned First, unsigned Last,
                                         const SwitchInst *SI,
@@ -8900,16 +8832,17 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
   APInt High = Clusters[Last].High->getValue();
   assert(Low.slt(High));
 
-  if (!isSuitableForBitTests(NumDests, NumCmps, Low, High))
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const DataLayout &DL = DAG.getDataLayout();
+  if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL))
     return false;
 
   APInt LowBound;
   APInt CmpRange;
 
-  const int BitWidth = DAG.getTargetLoweringInfo()
-                           .getPointerTy(DAG.getDataLayout())
-                           .getSizeInBits();
-  assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
+  const int BitWidth = TLI.getPointerTy(DL).getSizeInBits();
+  assert(TLI.rangeFitsInWord(Low, High, DL) &&
+         "Case range must fit in bit mask!");
 
   // Check if the clusters cover a contiguous range such that no value in the
   // range will jump to the default statement.
@@ -8999,7 +8932,9 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
 
   // If target does not have legal shift left, do not emit bit tests at all.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
+  const DataLayout &DL = DAG.getDataLayout();
+
+  EVT PTy = TLI.getPointerTy(DL);
   if (!TLI.isOperationLegal(ISD::SHL, PTy))
     return;
 
@@ -9030,8 +8965,8 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
       // Try building a partition from Clusters[i..j].
 
       // Check the range.
-      if (!rangeFitsInWord(Clusters[i].Low->getValue(),
-                           Clusters[j].High->getValue()))
+      if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(),
+                               Clusters[j].High->getValue(), DL))
         continue;
 
       // Check nbr of destinations and cluster types.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9e34590cc39..9e9989058ae 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -304,10 +304,13 @@ private:
     BranchProbability DefaultProb;
   };
 
-  /// Check whether a range of clusters is dense enough for a jump table.
-  bool isDense(const CaseClusterVector &Clusters,
-               const SmallVectorImpl<unsigned> &TotalCases,
-               unsigned First, unsigned Last, unsigned MinDensity) const;
+  /// Return the range of value in [First..Last].
+  uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
+                             unsigned Last) const;
+
+  /// Return the number of cases in [First..Last].
+  uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+                                unsigned First, unsigned Last) const;
 
   /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
   /// decides it's not a good idea.
@@ -319,14 +322,6 @@ private:
   void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
                       MachineBasicBlock *DefaultMBB);
 
-  /// Check whether the range [Low,High] fits in a machine word.
-  bool rangeFitsInWord(const APInt &Low, const APInt &High);
-
-  /// Check whether these clusters are suitable for lowering with bit tests based
-  /// on the number of destinations, comparison metric, and range.
-  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
-                             const APInt &Low, const APInt &High);
-
   /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
   /// decides it's not a good idea.
   bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
@@ -777,6 +772,11 @@ public:
                                         bool VarArgDisallowed,
                                         bool ForceVoidReturnTy);
 
+  /// Returns the type of FrameIndex and TargetFrameIndex nodes.
+  MVT getFrameIndexTy() {
+    return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout());
+  }
+
 private:
   // Terminator instructions.
   void visitRet(const ReturnInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 488c60a28ff..26dd45ef933 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -227,6 +227,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::CARRY_FALSE:                return "carry_false";
   case ISD::ADDC:                       return "addc";
   case ISD::ADDE:                       return "adde";
+  case ISD::ADDCARRY:                   return "addcarry";
   case ISD::SADDO:                      return "saddo";
   case ISD::UADDO:                      return "uaddo";
   case ISD::SSUBO:                      return "ssubo";
@@ -235,6 +236,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UMULO:                      return "umulo";
   case ISD::SUBC:                       return "subc";
   case ISD::SUBE:                       return "sube";
+  case ISD::SUBCARRY:                   return "subcarry";
   case ISD::SHL_PARTS:                  return "shl_parts";
   case ISD::SRA_PARTS:                  return "sra_parts";
   case ISD::SRL_PARTS:                  return "srl_parts";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index e21204dbb96..3aabdaeaa09 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -592,13 +593,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
     MRI.replaceRegWith(From, To);
   }
 
-  if (TLI->hasCopyImplyingStackAdjustment(MF))
-    MFI.setHasCopyImplyingStackAdjustment(true);
-
-  // Freeze the set of reserved registers now that MachineFrameInfo has been
-  // set up. All the information required by getReservedRegs() should be
-  // available now.
-  MRI.freezeReservedRegs(*MF);
+  TLI->finalizeLowering(*MF);
 
   // Release function-specific state. SDB and CurDAG are already cleared
   // at this point.
@@ -650,8 +645,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
 
   Worklist.push_back(CurDAG->getRoot().getNode());
 
-  APInt KnownZero;
-  APInt KnownOne;
+  KnownBits Known;
 
   do {
     SDNode *N = Worklist.pop_back_val();
@@ -680,8 +674,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
       continue;
 
     unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
-    CurDAG->computeKnownBits(Src, KnownZero, KnownOne);
-    FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+    CurDAG->computeKnownBits(Src, Known);
+    FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
   } while (!Worklist.empty());
 }
 
@@ -1930,11 +1924,11 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
   // either already zero or is not demanded.  Check for known zero input bits.
   APInt NeededMask = DesiredMask & ~ActualMask;
 
-  APInt KnownZero, KnownOne;
-  CurDAG->computeKnownBits(LHS, KnownZero, KnownOne);
+  KnownBits Known;
+  CurDAG->computeKnownBits(LHS, Known);
 
   // If all the missing bits in the or are already known to be set, match!
-  if ((NeededMask & KnownOne) == NeededMask)
+  if (NeededMask.isSubsetOf(Known.One))
     return true;
 
   // TODO: check to see if missing bits are just not demanded.
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index d27e2455978..c0a5041b139 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -242,7 +242,8 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
 
   // Cache this slot so we find it when going through the normal
   // assignment loop.
-  SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType());
+  SDValue Loc =
+      Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy());
   Builder.StatepointLowering.setLocation(Incoming, Loc);
 }
 
@@ -343,7 +344,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
                                                        Builder);
     int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
     // We use TargetFrameIndex so that isel will not select it into LEA
-    Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+    Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
 
     // TODO: We can create TokenFactor node instead of
     //       chaining stores one after another, this may allow
@@ -391,8 +392,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
     // This handles allocas as arguments to the statepoint (this is only
     // really meaningful for a deopt value.  For GC, we'd be trying to
     // relocate the address of the alloca itself?)
+    assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+           "Incoming value is a frame index!");
     Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
-                                                  Incoming.getValueType()));
+                                                  Builder.getFrameIndexTy()));
   } else if (LiveInOnly) {
     // If this value is live in (not live-on-return, or live-through), we can
     // treat it the same way patchpoint treats it's "live in" values.  We'll 
@@ -527,8 +530,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
     SDValue Incoming = Builder.getValue(V);
     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
       // This handles allocas as arguments to the statepoint
+      assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+             "Incoming value is a frame index!");
       Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
-                                                    Incoming.getValueType()));
+                                                    Builder.getFrameIndexTy()));
     }
   }
 
@@ -949,8 +954,8 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
     return;
   }
 
-  SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation,
-                                              SD.getValueType());
+  SDValue SpillSlot =
+      DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
 
   // Be conservative: flush all pending loads
   // TODO: Probably we can be less restrictive on this,
@@ -958,7 +963,9 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
   SDValue Chain = getRoot();
 
   SDValue SpillLoad =
-      DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
+      DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+                                                           Relocate.getType()),
+                  getCurSDLoc(), Chain, SpillSlot,
                   MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
                                                     *DerivedPtrLocation));
 
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 136dec873cb..2d39ecd9779 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -27,6 +27,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -109,8 +110,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
   IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
   IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
-  // FIXME: getParamAlignment is off by one from argument index.
-  Alignment  = CS->getParamAlignment(ArgIdx + 1);
+  Alignment  = CS->getParamAlignment(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
@@ -437,10 +437,9 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
                                      DAGCombinerInfo &DCI,
                                      TargetLoweringOpt &TLO) const {
   SDValue Op = User->getOperand(OpIdx);
-  APInt KnownZero, KnownOne;
+  KnownBits Known;
 
-  if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne,
-                            TLO, 0, true))
+  if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true))
     return false;
 
 
@@ -488,10 +487,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
   SelectionDAG &DAG = DCI.DAG;
   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                         !DCI.isBeforeLegalizeOps());
-  APInt KnownZero, KnownOne;
+  KnownBits Known;
 
-  bool Simplified = SimplifyDemandedBits(Op, DemandedMask, KnownZero, KnownOne,
-                                         TLO);
+  bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO);
   if (Simplified)
     DCI.CommitTargetLoweringOpt(TLO);
   return Simplified;
@@ -501,13 +499,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
 /// result of Op are ever used downstream. If we can use this information to
 /// simplify Op, create a new simplified DAG node and return true, returning the
 /// original and new nodes in Old and New. Otherwise, analyze the expression and
-/// return a mask of KnownOne and KnownZero bits for the expression (used to
-/// simplify the caller).  The KnownZero/One bits may only be accurate for those
-/// bits in the DemandedMask.
+/// return a mask of Known bits for the expression (used to simplify the
+/// caller).  The Known bits may only be accurate for those bits in the
+/// DemandedMask.
 bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                           const APInt &DemandedMask,
-                                          APInt &KnownZero,
-                                          APInt &KnownOne,
+                                          KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth,
                                           bool AssumeSingleUse) const {
@@ -519,14 +516,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   auto &DL = TLO.DAG.getDataLayout();
 
   // Don't know anything.
-  KnownZero = KnownOne = APInt(BitWidth, 0);
+  Known = KnownBits(BitWidth);
 
   // Other users may use these bits.
   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
     if (Depth != 0) {
-      // If not at the root, Just compute the KnownZero/KnownOne bits to
+      // If not at the root, Just compute the Known bits to
       // simplify things downstream.
-      TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
+      TLO.DAG.computeKnownBits(Op, Known, Depth);
       return false;
     }
     // If this is the root being simplified, allow it to have multiple uses,
@@ -541,38 +538,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     return false;
   }
 
-  APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+  KnownBits Known2, KnownOut;
   switch (Op.getOpcode()) {
   case ISD::Constant:
     // We know all of the bits for a constant!
-    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
-    KnownZero = ~KnownOne;
+    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+    Known.Zero = ~Known.One;
     return false;   // Don't fall through, will infinitely loop.
   case ISD::BUILD_VECTOR:
     // Collect the known bits that are shared by every constant vector element.
-    KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+    Known.Zero.setAllBits(); Known.One.setAllBits();
     for (SDValue SrcOp : Op->ops()) {
       if (!isa<ConstantSDNode>(SrcOp)) {
         // We can only handle all constant values - bail out with no known bits.
-        KnownZero = KnownOne = APInt(BitWidth, 0);
+        Known = KnownBits(BitWidth);
         return false;
       }
-      KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
-      KnownZero2 = ~KnownOne2;
+      Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
+      Known2.Zero = ~Known2.One;
 
       // BUILD_VECTOR can implicitly truncate sources, we must handle this.
-      if (KnownOne2.getBitWidth() != BitWidth) {
-        assert(KnownOne2.getBitWidth() > BitWidth &&
-               KnownZero2.getBitWidth() > BitWidth &&
+      if (Known2.One.getBitWidth() != BitWidth) {
+        assert(Known2.getBitWidth() > BitWidth &&
                "Expected BUILD_VECTOR implicit truncation");
-        KnownOne2 = KnownOne2.trunc(BitWidth);
-        KnownZero2 = KnownZero2.trunc(BitWidth);
+        Known2.One = Known2.One.trunc(BitWidth);
+        Known2.Zero = Known2.Zero.trunc(BitWidth);
       }
 
       // Known bits are the values that are shared by every element.
       // TODO: support per-element known bits.
-      KnownOne &= KnownOne2;
-      KnownZero &= KnownZero2;
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
     }
     return false;   // Don't fall through, will infinitely loop.
   case ISD::AND:
@@ -582,16 +578,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // the RHS.
     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
       SDValue Op0 = Op.getOperand(0);
-      APInt LHSZero, LHSOne;
+      KnownBits LHSKnown;
       // Do not increment Depth here; that can cause an infinite loop.
-      TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth);
+      TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
       // If the LHS already has zeros where RHSC does, this and is dead.
-      if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+      if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
         return TLO.CombineTo(Op, Op0);
 
       // If any of the set bits in the RHS are known zero on the LHS, shrink
       // the constant.
-      if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO))
+      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
         return true;
 
       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
@@ -600,64 +596,56 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       // the xor. For example, for a 32-bit X:
       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
-          LHSOne == ~RHSC->getAPIntValue()) {
+          LHSKnown.One == ~RHSC->getAPIntValue()) {
         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
                                       Op0.getOperand(0), Op.getOperand(1));
         return TLO.CombineTo(Op, Xor);
       }
     }
 
-    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
-                             KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
-                             KnownZero2, KnownOne2, TLO, Depth+1))
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
+                             Known2, TLO, Depth+1))
       return true;
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
 
     // If all of the demanded bits are known one on one side, return the other.
     // These bits cannot contribute to the result of the 'and'.
-    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+    if (NewMask.isSubsetOf(Known2.Zero | Known.One))
       return TLO.CombineTo(Op, Op.getOperand(0));
-    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+    if (NewMask.isSubsetOf(Known.Zero | Known2.One))
       return TLO.CombineTo(Op, Op.getOperand(1));
     // If all of the demanded bits in the inputs are known zeros, return zero.
-    if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+    if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
     // If the RHS is a constant, see if we can simplify it.
-    if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO))
+    if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
       return true;
     // If the operation can be done in a smaller type, do so.
     if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
       return true;
 
     // Output known-1 bits are only known if set in both the LHS & RHS.
-    KnownOne &= KnownOne2;
+    Known.One &= Known2.One;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
-    KnownZero |= KnownZero2;
+    Known.Zero |= Known2.Zero;
     break;
   case ISD::OR:
-    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
-                             KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
-                             KnownZero2, KnownOne2, TLO, Depth+1))
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
+                             Known2, TLO, Depth+1))
       return true;
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
 
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'or'.
-    if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+    if (NewMask.isSubsetOf(Known2.One | Known.Zero))
       return TLO.CombineTo(Op, Op.getOperand(0));
-    if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
-      return TLO.CombineTo(Op, Op.getOperand(1));
-    // If all of the potentially set bits on one side are known to be set on
-    // the other side, just use the 'other' side.
-    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
-      return TLO.CombineTo(Op, Op.getOperand(0));
-    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+    if (NewMask.isSubsetOf(Known.One | Known2.Zero))
       return TLO.CombineTo(Op, Op.getOperand(1));
     // If the RHS is a constant, see if we can simplify it.
     if (ShrinkDemandedConstant(Op, NewMask, TLO))
@@ -667,25 +655,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       return true;
 
     // Output known-0 bits are only known if clear in both the LHS & RHS.
-    KnownZero &= KnownZero2;
+    Known.Zero &= Known2.Zero;
     // Output known-1 are known to be set if set in either the LHS | RHS.
-    KnownOne |= KnownOne2;
+    Known.One |= Known2.One;
     break;
   case ISD::XOR:
-    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
-                             KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
-                             KnownOne2, TLO, Depth+1))
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
       return true;
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
 
     // If all of the demanded bits are known zero on one side, return the other.
     // These bits cannot contribute to the result of the 'xor'.
-    if ((KnownZero & NewMask) == NewMask)
+    if (NewMask.isSubsetOf(Known.Zero))
       return TLO.CombineTo(Op, Op.getOperand(0));
-    if ((KnownZero2 & NewMask) == NewMask)
+    if (NewMask.isSubsetOf(Known2.Zero))
       return TLO.CombineTo(Op, Op.getOperand(1));
     // If the operation can be done in a smaller type, do so.
     if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
@@ -694,25 +680,25 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // If all of the unknown bits are known to be zero on one side or the other
     // (but not both) turn this into an *inclusive* or.
     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
-    if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+    if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0)
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
                                                Op.getOperand(0),
                                                Op.getOperand(1)));
 
     // Output known-0 bits are known if clear or set in both the LHS & RHS.
-    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
     // Output known-1 are known to be set if set in only one of the LHS, RHS.
-    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
 
     // If all of the demanded bits on one side are known, and all of the set
     // bits on that side are also known to be set on the other side, turn this
     // into an AND, as we know the bits will be cleared.
     //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
     // NB: it is okay if more bits are known than are requested
-    if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
-      if (KnownOne == KnownOne2) { // set bits are the same on both sides
+    if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side
+      if (Known.One == Known2.One) { // set bits are the same on both sides
         EVT VT = Op.getValueType();
-        SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT);
+        SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);
         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
                                                  Op.getOperand(0), ANDC));
       }
@@ -738,44 +724,39 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       }
     }
 
-    KnownZero = std::move(KnownZeroOut);
-    KnownOne  = std::move(KnownOneOut);
+    Known = std::move(KnownOut);
     break;
   case ISD::SELECT:
-    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
-                             KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1))
       return true;
-    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
-                             KnownOne2, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+    assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
 
     // If the operands are constants, see if we can simplify them.
     if (ShrinkDemandedConstant(Op, NewMask, TLO))
       return true;
 
     // Only known if known in both the LHS and RHS.
-    KnownOne &= KnownOne2;
-    KnownZero &= KnownZero2;
+    Known.One &= Known2.One;
+    Known.Zero &= Known2.Zero;
     break;
   case ISD::SELECT_CC:
-    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
-                             KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1))
       return true;
-    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
-                             KnownOne2, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+    assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?");
 
     // If the operands are constants, see if we can simplify them.
     if (ShrinkDemandedConstant(Op, NewMask, TLO))
       return true;
 
     // Only known if known in both the LHS and RHS.
-    KnownOne &= KnownOne2;
-    KnownZero &= KnownZero2;
+    Known.One &= Known2.One;
+    Known.Zero &= Known2.Zero;
     break;
   case ISD::SETCC: {
     SDValue Op0 = Op.getOperand(0);
@@ -801,7 +782,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if (getBooleanContents(Op0.getValueType()) ==
             TargetLowering::ZeroOrOneBooleanContent &&
         BitWidth > 1)
-      KnownZero.setBitsFrom(1);
+      Known.Zero.setBitsFrom(1);
     break;
   }
   case ISD::SHL:
@@ -835,8 +816,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
         }
       }
 
-      if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
-                               KnownZero, KnownOne, TLO, Depth+1))
+      if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1))
         return true;
 
       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
@@ -885,10 +865,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
         }
       }
 
-      KnownZero <<= SA->getZExtValue();
-      KnownOne  <<= SA->getZExtValue();
+      Known.Zero <<= SA->getZExtValue();
+      Known.One  <<= SA->getZExtValue();
       // low bits known zero.
-      KnownZero.setLowBits(SA->getZExtValue());
+      Known.Zero.setLowBits(SA->getZExtValue());
     }
     break;
   case ISD::SRL:
@@ -906,7 +886,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
       // If the shift is exact, then it does demand the low bits (and knows that
       // they are zero).
-      if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
+      if (Op->getFlags().hasExact())
         InDemandedMask.setLowBits(ShAmt);
 
       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
@@ -931,14 +911,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       }
 
       // Compute the new bits that are at the top now.
-      if (SimplifyDemandedBits(InOp, InDemandedMask,
-                               KnownZero, KnownOne, TLO, Depth+1))
+      if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
         return true;
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-      KnownZero.lshrInPlace(ShAmt);
-      KnownOne.lshrInPlace(ShAmt);
+      assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+      Known.Zero.lshrInPlace(ShAmt);
+      Known.One.lshrInPlace(ShAmt);
 
-      KnownZero.setHighBits(ShAmt);  // High bits known zero.
+      Known.Zero.setHighBits(ShAmt);  // High bits known zero.
     }
     break;
   case ISD::SRA:
@@ -963,33 +942,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
       // If the shift is exact, then it does demand the low bits (and knows that
       // they are zero).
-      if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
+      if (Op->getFlags().hasExact())
         InDemandedMask.setLowBits(ShAmt);
 
       // If any of the demanded bits are produced by the sign extension, we also
       // demand the input sign bit.
-      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
-      if (HighBits.intersects(NewMask))
-        InDemandedMask |= APInt::getSignMask(VT.getScalarSizeInBits());
+      if (NewMask.countLeadingZeros() < ShAmt)
+        InDemandedMask.setSignBit();
 
-      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
-                               KnownZero, KnownOne, TLO, Depth+1))
+      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
+                               Depth+1))
         return true;
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-      KnownZero.lshrInPlace(ShAmt);
-      KnownOne.lshrInPlace(ShAmt);
-
-      // Handle the sign bit, adjusted to where it is now in the mask.
-      APInt SignMask = APInt::getSignMask(BitWidth).lshr(ShAmt);
+      assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+      Known.Zero.lshrInPlace(ShAmt);
+      Known.One.lshrInPlace(ShAmt);
 
       // If the input sign bit is known to be zero, or if none of the top bits
       // are demanded, turn this into an unsigned shift right.
-      if (KnownZero.intersects(SignMask) || (HighBits & ~NewMask) == HighBits) {
+      if (Known.Zero[BitWidth - ShAmt - 1] ||
+          NewMask.countLeadingZeros() >= ShAmt) {
         SDNodeFlags Flags;
-        Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact());
+        Flags.setExact(Op->getFlags().hasExact());
         return TLO.CombineTo(Op,
                              TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
-                                             Op.getOperand(1), &Flags));
+                                             Op.getOperand(1), Flags));
       }
 
       int Log2 = NewMask.exactLogBase2();
@@ -1002,9 +978,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                                  Op.getOperand(0), NewSA));
       }
 
-      if (KnownOne.intersects(SignMask))
+      if (Known.One[BitWidth - ShAmt - 1])
         // New bits are known one.
-        KnownOne |= HighBits;
+        Known.One.setHighBits(ShAmt);
     }
     break;
   case ISD::SIGN_EXTEND_INREG: {
@@ -1057,24 +1033,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     InputDemandedBits |= InSignBit;
 
     if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
-                             KnownZero, KnownOne, TLO, Depth+1))
+                             Known, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
 
     // If the sign bit of the input is known set or clear, then we know the
     // top bits of the result.
 
     // If the input sign bit is known zero, convert this into a zero extension.
-    if (KnownZero.intersects(InSignBit))
+    if (Known.Zero.intersects(InSignBit))
       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
                                    Op.getOperand(0), dl, ExVT.getScalarType()));
 
-    if (KnownOne.intersects(InSignBit)) {    // Input sign bit known set
-      KnownOne |= NewBits;
-      KnownZero &= ~NewBits;
+    if (Known.One.intersects(InSignBit)) {    // Input sign bit known set
+      Known.One |= NewBits;
+      Known.Zero &= ~NewBits;
     } else {                       // Input sign bit unknown
-      KnownZero &= ~NewBits;
-      KnownOne &= ~NewBits;
+      Known.Zero &= ~NewBits;
+      Known.One &= ~NewBits;
     }
     break;
   }
@@ -1085,22 +1061,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
     APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
 
-    APInt KnownZeroLo, KnownOneLo;
-    APInt KnownZeroHi, KnownOneHi;
+    KnownBits KnownLo, KnownHi;
 
-    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo,
-                             KnownOneLo, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
       return true;
 
-    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi,
-                             KnownOneHi, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
       return true;
 
-    KnownZero = KnownZeroLo.zext(BitWidth) |
-                KnownZeroHi.zext(BitWidth).shl(HalfBitWidth);
+    Known.Zero = KnownLo.Zero.zext(BitWidth) |
+                KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
 
-    KnownOne = KnownOneLo.zext(BitWidth) |
-               KnownOneHi.zext(BitWidth).shl(HalfBitWidth);
+    Known.One = KnownLo.One.zext(BitWidth) |
+               KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
     break;
   }
   case ISD::ZERO_EXTEND: {
@@ -1115,13 +1088,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                                Op.getValueType(),
                                                Op.getOperand(0)));
 
-    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
-                             KnownZero, KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    KnownZero = KnownZero.zext(BitWidth);
-    KnownOne = KnownOne.zext(BitWidth);
-    KnownZero |= NewBits;
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+    Known.Zero = Known.Zero.zext(BitWidth);
+    Known.One = Known.One.zext(BitWidth);
+    Known.Zero |= NewBits;
     break;
   }
   case ISD::SIGN_EXTEND: {
@@ -1143,37 +1115,36 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     InDemandedBits |= InSignBit;
     InDemandedBits = InDemandedBits.trunc(InBits);
 
-    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
-                             KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
+                             Depth+1))
       return true;
-    KnownZero = KnownZero.zext(BitWidth);
-    KnownOne = KnownOne.zext(BitWidth);
+    Known.Zero = Known.Zero.zext(BitWidth);
+    Known.One = Known.One.zext(BitWidth);
 
     // If the sign bit is known zero, convert this to a zero extend.
-    if (KnownZero.intersects(InSignBit))
+    if (Known.Zero.intersects(InSignBit))
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
                                                Op.getValueType(),
                                                Op.getOperand(0)));
 
     // If the sign bit is known one, the top bits match.
-    if (KnownOne.intersects(InSignBit)) {
-      KnownOne |= NewBits;
-      assert((KnownZero & NewBits) == 0);
+    if (Known.One.intersects(InSignBit)) {
+      Known.One |= NewBits;
+      assert((Known.Zero & NewBits) == 0);
     } else {   // Otherwise, top bits aren't known.
-      assert((KnownOne & NewBits) == 0);
-      assert((KnownZero & NewBits) == 0);
+      assert((Known.One & NewBits) == 0);
+      assert((Known.Zero & NewBits) == 0);
     }
     break;
   }
   case ISD::ANY_EXTEND: {
     unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
     APInt InMask = NewMask.trunc(OperandBitWidth);
-    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
-                             KnownZero, KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    KnownZero = KnownZero.zext(BitWidth);
-    KnownOne = KnownOne.zext(BitWidth);
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+    Known.Zero = Known.Zero.zext(BitWidth);
+    Known.One = Known.One.zext(BitWidth);
     break;
   }
   case ISD::TRUNCATE: {
@@ -1181,11 +1152,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // zero/one bits live out.
     unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
     APInt TruncMask = NewMask.zext(OperandBitWidth);
-    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
-                             KnownZero, KnownOne, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1))
       return true;
-    KnownZero = KnownZero.trunc(BitWidth);
-    KnownOne = KnownOne.trunc(BitWidth);
+    Known.Zero = Known.Zero.trunc(BitWidth);
+    Known.One = Known.One.trunc(BitWidth);
 
     // If the input is only used by this truncate, see if we can shrink it based
     // on the known demanded bits.
@@ -1233,7 +1203,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       }
     }
 
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
     break;
   }
   case ISD::AssertZext: {
@@ -1243,11 +1213,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     APInt InMask = APInt::getLowBitsSet(BitWidth,
                                         VT.getSizeInBits());
     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
-                             KnownZero, KnownOne, TLO, Depth+1))
+                             Known, TLO, Depth+1))
       return true;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
 
-    KnownZero |= ~InMask;
+    Known.Zero |= ~InMask;
     break;
   }
   case ISD::BITCAST:
@@ -1285,22 +1255,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // of the highest bit demanded of them.
     APInt LoMask = APInt::getLowBitsSet(BitWidth,
                                         BitWidth - NewMask.countLeadingZeros());
-    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
-                             KnownOne2, TLO, Depth+1) ||
-        SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
-                             KnownOne2, TLO, Depth+1) ||
+    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) ||
+        SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) ||
         // See if the operation should be performed at a smaller bit width.
         ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
-      const SDNodeFlags *Flags = Op.getNode()->getFlags();
-      if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) {
+      SDNodeFlags Flags = Op.getNode()->getFlags();
+      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
         // Disable the nsw and nuw flags. We can no longer guarantee that we
         // won't wrap after simplification.
-        SDNodeFlags NewFlags = *Flags;
-        NewFlags.setNoSignedWrap(false);
-        NewFlags.setNoUnsignedWrap(false);
+        Flags.setNoSignedWrap(false);
+        Flags.setNoUnsignedWrap(false);
         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
                                         Op.getOperand(0), Op.getOperand(1),
-                                        &NewFlags);
+                                        Flags);
         return TLO.CombineTo(Op, NewOp);
       }
       return true;
@@ -1309,13 +1276,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   }
   default:
     // Just use computeKnownBits to compute output bits.
-    TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
+    TLO.DAG.computeKnownBits(Op, Known, Depth);
     break;
   }
 
   // If we know the value of all of the demanded bits, return this as a
   // constant.
-  if ((NewMask & (KnownZero|KnownOne)) == NewMask) {
+  if (NewMask.isSubsetOf(Known.Zero|Known.One)) {
     // Avoid folding to a constant if any OpaqueConstant is involved.
     const SDNode *N = Op.getNode();
     for (SDNodeIterator I = SDNodeIterator::begin(N),
@@ -1326,17 +1293,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
           return false;
     }
     return TLO.CombineTo(Op,
-                         TLO.DAG.getConstant(KnownOne, dl, Op.getValueType()));
+                         TLO.DAG.getConstant(Known.One, dl, Op.getValueType()));
   }
 
   return false;
 }
 
 /// Determine which of the bits specified in Mask are known to be either zero or
-/// one and return them in the KnownZero/KnownOne bitsets.
+/// one and return them in the Known.
 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                   APInt &KnownZero,
-                                                   APInt &KnownOne,
+                                                   KnownBits &Known,
                                                    const APInt &DemandedElts,
                                                    const SelectionDAG &DAG,
                                                    unsigned Depth) const {
@@ -1346,7 +1312,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
          "Should use MaskedValueIsZero if you don't know whether Op"
          " is a target node!");
-  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+  Known.Zero.clearAllBits(); Known.One.clearAllBits();
 }
 
 /// This method can be implemented by targets that want to expose additional
@@ -1721,7 +1687,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
               bestWidth = width;
               break;
             }
-            newMask = newMask << width;
+            newMask <<= width;
           }
         }
       }
@@ -2986,9 +2952,9 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
                                                         DAG.getDataLayout()));
     SDNodeFlags Flags;
     Flags.setExact(true);
-    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
+    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags);
     Created.push_back(Op1.getNode());
-    d = d.ashr(ShAmt);
+    d.ashrInPlace(ShAmt);
   }
 
   // Calculate the multiplicative inverse, using Newton's method.
@@ -3030,7 +2996,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
     return SDValue();
 
   // If the sdiv has an 'exact' bit we can use a simpler lowering.
-  if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact())
+  if (N->getFlags().hasExact())
     return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);
 
   APInt::ms magics = Divisor.magic();
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 315b059c5ac..916b6f08c1b 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
 #define DEBUG_TYPE "stackmaps"
 
 static cl::opt<int> StackMapVersion(
-    "stackmap-version", cl::init(2),
-    cl::desc("Specify the stackmap encoding version (default = 2)"));
+    "stackmap-version", cl::init(3),
+    cl::desc("Specify the stackmap encoding version (default = 3)"));
 
 const char *StackMaps::WSMP = "Stack Maps: ";
 
@@ -85,7 +85,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
 }
 
 StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
-  if (StackMapVersion != 2)
+  if (StackMapVersion != 3)
     llvm_unreachable("Unsupported stackmap version!");
 }
 
@@ -221,8 +221,9 @@ void StackMaps::print(raw_ostream &OS) {
         OS << "Constant Index " << Loc.Offset;
         break;
       }
-      OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size
-         << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n";
+      OS << "\t[encoding: .byte " << Loc.Type << ", .byte 0"
+         << ", .short " << Loc.Size << ", .short " << Loc.Reg << ", .short 0"
+         << ", .int " << Loc.Offset << "]\n";
       Idx++;
     }
 
@@ -521,11 +522,16 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
 
     for (const auto &Loc : CSLocs) {
       OS.EmitIntValue(Loc.Type, 1);
-      OS.EmitIntValue(Loc.Size, 1);
+      OS.EmitIntValue(0, 1);  // Reserved
+      OS.EmitIntValue(Loc.Size, 2);
       OS.EmitIntValue(Loc.Reg, 2);
+      OS.EmitIntValue(0, 2);  // Reserved
       OS.EmitIntValue(Loc.Offset, 4);
     }
 
+    // Emit alignment to 8 byte.
+    OS.EmitValueToAlignment(8);
+
     // Num live-out registers and padding to align to 4 byte.
     OS.EmitIntValue(0, 2);
     OS.EmitIntValue(LiveOuts.size(), 2);
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index e579922bb69..39aa946fa84 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -53,6 +54,18 @@ static cl::opt<unsigned> MaximumJumpTableSize
   ("max-jump-table-size", cl::init(0), cl::Hidden,
    cl::desc("Set maximum size of jump tables; zero for no limit."));
 
+/// Minimum jump table density for normal functions.
+static cl::opt<unsigned>
+    JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
+                     cl::desc("Minimum density for building a jump table in "
+                              "a normal function"));
+
+/// Minimum jump table density for -Os or -Oz functions.
+static cl::opt<unsigned> OptsizeJumpTableDensity(
+    "optsize-jump-table-density", cl::init(40), cl::Hidden,
+    cl::desc("Minimum density for building a jump table in "
+             "an optsize function"));
+
 // Although this default value is arbitrary, it is not random. It is assumed
 // that a condition that evaluates the same way by a higher percentage than this
 // is best represented as control flow. Therefore, the default value N should be
@@ -910,6 +923,10 @@ void TargetLoweringBase::initActions() {
     setOperationAction(ISD::SMULO, VT, Expand);
     setOperationAction(ISD::UMULO, VT, Expand);
 
+    // ADDCARRY operations default to expand
+    setOperationAction(ISD::ADDCARRY, VT, Expand);
+    setOperationAction(ISD::SUBCARRY, VT, Expand);
+
     // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
@@ -1901,6 +1918,10 @@ void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
   MinimumJumpTableEntries = Val;
 }
 
+unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const {
+  return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
+}
+
 unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
   return MaximumJumpTableSize;
 }
@@ -2092,3 +2113,7 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT,
                                               MachineFunction &MF) const {
   return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
 }
+
+void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
+  MF.getRegInfo().freezeReservedRegs(MF);
+}
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index c2db56a7657..f085132b6a9 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -195,18 +196,30 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
         }
 
       if (phi->getNumOperands() == 3) {
-        unsigned Input = phi->getOperand(1).getReg();
-        unsigned Output = phi->getOperand(0).getReg();
-
-        phi++->eraseFromParent();
+        const MachineOperand &Input = phi->getOperand(1);
+        const MachineOperand &Output = phi->getOperand(0);
+        unsigned InputReg = Input.getReg();
+        unsigned OutputReg = Output.getReg();
+        assert(Output.getSubReg() == 0 && "Cannot have output subregister");
         ModifiedPHI = true;
 
-        if (Input != Output) {
+        if (InputReg != OutputReg) {
           MachineRegisterInfo &MRI = F.getRegInfo();
-          MRI.constrainRegClass(Input, MRI.getRegClass(Output));
-          MRI.replaceRegWith(Output, Input);
+          unsigned InputSub = Input.getSubReg();
+          if (InputSub == 0) {
+            MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg));
+            MRI.replaceRegWith(OutputReg, InputReg);
+          } else {
+            // The input register to the PHI has a subregister:
+            // insert a COPY instead of simply replacing the output
+            // with the input.
+            const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
+            BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
+                    TII->get(TargetOpcode::COPY), OutputReg)
+                .addReg(InputReg, getRegState(Input), InputSub);
+          }
+          phi++->eraseFromParent();
         }
-
         continue;
       }
 
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index 6e9214d72ad..421f22ca5d8 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -7,8 +7,13 @@ add_llvm_library(LLVMDebugInfoCodeView
   EnumTables.cpp
   Formatters.cpp
   Line.cpp
-  ModuleSubstream.cpp
-  ModuleSubstreamVisitor.cpp
+  ModuleDebugFileChecksumFragment.cpp
+  ModuleDebugFragment.cpp
+  ModuleDebugFragmentRecord.cpp
+  ModuleDebugFragmentVisitor.cpp
+  ModuleDebugInlineeLinesFragment.cpp
+  ModuleDebugLineFragment.cpp
+  ModuleDebugUnknownFragment.cpp
   RecordSerialization.cpp
   SymbolRecordMapping.cpp
   SymbolDumper.cpp
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index 0e20bcb27ec..fc6008ba66d 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -245,20 +245,20 @@ static const EnumEntry<uint32_t> FrameProcSymFlagNames[] = {
 };
 
 static const EnumEntry<uint32_t> ModuleSubstreamKindNames[] = {
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, None),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, Symbols),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, Lines),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, StringTable),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FileChecksums),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FrameData),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, InlineeLines),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeImports),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeExports),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, ILLines),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FuncMDTokenMap),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, TypeMDTokenMap),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, MergedAssemblyInput),
-    CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CoffSymbolRVA),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, None),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Symbols),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Lines),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, StringTable),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FileChecksums),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FrameData),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, InlineeLines),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeImports),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeExports),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, ILLines),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FuncMDTokenMap),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, TypeMDTokenMap),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, MergedAssemblyInput),
+    CV_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CoffSymbolRVA),
 };
 
 static const EnumEntry<uint16_t> ExportSymFlagNames[] = {
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp
new file mode 100644
index 00000000000..c349e7ecce9
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp
@@ -0,0 +1,102 @@
+//===- ModuleDebugFileChecksumFragment.cpp ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/Support/BinaryStreamReader.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+struct FileChecksumEntryHeader {
+  using ulittle32_t = support::ulittle32_t;
+
+  ulittle32_t FileNameOffset; // Byte offset of filename in global string table.
+  uint8_t ChecksumSize;       // Number of bytes of checksum.
+  uint8_t ChecksumKind;       // FileChecksumKind
+                              // Checksum bytes follow.
+};
+
+Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract(
+    BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item, void *Ctx) {
+  BinaryStreamReader Reader(Stream);
+
+  const FileChecksumEntryHeader *Header;
+  if (auto EC = Reader.readObject(Header))
+    return EC;
+
+  Item.FileNameOffset = Header->FileNameOffset;
+  Item.Kind = static_cast<FileChecksumKind>(Header->ChecksumKind);
+  if (auto EC = Reader.readBytes(Item.Checksum, Header->ChecksumSize))
+    return EC;
+
+  Len = alignTo(Header->ChecksumSize + sizeof(FileChecksumEntryHeader), 4);
+  return Error::success();
+}
+
+Error ModuleDebugFileChecksumFragmentRef::initialize(
+    BinaryStreamReader Reader) {
+  if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining()))
+    return EC;
+
+  return Error::success();
+}
+
+ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment()
+    : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums) {}
+
+void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset,
+                                                  FileChecksumKind Kind,
+                                                  ArrayRef<uint8_t> Bytes) {
+  FileChecksumEntry Entry;
+  if (!Bytes.empty()) {
+    uint8_t *Copy = Storage.Allocate<uint8_t>(Bytes.size());
+    ::memcpy(Copy, Bytes.data(), Bytes.size());
+    Entry.Checksum = makeArrayRef(Copy, Bytes.size());
+  }
+  Entry.FileNameOffset = StringTableOffset;
+  Entry.Kind = Kind;
+  Checksums.push_back(Entry);
+
+  // This maps the offset of this string in the string table to the offset
+  // of this checksum entry in the checksum buffer.
+  OffsetMap[StringTableOffset] = SerializedSize;
+  assert(SerializedSize % 4 == 0);
+
+  uint32_t Len = alignTo(sizeof(FileChecksumEntryHeader) + Bytes.size(), 4);
+  SerializedSize += Len;
+}
+
+uint32_t ModuleDebugFileChecksumFragment::calculateSerializedLength() {
+  return SerializedSize;
+}
+
+Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) {
+  for (const auto &FC : Checksums) {
+    FileChecksumEntryHeader Header;
+    Header.ChecksumKind = uint8_t(FC.Kind);
+    Header.ChecksumSize = FC.Checksum.size();
+    Header.FileNameOffset = FC.FileNameOffset;
+    if (auto EC = Writer.writeObject(Header))
+      return EC;
+    if (auto EC = Writer.writeArray(makeArrayRef(FC.Checksum)))
+      return EC;
+    if (auto EC = Writer.padToAlignment(4))
+      return EC;
+  }
+  return Error::success();
+}
+
+uint32_t ModuleDebugFileChecksumFragment::mapChecksumOffset(
+    uint32_t StringTableOffset) const {
+  auto Iter = OffsetMap.find(StringTableOffset);
+  assert(Iter != OffsetMap.end());
+  return Iter->second;
+}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp
new file mode 100644
index 00000000000..2af1917413d
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ModuleDebugFragment.cpp
@@ -0,0 +1,16 @@
+//===- ModuleDebugFragment.cpp -----------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+
+using namespace llvm::codeview;
+
+ModuleDebugFragmentRef::~ModuleDebugFragmentRef() {}
+
+ModuleDebugFragment::~ModuleDebugFragment() {}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp b/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp
new file mode 100644
index 00000000000..b2543de7806
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ModuleDebugFragmentRecord.cpp
@@ -0,0 +1,84 @@
+//===- ModuleDebugFragmentRecord.cpp -----------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+
+#include "llvm/Support/BinaryStreamReader.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+ModuleDebugFragmentRecord::ModuleDebugFragmentRecord()
+    : Kind(ModuleDebugFragmentKind::None) {}
+
+ModuleDebugFragmentRecord::ModuleDebugFragmentRecord(
+    ModuleDebugFragmentKind Kind, BinaryStreamRef Data)
+    : Kind(Kind), Data(Data) {}
+
+Error ModuleDebugFragmentRecord::initialize(BinaryStreamRef Stream,
+                                            ModuleDebugFragmentRecord &Info) {
+  const ModuleDebugFragmentHeader *Header;
+  BinaryStreamReader Reader(Stream);
+  if (auto EC = Reader.readObject(Header))
+    return EC;
+
+  ModuleDebugFragmentKind Kind =
+      static_cast<ModuleDebugFragmentKind>(uint32_t(Header->Kind));
+  switch (Kind) {
+  case ModuleDebugFragmentKind::FileChecksums:
+  case ModuleDebugFragmentKind::Lines:
+  case ModuleDebugFragmentKind::InlineeLines:
+    break;
+  default:
+    llvm_unreachable("Unexpected debug fragment kind!");
+  }
+  if (auto EC = Reader.readStreamRef(Info.Data, Header->Length))
+    return EC;
+  Info.Kind = Kind;
+  return Error::success();
+}
+
+uint32_t ModuleDebugFragmentRecord::getRecordLength() const {
+  uint32_t Result = sizeof(ModuleDebugFragmentHeader) + Data.getLength();
+  assert(Result % 4 == 0);
+  return Result;
+}
+
+ModuleDebugFragmentKind ModuleDebugFragmentRecord::kind() const { return Kind; }
+
+BinaryStreamRef ModuleDebugFragmentRecord::getRecordData() const {
+  return Data;
+}
+
+ModuleDebugFragmentRecordBuilder::ModuleDebugFragmentRecordBuilder(
+    ModuleDebugFragmentKind Kind, ModuleDebugFragment &Frag)
+    : Kind(Kind), Frag(Frag) {}
+
+uint32_t ModuleDebugFragmentRecordBuilder::calculateSerializedLength() {
+  uint32_t Size = sizeof(ModuleDebugFragmentHeader) +
+                  alignTo(Frag.calculateSerializedLength(), 4);
+  return Size;
+}
+
+Error ModuleDebugFragmentRecordBuilder::commit(BinaryStreamWriter &Writer) {
+  ModuleDebugFragmentHeader Header;
+  Header.Kind = uint32_t(Kind);
+  Header.Length =
+      calculateSerializedLength() - sizeof(ModuleDebugFragmentHeader);
+
+  if (auto EC = Writer.writeObject(Header))
+    return EC;
+  if (auto EC = Frag.commit(Writer))
+    return EC;
+  if (auto EC = Writer.padToAlignment(4))
+    return EC;
+
+  return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp b/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp
new file mode 100644
index 00000000000..dc591f3990e
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ModuleDebugFragmentVisitor.cpp
@@ -0,0 +1,52 @@
+//===- ModuleDebugFragmentVisitor.cpp ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h"
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error llvm::codeview::visitModuleDebugFragment(
+    const ModuleDebugFragmentRecord &R, ModuleDebugFragmentVisitor &V) {
+  BinaryStreamReader Reader(R.getRecordData());
+  switch (R.kind()) {
+  case ModuleDebugFragmentKind::Lines: {
+    ModuleDebugLineFragmentRef Fragment;
+    if (auto EC = Fragment.initialize(Reader))
+      return EC;
+
+    return V.visitLines(Fragment);
+  }
+  case ModuleDebugFragmentKind::FileChecksums: {
+    ModuleDebugFileChecksumFragmentRef Fragment;
+    if (auto EC = Fragment.initialize(Reader))
+      return EC;
+
+    return V.visitFileChecksums(Fragment);
+  }
+  case ModuleDebugFragmentKind::InlineeLines: {
+    ModuleDebugInlineeLineFragmentRef Fragment;
+    if (auto EC = Fragment.initialize(Reader))
+      return EC;
+    return V.visitInlineeLines(Fragment);
+  }
+  default: {
+    ModuleDebugUnknownFragmentRef Fragment(R.kind(), R.getRecordData());
+    return V.visitUnknown(Fragment);
+  }
+  }
+}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp
new file mode 100644
index 00000000000..483f7cb5c5a
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp
@@ -0,0 +1,116 @@
+//===- ModuleDebugInlineeLineFragment.cpp ------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error VarStreamArrayExtractor<InlineeSourceLine>::extract(
+    BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item,
+    ContextType *Fragment) {
+  BinaryStreamReader Reader(Stream);
+
+  if (auto EC = Reader.readObject(Item.Header))
+    return EC;
+
+  if (Fragment->hasExtraFiles()) {
+    uint32_t ExtraFileCount;
+    if (auto EC = Reader.readInteger(ExtraFileCount))
+      return EC;
+    if (auto EC = Reader.readArray(Item.ExtraFiles, ExtraFileCount))
+      return EC;
+  }
+
+  Len = Reader.getOffset();
+  return Error::success();
+}
+
+ModuleDebugInlineeLineFragmentRef::ModuleDebugInlineeLineFragmentRef()
+    : ModuleDebugFragmentRef(ModuleDebugFragmentKind::InlineeLines) {}
+
+Error ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) {
+  if (auto EC = Reader.readEnum(Signature))
+    return EC;
+
+  if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining(), this))
+    return EC;
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
+
+bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const {
+  return Signature == InlineeLinesSignature::ExtraFiles;
+}
+
+ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment(
+    bool HasExtraFiles)
+    : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines),
+      HasExtraFiles(HasExtraFiles) {}
+
+uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() {
+  // 4 bytes for the signature
+  uint32_t Size = sizeof(InlineeLinesSignature);
+
+  // one header for each entry.
+  Size += Entries.size() * sizeof(InlineeSourceLineHeader);
+  if (HasExtraFiles) {
+    // If extra files are enabled, one count for each entry.
+    Size += Entries.size() * sizeof(uint32_t);
+
+    // And one file id for each file.
+    Size += ExtraFileCount * sizeof(uint32_t);
+  }
+  assert(Size % 4 == 0);
+  return Size;
+}
+
+Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) {
+  InlineeLinesSignature Sig = InlineeLinesSignature::Normal;
+  if (HasExtraFiles)
+    Sig = InlineeLinesSignature::ExtraFiles;
+
+  if (auto EC = Writer.writeEnum(Sig))
+    return EC;
+
+  for (const auto &E : Entries) {
+    if (auto EC = Writer.writeObject(E.Header))
+      return EC;
+
+    if (!HasExtraFiles)
+      continue;
+
+    if (auto EC = Writer.writeInteger<uint32_t>(E.ExtraFiles.size()))
+      return EC;
+    if (auto EC = Writer.writeArray(makeArrayRef(E.ExtraFiles)))
+      return EC;
+  }
+
+  return Error::success();
+}
+
+void ModuleDebugInlineeLineFragment::addExtraFile(uint32_t FileOffset) {
+  auto &Entry = Entries.back();
+  Entry.ExtraFiles.push_back(ulittle32_t(FileOffset));
+  ++ExtraFileCount;
+}
+
+void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId,
+                                                   uint32_t FileOffset,
+                                                   uint32_t SourceLine) {
+  Entries.emplace_back();
+  auto &Entry = Entries.back();
+  Entry.Header.FileID = FileOffset;
+  Entry.Header.SourceLineNum = SourceLine;
+  Entry.Header.Inlinee = FuncId;
+}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp
new file mode 100644
index 00000000000..103010ca283
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp
@@ -0,0 +1,155 @@
+//===- ModuleDebugLineFragment.cpp -------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error LineColumnExtractor::extract(BinaryStreamRef Stream, uint32_t &Len,
+                                   LineColumnEntry &Item,
+                                   const LineFragmentHeader *Header) {
+  using namespace codeview;
+  const LineBlockFragmentHeader *BlockHeader;
+  BinaryStreamReader Reader(Stream);
+  if (auto EC = Reader.readObject(BlockHeader))
+    return EC;
+  bool HasColumn = Header->Flags & uint16_t(LF_HaveColumns);
+  uint32_t LineInfoSize =
+      BlockHeader->NumLines *
+      (sizeof(LineNumberEntry) + (HasColumn ? sizeof(ColumnNumberEntry) : 0));
+  if (BlockHeader->BlockSize < sizeof(LineBlockFragmentHeader))
+    return make_error<CodeViewError>(cv_error_code::corrupt_record,
+                                     "Invalid line block record size");
+  uint32_t Size = BlockHeader->BlockSize - sizeof(LineBlockFragmentHeader);
+  if (LineInfoSize > Size)
+    return make_error<CodeViewError>(cv_error_code::corrupt_record,
+                                     "Invalid line block record size");
+  // The value recorded in BlockHeader->BlockSize includes the size of
+  // LineBlockFragmentHeader.
+  Len = BlockHeader->BlockSize;
+  Item.NameIndex = BlockHeader->NameIndex;
+  if (auto EC = Reader.readArray(Item.LineNumbers, BlockHeader->NumLines))
+    return EC;
+  if (HasColumn) {
+    if (auto EC = Reader.readArray(Item.Columns, BlockHeader->NumLines))
+      return EC;
+  }
+  return Error::success();
+}
+
+ModuleDebugLineFragmentRef::ModuleDebugLineFragmentRef()
+    : ModuleDebugFragmentRef(ModuleDebugFragmentKind::Lines) {}
+
+Error ModuleDebugLineFragmentRef::initialize(BinaryStreamReader Reader) {
+  if (auto EC = Reader.readObject(Header))
+    return EC;
+
+  if (auto EC =
+          Reader.readArray(LinesAndColumns, Reader.bytesRemaining(), Header))
+    return EC;
+
+  return Error::success();
+}
+
+bool ModuleDebugLineFragmentRef::hasColumnInfo() const {
+  return !!(Header->Flags & LF_HaveColumns);
+}
+
+ModuleDebugLineFragment::ModuleDebugLineFragment()
+    : ModuleDebugFragment(ModuleDebugFragmentKind::Lines) {}
+
+void ModuleDebugLineFragment::createBlock(uint32_t ChecksumBufferOffset) {
+  Blocks.emplace_back(ChecksumBufferOffset);
+}
+
+void ModuleDebugLineFragment::addLineInfo(uint32_t Offset,
+                                          const LineInfo &Line) {
+  Block &B = Blocks.back();
+  LineNumberEntry LNE;
+  LNE.Flags = Line.getRawData();
+  LNE.Offset = Offset;
+  B.Lines.push_back(LNE);
+}
+
+void ModuleDebugLineFragment::addLineAndColumnInfo(uint32_t Offset,
+                                                   const LineInfo &Line,
+                                                   uint32_t ColStart,
+                                                   uint32_t ColEnd) {
+  Block &B = Blocks.back();
+  assert(B.Lines.size() == B.Columns.size());
+
+  addLineInfo(Offset, Line);
+  ColumnNumberEntry CNE;
+  CNE.StartColumn = ColStart;
+  CNE.EndColumn = ColEnd;
+  B.Columns.push_back(CNE);
+}
+
+Error ModuleDebugLineFragment::commit(BinaryStreamWriter &Writer) {
+  LineFragmentHeader Header;
+  Header.CodeSize = CodeSize;
+  Header.Flags = hasColumnInfo() ? LF_HaveColumns : 0;
+  Header.RelocOffset = RelocOffset;
+  Header.RelocSegment = RelocSegment;
+
+  if (auto EC = Writer.writeObject(Header))
+    return EC;
+
+  for (const auto &B : Blocks) {
+    LineBlockFragmentHeader BlockHeader;
+    assert(B.Lines.size() == B.Columns.size() || B.Columns.empty());
+
+    BlockHeader.NumLines = B.Lines.size();
+    BlockHeader.BlockSize = sizeof(LineBlockFragmentHeader);
+    BlockHeader.BlockSize += BlockHeader.NumLines * sizeof(LineNumberEntry);
+    if (hasColumnInfo())
+      BlockHeader.BlockSize += BlockHeader.NumLines * sizeof(ColumnNumberEntry);
+    BlockHeader.NameIndex = B.ChecksumBufferOffset;
+    if (auto EC = Writer.writeObject(BlockHeader))
+      return EC;
+
+    if (auto EC = Writer.writeArray(makeArrayRef(B.Lines)))
+      return EC;
+
+    if (hasColumnInfo()) {
+      if (auto EC = Writer.writeArray(makeArrayRef(B.Columns)))
+        return EC;
+    }
+  }
+  return Error::success();
+}
+
+uint32_t ModuleDebugLineFragment::calculateSerializedLength() {
+  uint32_t Size = sizeof(LineFragmentHeader);
+  for (const auto &B : Blocks) {
+    Size += sizeof(LineBlockFragmentHeader);
+    Size += B.Lines.size() * sizeof(LineNumberEntry);
+    if (hasColumnInfo())
+      Size += B.Columns.size() * sizeof(ColumnNumberEntry);
+  }
+  return Size;
+}
+
+void ModuleDebugLineFragment::setRelocationAddress(uint16_t Segment,
+                                                   uint16_t Offset) {
+  RelocOffset = Offset;
+  RelocSegment = Segment;
+}
+
+void ModuleDebugLineFragment::setCodeSize(uint32_t Size) { CodeSize = Size; }
+
+void ModuleDebugLineFragment::setFlags(LineFlags Flags) { this->Flags = Flags; }
+
+bool ModuleDebugLineFragment::hasColumnInfo() const {
+  return Flags & LF_HaveColumns;
+}
diff --git a/lib/DebugInfo/CodeView/ModuleDebugUnknownFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugUnknownFragment.cpp
new file mode 100644
index 00000000000..9fd2cb8ed3e
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ModuleDebugUnknownFragment.cpp
@@ -0,0 +1,10 @@
+//===- ModuleDebugUnknownFragment.cpp ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h"
\ No newline at end of file
diff --git a/lib/DebugInfo/CodeView/ModuleSubstream.cpp b/lib/DebugInfo/CodeView/ModuleSubstream.cpp
deleted file mode 100644
index 69a7c59116c..00000000000
--- a/lib/DebugInfo/CodeView/ModuleSubstream.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===- ModuleSubstream.cpp --------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-
-#include "llvm/Support/BinaryStreamReader.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-ModuleSubstream::ModuleSubstream() : Kind(ModuleSubstreamKind::None) {}
-
-ModuleSubstream::ModuleSubstream(ModuleSubstreamKind Kind, BinaryStreamRef Data)
-    : Kind(Kind), Data(Data) {}
-
-Error ModuleSubstream::initialize(BinaryStreamRef Stream,
-                                  ModuleSubstream &Info) {
-  const ModuleSubsectionHeader *Header;
-  BinaryStreamReader Reader(Stream);
-  if (auto EC = Reader.readObject(Header))
-    return EC;
-
-  ModuleSubstreamKind Kind =
-      static_cast<ModuleSubstreamKind>(uint32_t(Header->Kind));
-  if (auto EC = Reader.readStreamRef(Info.Data, Header->Length))
-    return EC;
-  Info.Kind = Kind;
-  return Error::success();
-}
-
-uint32_t ModuleSubstream::getRecordLength() const {
-  return sizeof(ModuleSubsectionHeader) + Data.getLength();
-}
-
-ModuleSubstreamKind ModuleSubstream::getSubstreamKind() const { return Kind; }
-
-BinaryStreamRef ModuleSubstream::getRecordData() const { return Data; }
diff --git a/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp b/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
deleted file mode 100644
index e490a78cadb..00000000000
--- a/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-//===- ModuleSubstreamVisitor.cpp -------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/BinaryStreamRef.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-Error IModuleSubstreamVisitor::visitSymbols(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::Symbols, Data);
-}
-Error IModuleSubstreamVisitor::visitLines(BinaryStreamRef Data,
-                                          const LineSubstreamHeader *Header,
-                                          const LineInfoArray &Lines) {
-  return visitUnknown(ModuleSubstreamKind::Lines, Data);
-}
-Error IModuleSubstreamVisitor::visitStringTable(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::StringTable, Data);
-}
-Error IModuleSubstreamVisitor::visitFileChecksums(
-    BinaryStreamRef Data, const FileChecksumArray &Checksums) {
-  return visitUnknown(ModuleSubstreamKind::FileChecksums, Data);
-}
-Error IModuleSubstreamVisitor::visitFrameData(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::FrameData, Data);
-}
-Error IModuleSubstreamVisitor::visitInlineeLines(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::InlineeLines, Data);
-}
-Error IModuleSubstreamVisitor::visitCrossScopeImports(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::CrossScopeExports, Data);
-}
-Error IModuleSubstreamVisitor::visitCrossScopeExports(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::CrossScopeImports, Data);
-}
-Error IModuleSubstreamVisitor::visitILLines(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::ILLines, Data);
-}
-Error IModuleSubstreamVisitor::visitFuncMDTokenMap(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::FuncMDTokenMap, Data);
-}
-Error IModuleSubstreamVisitor::visitTypeMDTokenMap(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::TypeMDTokenMap, Data);
-}
-Error IModuleSubstreamVisitor::visitMergedAssemblyInput(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::MergedAssemblyInput, Data);
-}
-Error IModuleSubstreamVisitor::visitCoffSymbolRVA(BinaryStreamRef Data) {
-  return visitUnknown(ModuleSubstreamKind::CoffSymbolRVA, Data);
-}
-
-Error llvm::codeview::visitModuleSubstream(const ModuleSubstream &R,
-                                           IModuleSubstreamVisitor &V) {
-  switch (R.getSubstreamKind()) {
-  case ModuleSubstreamKind::Symbols:
-    return V.visitSymbols(R.getRecordData());
-  case ModuleSubstreamKind::Lines: {
-    BinaryStreamReader Reader(R.getRecordData());
-    const LineSubstreamHeader *Header;
-    if (auto EC = Reader.readObject(Header))
-      return EC;
-    VarStreamArrayExtractor<LineColumnEntry> E(Header);
-    LineInfoArray LineInfos(E);
-    if (auto EC = Reader.readArray(LineInfos, Reader.bytesRemaining()))
-      return EC;
-    return V.visitLines(R.getRecordData(), Header, LineInfos);
-  }
-  case ModuleSubstreamKind::StringTable:
-    return V.visitStringTable(R.getRecordData());
-  case ModuleSubstreamKind::FileChecksums: {
-    BinaryStreamReader Reader(R.getRecordData());
-    FileChecksumArray Checksums;
-    if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining()))
-      return EC;
-    return V.visitFileChecksums(R.getRecordData(), Checksums);
-  }
-  case ModuleSubstreamKind::FrameData:
-    return V.visitFrameData(R.getRecordData());
-  case ModuleSubstreamKind::InlineeLines:
-    return V.visitInlineeLines(R.getRecordData());
-  case ModuleSubstreamKind::CrossScopeImports:
-    return V.visitCrossScopeImports(R.getRecordData());
-  case ModuleSubstreamKind::CrossScopeExports:
-    return V.visitCrossScopeExports(R.getRecordData());
-  case ModuleSubstreamKind::ILLines:
-    return V.visitILLines(R.getRecordData());
-  case ModuleSubstreamKind::FuncMDTokenMap:
-    return V.visitFuncMDTokenMap(R.getRecordData());
-  case ModuleSubstreamKind::TypeMDTokenMap:
-    return V.visitTypeMDTokenMap(R.getRecordData());
-  case ModuleSubstreamKind::MergedAssemblyInput:
-    return V.visitMergedAssemblyInput(R.getRecordData());
-  case ModuleSubstreamKind::CoffSymbolRVA:
-    return V.visitCoffSymbolRVA(R.getRecordData());
-  default:
-    return V.visitUnknown(R.getSubstreamKind(), R.getRecordData());
-  }
-}
diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp
index f9ded6ce2a8..efaba4646ff 100644
--- a/lib/DebugInfo/CodeView/TypeDatabase.cpp
+++ b/lib/DebugInfo/CodeView/TypeDatabase.cpp
@@ -110,6 +110,10 @@ const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const {
   return TypeRecords[Index.getIndex() - TypeIndex::FirstNonSimpleIndex];
 }
 
+CVType &TypeDatabase::getTypeRecord(TypeIndex Index) {
+  return TypeRecords[Index.getIndex() - TypeIndex::FirstNonSimpleIndex];
+}
+
 bool TypeDatabase::containsTypeIndex(TypeIndex Index) const {
   uint32_t I = Index.getIndex() - TypeIndex::FirstNonSimpleIndex;
   return I < CVUDTNames.size();
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 7e8d04672c0..b4ecbf805d1 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -284,6 +284,119 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH,
                      getStringSection(), isLittleEndian());
 }
 
+bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) {
+  bool Success = true;
+  if (DumpType == DIDT_All || DumpType == DIDT_Info) {
+    OS << "Verifying .debug_info...\n";
+    for (const auto &CU : compile_units()) {
+      unsigned NumDies = CU->getNumDIEs();
+      for (unsigned I = 0; I < NumDies; ++I) {
+        auto Die = CU->getDIEAtIndex(I);
+        const auto Tag = Die.getTag();
+        if (Tag == DW_TAG_null)
+          continue;
+        for (auto AttrValue : Die.attributes()) {
+          const auto Attr = AttrValue.Attr;
+          const auto Form = AttrValue.Value.getForm();
+          switch (Attr) {
+          case DW_AT_ranges:
+            // Make sure the offset in the DW_AT_ranges attribute is valid.
+            if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
+              if (*SectionOffset >= getRangeSection().Data.size()) {
+                Success = false;
+                OS << "error: DW_AT_ranges offset is beyond .debug_ranges "
+                      "bounds:\n";
+                Die.dump(OS, 0);
+                OS << "\n";
+              }
+            } else {
+              Success = false;
+              OS << "error: DIE has invalid DW_AT_ranges encoding:\n";
+              Die.dump(OS, 0);
+              OS << "\n";
+            }
+            break;
+          case DW_AT_stmt_list:
+            // Make sure the offset in the DW_AT_stmt_list attribute is valid.
+            if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
+              if (*SectionOffset >= getLineSection().Data.size()) {
+                Success = false;
+                OS << "error: DW_AT_stmt_list offset is beyond .debug_line "
+                      "bounds: "
+                   << format("0x%08" PRIx32, *SectionOffset) << "\n";
+                CU->getUnitDIE().dump(OS, 0);
+                OS << "\n";
+              }
+            } else {
+              Success = false;
+              OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n";
+              Die.dump(OS, 0);
+              OS << "\n";
+            }
+            break;
+
+          default:
+            break;
+          }
+          switch (Form) {
+          case DW_FORM_ref1:
+          case DW_FORM_ref2:
+          case DW_FORM_ref4:
+          case DW_FORM_ref8:
+          case DW_FORM_ref_udata: {
+            // Verify all CU relative references are valid CU offsets.
+            Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
+            assert(RefVal);
+            if (RefVal) {
+              auto DieCU = Die.getDwarfUnit();
+              auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
+              auto CUOffset = AttrValue.Value.getRawUValue();
+              if (CUOffset >= CUSize) {
+                Success = false;
+                OS << "error: " << FormEncodingString(Form) << " CU offset "
+                   << format("0x%08" PRIx32, CUOffset)
+                   << " is invalid (must be less than CU size of "
+                   << format("0x%08" PRIx32, CUSize) << "):\n";
+                Die.dump(OS, 0);
+                OS << "\n";
+              }
+            }
+            break;
+          }
+          case DW_FORM_ref_addr: {
+            // Verify all absolute DIE references have valid offsets in the
+            // .debug_info section.
+            Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
+            assert(RefVal);
+            if (RefVal && *RefVal >= getInfoSection().Data.size()) {
+              Success = false;
+              OS << "error: DW_FORM_ref_addr offset beyond .debug_info "
+                    "bounds:\n";
+              Die.dump(OS, 0);
+              OS << "\n";
+            }
+            break;
+          }
+          case DW_FORM_strp: {
+            auto SecOffset = AttrValue.Value.getAsSectionOffset();
+            assert(SecOffset); // DW_FORM_strp is a section offset.
+            if (SecOffset && *SecOffset >= getStringSection().size()) {
+              Success = false;
+              OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n";
+              Die.dump(OS, 0);
+              OS << "\n";
+            }
+            break;
+          }
+          default:
+            break;
+          }
+        }
+      }
+    }
+  }
+  return Success;
+}
 const DWARFUnitIndex &DWARFContext::getCUIndex() {
   if (CUIndex)
     return *CUIndex;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index ff6ed9c6741..77f3c00cc03 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
@@ -51,95 +51,95 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
      << format("      line_range: %u\n", LineRange)
      << format("     opcode_base: %u\n", OpcodeBase);
 
-  for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i)
+  for (uint32_t I = 0; I != StandardOpcodeLengths.size(); ++I)
     OS << format("standard_opcode_lengths[%s] = %u\n",
-                 LNStandardString(i + 1).data(), StandardOpcodeLengths[i]);
+                 LNStandardString(I + 1).data(), StandardOpcodeLengths[I]);
 
   if (!IncludeDirectories.empty())
-    for (uint32_t i = 0; i < IncludeDirectories.size(); ++i)
-      OS << format("include_directories[%3u] = '", i + 1)
-         << IncludeDirectories[i] << "'\n";
+    for (uint32_t I = 0; I != IncludeDirectories.size(); ++I)
+      OS << format("include_directories[%3u] = '", I + 1)
+         << IncludeDirectories[I] << "'\n";
 
   if (!FileNames.empty()) {
     OS << "                Dir  Mod Time   File Len   File Name\n"
        << "                ---- ---------- ---------- -----------"
           "----------------\n";
-    for (uint32_t i = 0; i < FileNames.size(); ++i) {
-      const FileNameEntry &fileEntry = FileNames[i];
-      OS << format("file_names[%3u] %4" PRIu64 " ", i + 1, fileEntry.DirIdx)
-         << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", fileEntry.ModTime,
-                   fileEntry.Length)
-         << fileEntry.Name << '\n';
+    for (uint32_t I = 0; I != FileNames.size(); ++I) {
+      const FileNameEntry &FileEntry = FileNames[I];
+      OS << format("file_names[%3u] %4" PRIu64 " ", I + 1, FileEntry.DirIdx)
+         << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", FileEntry.ModTime,
+                   FileEntry.Length)
+         << FileEntry.Name << '\n';
     }
   }
 }
 
-bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data,
-                                     uint32_t *offset_ptr) {
-  const uint64_t prologue_offset = *offset_ptr;
+bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
+                                     uint32_t *OffsetPtr) {
+  const uint64_t PrologueOffset = *OffsetPtr;
 
   clear();
-  TotalLength = debug_line_data.getU32(offset_ptr);
+  TotalLength = DebugLineData.getU32(OffsetPtr);
   if (TotalLength == UINT32_MAX) {
     IsDWARF64 = true;
-    TotalLength = debug_line_data.getU64(offset_ptr);
+    TotalLength = DebugLineData.getU64(OffsetPtr);
   } else if (TotalLength > 0xffffff00) {
     return false;
   }
-  Version = debug_line_data.getU16(offset_ptr);
+  Version = DebugLineData.getU16(OffsetPtr);
   if (Version < 2)
     return false;
 
-  PrologueLength =
-      debug_line_data.getUnsigned(offset_ptr, sizeofPrologueLength());
-  const uint64_t end_prologue_offset = PrologueLength + *offset_ptr;
-  MinInstLength = debug_line_data.getU8(offset_ptr);
+  PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength());
+  const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr;
+  MinInstLength = DebugLineData.getU8(OffsetPtr);
   if (Version >= 4)
-    MaxOpsPerInst = debug_line_data.getU8(offset_ptr);
-  DefaultIsStmt = debug_line_data.getU8(offset_ptr);
-  LineBase = debug_line_data.getU8(offset_ptr);
-  LineRange = debug_line_data.getU8(offset_ptr);
-  OpcodeBase = debug_line_data.getU8(offset_ptr);
+    MaxOpsPerInst = DebugLineData.getU8(OffsetPtr);
+  DefaultIsStmt = DebugLineData.getU8(OffsetPtr);
+  LineBase = DebugLineData.getU8(OffsetPtr);
+  LineRange = DebugLineData.getU8(OffsetPtr);
+  OpcodeBase = DebugLineData.getU8(OffsetPtr);
 
   StandardOpcodeLengths.reserve(OpcodeBase - 1);
-  for (uint32_t i = 1; i < OpcodeBase; ++i) {
-    uint8_t op_len = debug_line_data.getU8(offset_ptr);
-    StandardOpcodeLengths.push_back(op_len);
+  for (uint32_t I = 1; I < OpcodeBase; ++I) {
+    uint8_t OpLen = DebugLineData.getU8(OffsetPtr);
+    StandardOpcodeLengths.push_back(OpLen);
   }
 
-  while (*offset_ptr < end_prologue_offset) {
-    const char *s = debug_line_data.getCStr(offset_ptr);
-    if (s && s[0])
-      IncludeDirectories.push_back(s);
+  while (*OffsetPtr < EndPrologueOffset) {
+    const char *S = DebugLineData.getCStr(OffsetPtr);
+    if (S && S[0])
+      IncludeDirectories.push_back(S);
     else
       break;
   }
 
-  while (*offset_ptr < end_prologue_offset) {
-    const char *name = debug_line_data.getCStr(offset_ptr);
-    if (name && name[0]) {
-      FileNameEntry fileEntry;
-      fileEntry.Name = name;
-      fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
-      fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
-      fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
-      FileNames.push_back(fileEntry);
+  while (*OffsetPtr < EndPrologueOffset) {
+    const char *Name = DebugLineData.getCStr(OffsetPtr);
+    if (Name && Name[0]) {
+      FileNameEntry FileEntry;
+      FileEntry.Name = Name;
+      FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
+      FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
+      FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
+      FileNames.push_back(FileEntry);
     } else {
       break;
     }
   }
 
-  if (*offset_ptr != end_prologue_offset) {
-    fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64
-                    " should have ended at 0x%8.8" PRIx64
-                    " but it ended at 0x%8.8" PRIx64 "\n",
-            prologue_offset, end_prologue_offset, (uint64_t)*offset_ptr);
+  if (*OffsetPtr != EndPrologueOffset) {
+    fprintf(stderr,
+            "warning: parsing line table prologue at 0x%8.8" PRIx64
+            " should have ended at 0x%8.8" PRIx64
+            " but it ended at 0x%8.8" PRIx64 "\n",
+            PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr);
     return false;
   }
   return true;
 }
 
-DWARFDebugLine::Row::Row(bool default_is_stmt) { reset(default_is_stmt); }
+DWARFDebugLine::Row::Row(bool DefaultIsStmt) { reset(DefaultIsStmt); }
 
 void DWARFDebugLine::Row::postAppend() {
   BasicBlock = false;
@@ -147,14 +147,14 @@ void DWARFDebugLine::Row::postAppend() {
   EpilogueBegin = false;
 }
 
-void DWARFDebugLine::Row::reset(bool default_is_stmt) {
+void DWARFDebugLine::Row::reset(bool DefaultIsStmt) {
   Address = 0;
   Line = 1;
   Column = 0;
   File = 1;
   Isa = 0;
   Discriminator = 0;
-  IsStmt = default_is_stmt;
+  IsStmt = DefaultIsStmt;
   BasicBlock = false;
   EndSequence = false;
   PrologueEnd = false;
@@ -212,7 +212,7 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() {
   Sequence.reset();
 }
 
-void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) {
+void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t Offset) {
   if (Sequence.Empty) {
     // Record the beginning of instruction sequence.
     Sequence.Empty = false;
@@ -233,56 +233,56 @@ void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) {
 }
 
 const DWARFDebugLine::LineTable *
-DWARFDebugLine::getLineTable(uint32_t offset) const {
-  LineTableConstIter pos = LineTableMap.find(offset);
-  if (pos != LineTableMap.end())
-    return &pos->second;
+DWARFDebugLine::getLineTable(uint32_t Offset) const {
+  LineTableConstIter Pos = LineTableMap.find(Offset);
+  if (Pos != LineTableMap.end())
+    return &Pos->second;
   return nullptr;
 }
 
 const DWARFDebugLine::LineTable *
-DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data,
-                                    uint32_t offset) {
-  std::pair<LineTableIter, bool> pos =
-      LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable()));
-  LineTable *LT = &pos.first->second;
-  if (pos.second) {
-    if (!LT->parse(debug_line_data, RelocMap, &offset))
+DWARFDebugLine::getOrParseLineTable(DataExtractor DebugLineData,
+                                    uint32_t Offset) {
+  std::pair<LineTableIter, bool> Pos =
+      LineTableMap.insert(LineTableMapTy::value_type(Offset, LineTable()));
+  LineTable *LT = &Pos.first->second;
+  if (Pos.second) {
+    if (!LT->parse(DebugLineData, RelocMap, &Offset))
       return nullptr;
   }
   return LT;
 }
 
-bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
+bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData,
                                       const RelocAddrMap *RMap,
-                                      uint32_t *offset_ptr) {
-  const uint32_t debug_line_offset = *offset_ptr;
+                                      uint32_t *OffsetPtr) {
+  const uint32_t DebugLineOffset = *OffsetPtr;
 
   clear();
 
-  if (!Prologue.parse(debug_line_data, offset_ptr)) {
+  if (!Prologue.parse(DebugLineData, OffsetPtr)) {
     // Restore our offset and return false to indicate failure!
-    *offset_ptr = debug_line_offset;
+    *OffsetPtr = DebugLineOffset;
     return false;
   }
 
-  const uint32_t end_offset =
-      debug_line_offset + Prologue.TotalLength + Prologue.sizeofTotalLength();
+  const uint32_t EndOffset =
+      DebugLineOffset + Prologue.TotalLength + Prologue.sizeofTotalLength();
 
   ParsingState State(this);
 
-  while (*offset_ptr < end_offset) {
-    uint8_t opcode = debug_line_data.getU8(offset_ptr);
+  while (*OffsetPtr < EndOffset) {
+    uint8_t Opcode = DebugLineData.getU8(OffsetPtr);
 
-    if (opcode == 0) {
+    if (Opcode == 0) {
       // Extended Opcodes always start with a zero opcode followed by
       // a uleb128 length so you can skip ones you don't know about
-      uint32_t ext_offset = *offset_ptr;
-      uint64_t len = debug_line_data.getULEB128(offset_ptr);
-      uint32_t arg_size = len - (*offset_ptr - ext_offset);
+      uint32_t ExtOffset = *OffsetPtr;
+      uint64_t Len = DebugLineData.getULEB128(OffsetPtr);
+      uint32_t ArgSize = Len - (*OffsetPtr - ExtOffset);
 
-      uint8_t sub_opcode = debug_line_data.getU8(offset_ptr);
-      switch (sub_opcode) {
+      uint8_t SubOpcode = DebugLineData.getU8(OffsetPtr);
+      switch (SubOpcode) {
       case DW_LNE_end_sequence:
         // Set the end_sequence register of the state machine to true and
         // append a row to the matrix using the current values of the
@@ -292,7 +292,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
         // address is that of the byte after the last target machine instruction
         // of the sequence.
         State.Row.EndSequence = true;
-        State.appendRowToMatrix(*offset_ptr);
+        State.appendRowToMatrix(*OffsetPtr);
         State.resetRowAndSequence();
         break;
 
@@ -303,9 +303,8 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
         // relocatable address. All of the other statement program opcodes
         // that affect the address register add a delta to it. This instruction
         // stores a relocatable value into it instead.
-        State.Row.Address =
-            getRelocatedValue(debug_line_data, debug_line_data.getAddressSize(),
-                              offset_ptr, RMap);
+        State.Row.Address = getRelocatedValue(
+            DebugLineData, DebugLineData.getAddressSize(), OffsetPtr, RMap);
         break;
 
       case DW_LNE_define_file:
@@ -330,33 +329,33 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
         // the DW_LNE_define_file instruction. These numbers are used in the
         // the file register of the state machine.
         {
-          FileNameEntry fileEntry;
-          fileEntry.Name = debug_line_data.getCStr(offset_ptr);
-          fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
-          fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
-          fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
-          Prologue.FileNames.push_back(fileEntry);
+          FileNameEntry FileEntry;
+          FileEntry.Name = DebugLineData.getCStr(OffsetPtr);
+          FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
+          FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
+          FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
+          Prologue.FileNames.push_back(FileEntry);
         }
         break;
 
       case DW_LNE_set_discriminator:
-        State.Row.Discriminator = debug_line_data.getULEB128(offset_ptr);
+        State.Row.Discriminator = DebugLineData.getULEB128(OffsetPtr);
         break;
 
       default:
         // Length doesn't include the zero opcode byte or the length itself, but
         // it does include the sub_opcode, so we have to adjust for that below
-        (*offset_ptr) += arg_size;
+        (*OffsetPtr) += ArgSize;
         break;
       }
-    } else if (opcode < Prologue.OpcodeBase) {
-      switch (opcode) {
+    } else if (Opcode < Prologue.OpcodeBase) {
+      switch (Opcode) {
       // Standard Opcodes
       case DW_LNS_copy:
         // Takes no arguments. Append a row to the matrix using the
         // current values of the state-machine registers. Then set
         // the basic_block register to false.
-        State.appendRowToMatrix(*offset_ptr);
+        State.appendRowToMatrix(*OffsetPtr);
         break;
 
       case DW_LNS_advance_pc:
@@ -364,25 +363,25 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
         // min_inst_length field of the prologue, and adds the
         // result to the address register of the state machine.
         State.Row.Address +=
-            debug_line_data.getULEB128(offset_ptr) * Prologue.MinInstLength;
+            DebugLineData.getULEB128(OffsetPtr) * Prologue.MinInstLength;
         break;
 
       case DW_LNS_advance_line:
         // Takes a single signed LEB128 operand and adds that value to
         // the line register of the state machine.
-        State.Row.Line += debug_line_data.getSLEB128(offset_ptr);
+        State.Row.Line += DebugLineData.getSLEB128(OffsetPtr);
         break;
 
       case DW_LNS_set_file:
         // Takes a single unsigned LEB128 operand and stores it in the file
         // register of the state machine.
-        State.Row.File = debug_line_data.getULEB128(offset_ptr);
+        State.Row.File = DebugLineData.getULEB128(OffsetPtr);
         break;
 
       case DW_LNS_set_column:
         // Takes a single unsigned LEB128 operand and stores it in the
         // column register of the state machine.
-        State.Row.Column = debug_line_data.getULEB128(offset_ptr);
+        State.Row.Column = DebugLineData.getULEB128(OffsetPtr);
         break;
 
       case DW_LNS_negate_stmt:
@@ -410,10 +409,10 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
         // than twice that range will it need to use both DW_LNS_advance_pc
         // and a special opcode, requiring three or more bytes.
         {
-          uint8_t adjust_opcode = 255 - Prologue.OpcodeBase;
-          uint64_t addr_offset =
-              (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
-          State.Row.Address += addr_offset;
+          uint8_t AdjustOpcode = 255 - Prologue.OpcodeBase;
+          uint64_t AddrOffset =
+              (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength;
+          State.Row.Address += AddrOffset;
         }
         break;
 
@@ -427,7 +426,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
         // judge when the computation of a special opcode overflows and
         // requires the use of DW_LNS_advance_pc. Such assemblers, however,
         // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
-        State.Row.Address += debug_line_data.getU16(offset_ptr);
+        State.Row.Address += DebugLineData.getU16(OffsetPtr);
         break;
 
       case DW_LNS_set_prologue_end:
@@ -445,7 +444,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
       case DW_LNS_set_isa:
         // Takes a single unsigned LEB128 operand and stores it in the
         // column register of the state machine.
-        State.Row.Isa = debug_line_data.getULEB128(offset_ptr);
+        State.Row.Isa = DebugLineData.getULEB128(OffsetPtr);
         break;
 
       default:
@@ -453,10 +452,10 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
         // of such opcodes because they are specified in the prologue
         // as a multiple of LEB128 operands for each opcode.
         {
-          assert(opcode - 1U < Prologue.StandardOpcodeLengths.size());
-          uint8_t opcode_length = Prologue.StandardOpcodeLengths[opcode - 1];
-          for (uint8_t i = 0; i < opcode_length; ++i)
-            debug_line_data.getULEB128(offset_ptr);
+          assert(Opcode - 1U < Prologue.StandardOpcodeLengths.size());
+          uint8_t OpcodeLength = Prologue.StandardOpcodeLengths[Opcode - 1];
+          for (uint8_t I = 0; I < OpcodeLength; ++I)
+            DebugLineData.getULEB128(OffsetPtr);
         }
         break;
       }
@@ -494,14 +493,14 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
       //
       // line increment = line_base + (adjusted opcode % line_range)
 
-      uint8_t adjust_opcode = opcode - Prologue.OpcodeBase;
-      uint64_t addr_offset =
-          (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
-      int32_t line_offset =
-          Prologue.LineBase + (adjust_opcode % Prologue.LineRange);
-      State.Row.Line += line_offset;
-      State.Row.Address += addr_offset;
-      State.appendRowToMatrix(*offset_ptr);
+      uint8_t AdjustOpcode = Opcode - Prologue.OpcodeBase;
+      uint64_t AddrOffset =
+          (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength;
+      int32_t LineOffset =
+          Prologue.LineBase + (AdjustOpcode % Prologue.LineRange);
+      State.Row.Line += LineOffset;
+      State.Row.Address += AddrOffset;
+      State.appendRowToMatrix(*OffsetPtr);
       // Reset discriminator to 0.
       State.Row.Discriminator = 0;
     }
@@ -523,120 +522,118 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
     // rudimentary sequences for address ranges [0x0, 0xsomething).
   }
 
-  return end_offset;
+  return EndOffset;
 }
 
 uint32_t
-DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &seq,
-                                        uint64_t address) const {
-  if (!seq.containsPC(address))
+DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &Seq,
+                                        uint64_t Address) const {
+  if (!Seq.containsPC(Address))
     return UnknownRowIndex;
   // Search for instruction address in the rows describing the sequence.
   // Rows are stored in a vector, so we may use arithmetical operations with
   // iterators.
-  DWARFDebugLine::Row row;
-  row.Address = address;
-  RowIter first_row = Rows.begin() + seq.FirstRowIndex;
-  RowIter last_row = Rows.begin() + seq.LastRowIndex;
-  LineTable::RowIter row_pos = std::lower_bound(
-      first_row, last_row, row, DWARFDebugLine::Row::orderByAddress);
-  if (row_pos == last_row) {
-    return seq.LastRowIndex - 1;
+  DWARFDebugLine::Row Row;
+  Row.Address = Address;
+  RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex;
+  RowIter LastRow = Rows.begin() + Seq.LastRowIndex;
+  LineTable::RowIter RowPos = std::lower_bound(
+      FirstRow, LastRow, Row, DWARFDebugLine::Row::orderByAddress);
+  if (RowPos == LastRow) {
+    return Seq.LastRowIndex - 1;
   }
-  uint32_t index = seq.FirstRowIndex + (row_pos - first_row);
-  if (row_pos->Address > address) {
-    if (row_pos == first_row)
+  uint32_t Index = Seq.FirstRowIndex + (RowPos - FirstRow);
+  if (RowPos->Address > Address) {
+    if (RowPos == FirstRow)
       return UnknownRowIndex;
     else
-      index--;
+      Index--;
   }
-  return index;
+  return Index;
 }
 
-uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
+uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t Address) const {
   if (Sequences.empty())
     return UnknownRowIndex;
   // First, find an instruction sequence containing the given address.
-  DWARFDebugLine::Sequence sequence;
-  sequence.LowPC = address;
-  SequenceIter first_seq = Sequences.begin();
-  SequenceIter last_seq = Sequences.end();
-  SequenceIter seq_pos = std::lower_bound(
-      first_seq, last_seq, sequence, DWARFDebugLine::Sequence::orderByLowPC);
-  DWARFDebugLine::Sequence found_seq;
-  if (seq_pos == last_seq) {
-    found_seq = Sequences.back();
-  } else if (seq_pos->LowPC == address) {
-    found_seq = *seq_pos;
+  DWARFDebugLine::Sequence Sequence;
+  Sequence.LowPC = Address;
+  SequenceIter FirstSeq = Sequences.begin();
+  SequenceIter LastSeq = Sequences.end();
+  SequenceIter SeqPos = std::lower_bound(
+      FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC);
+  DWARFDebugLine::Sequence FoundSeq;
+  if (SeqPos == LastSeq) {
+    FoundSeq = Sequences.back();
+  } else if (SeqPos->LowPC == Address) {
+    FoundSeq = *SeqPos;
   } else {
-    if (seq_pos == first_seq)
+    if (SeqPos == FirstSeq)
       return UnknownRowIndex;
-    found_seq = *(seq_pos - 1);
+    FoundSeq = *(SeqPos - 1);
   }
-  return findRowInSeq(found_seq, address);
+  return findRowInSeq(FoundSeq, Address);
 }
 
 bool DWARFDebugLine::LineTable::lookupAddressRange(
-    uint64_t address, uint64_t size, std::vector<uint32_t> &result) const {
+    uint64_t Address, uint64_t Size, std::vector<uint32_t> &Result) const {
   if (Sequences.empty())
     return false;
-  uint64_t end_addr = address + size;
+  uint64_t EndAddr = Address + Size;
   // First, find an instruction sequence containing the given address.
-  DWARFDebugLine::Sequence sequence;
-  sequence.LowPC = address;
-  SequenceIter first_seq = Sequences.begin();
-  SequenceIter last_seq = Sequences.end();
-  SequenceIter seq_pos = std::lower_bound(
-      first_seq, last_seq, sequence, DWARFDebugLine::Sequence::orderByLowPC);
-  if (seq_pos == last_seq || seq_pos->LowPC != address) {
-    if (seq_pos == first_seq)
+  DWARFDebugLine::Sequence Sequence;
+  Sequence.LowPC = Address;
+  SequenceIter FirstSeq = Sequences.begin();
+  SequenceIter LastSeq = Sequences.end();
+  SequenceIter SeqPos = std::lower_bound(
+      FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC);
+  if (SeqPos == LastSeq || SeqPos->LowPC != Address) {
+    if (SeqPos == FirstSeq)
       return false;
-    seq_pos--;
+    SeqPos--;
   }
-  if (!seq_pos->containsPC(address))
+  if (!SeqPos->containsPC(Address))
     return false;
 
-  SequenceIter start_pos = seq_pos;
+  SequenceIter StartPos = SeqPos;
 
   // Add the rows from the first sequence to the vector, starting with the
   // index we just calculated
 
-  while (seq_pos != last_seq && seq_pos->LowPC < end_addr) {
-    const DWARFDebugLine::Sequence &cur_seq = *seq_pos;
+  while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
+    const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
     // For the first sequence, we need to find which row in the sequence is the
     // first in our range.
-    uint32_t first_row_index = cur_seq.FirstRowIndex;
-    if (seq_pos == start_pos)
-      first_row_index = findRowInSeq(cur_seq, address);
+    uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
+    if (SeqPos == StartPos)
+      FirstRowIndex = findRowInSeq(CurSeq, Address);
 
     // Figure out the last row in the range.
-    uint32_t last_row_index = findRowInSeq(cur_seq, end_addr - 1);
-    if (last_row_index == UnknownRowIndex)
-      last_row_index = cur_seq.LastRowIndex - 1;
+    uint32_t LastRowIndex = findRowInSeq(CurSeq, EndAddr - 1);
+    if (LastRowIndex == UnknownRowIndex)
+      LastRowIndex = CurSeq.LastRowIndex - 1;
 
-    assert(first_row_index != UnknownRowIndex);
-    assert(last_row_index != UnknownRowIndex);
+    assert(FirstRowIndex != UnknownRowIndex);
+    assert(LastRowIndex != UnknownRowIndex);
 
-    for (uint32_t i = first_row_index; i <= last_row_index; ++i) {
-      result.push_back(i);
+    for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) {
+      Result.push_back(I);
     }
 
-    ++seq_pos;
+    ++SeqPos;
   }
 
   return true;
 }
 
-bool
-DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
+bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
   return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
 }
 
-bool
-DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
-                                              const char *CompDir,
-                                              FileLineInfoKind Kind,
-                                              std::string &Result) const {
+bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
+                                                   const char *CompDir,
+                                                   FileLineInfoKind Kind,
+                                                   std::string &Result) const {
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return false;
   const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index 662e53d9d7e..daded255f8c 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -45,7 +45,7 @@ DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian,
 }
 
 void DWARFDebugPubTable::dump(StringRef Name, raw_ostream &OS) const {
-  OS << "\n." << Name << " contents: a\n";
+  OS << "\n." << Name << " contents:\n";
   for (const Set &S : Sets) {
     OS << "length = " << format("0x%08x", S.Length);
     OS << " version = " << format("0x%04x", S.Version);
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 28592e4dfb6..7f827de8924 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -309,8 +309,10 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
   }
   // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset.
   // Don't check for DWARF version here, as some producers may still do this
-  // by mistake.
-  return (Form == DW_FORM_data4 || Form == DW_FORM_data8) &&
+  // by mistake. Also accept DW_FORM_strp since this is .debug_str section
+  // offset.
+  return (Form == DW_FORM_data4 || Form == DW_FORM_data8 ||
+          Form == DW_FORM_strp) &&
          FC == FC_SectionOffset;
 }
 
diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index 76354a9b1dd..0625d01097c 100644
--- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -39,8 +39,9 @@ void DWARFGdbIndex::dumpAddressArea(raw_ostream &OS) const {
      << '\n';
   for (const AddressEntry &Addr : AddressArea)
     OS << format(
-        "    Low address = 0x%llx, High address = 0x%llx, CU index = %d\n",
-        Addr.LowAddress, Addr.HighAddress, Addr.CuIndex);
+        "    Low/High address = [0x%llx, 0x%llx) (Size: 0x%llx), CU id = %d\n",
+        Addr.LowAddress, Addr.HighAddress, Addr.HighAddress - Addr.LowAddress,
+        Addr.CuIndex);
 }
 
 void DWARFGdbIndex::dumpSymbolTable(raw_ostream &OS) const {
diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt
index f87a0b0a72e..bd35efb51c7 100644
--- a/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/lib/DebugInfo/PDB/CMakeLists.txt
@@ -28,6 +28,8 @@ if(LLVM_ENABLE_DIA_SDK)
 endif()
 
 add_pdb_impl_folder(Native
+  Native/DbiModuleDescriptor.cpp
+  Native/DbiModuleDescriptorBuilder.cpp
   Native/DbiStream.cpp
   Native/DbiStreamBuilder.cpp
   Native/EnumTables.cpp
@@ -37,9 +39,7 @@ add_pdb_impl_folder(Native
   Native/HashTable.cpp
   Native/InfoStream.cpp
   Native/InfoStreamBuilder.cpp
-  Native/ModInfo.cpp
-  Native/ModInfoBuilder.cpp
-  Native/ModStream.cpp
+  Native/ModuleDebugStream.cpp
   Native/NativeCompilandSymbol.cpp
   Native/NativeEnumModules.cpp
   Native/NativeExeSymbol.cpp
diff --git a/lib/DebugInfo/PDB/Native/ModInfo.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
similarity index 50%
rename from lib/DebugInfo/PDB/Native/ModInfo.cpp
rename to lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
index 1405286fd08..dabcc3447ee 100644
--- a/lib/DebugInfo/PDB/Native/ModInfo.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
@@ -1,4 +1,4 @@
-//===- ModInfo.cpp - PDB module information -------------------------------===//
+//===- DbiModuleDescriptor.cpp - PDB module information -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Endian.h"
@@ -19,13 +19,15 @@ using namespace llvm;
 using namespace llvm::pdb;
 using namespace llvm::support;
 
-ModInfo::ModInfo() = default;
+DbiModuleDescriptor::DbiModuleDescriptor() = default;
 
-ModInfo::ModInfo(const ModInfo &Info) = default;
+DbiModuleDescriptor::DbiModuleDescriptor(const DbiModuleDescriptor &Info) =
+    default;
 
-ModInfo::~ModInfo() = default;
+DbiModuleDescriptor::~DbiModuleDescriptor() = default;
 
-Error ModInfo::initialize(BinaryStreamRef Stream, ModInfo &Info) {
+Error DbiModuleDescriptor::initialize(BinaryStreamRef Stream,
+                                      DbiModuleDescriptor &Info) {
   BinaryStreamReader Reader(Stream);
   if (auto EC = Reader.readObject(Info.Layout))
     return EC;
@@ -38,40 +40,48 @@ Error ModInfo::initialize(BinaryStreamRef Stream, ModInfo &Info) {
   return Error::success();
 }
 
-bool ModInfo::hasECInfo() const {
+bool DbiModuleDescriptor::hasECInfo() const {
   return (Layout->Flags & ModInfoFlags::HasECFlagMask) != 0;
 }
 
-uint16_t ModInfo::getTypeServerIndex() const {
+uint16_t DbiModuleDescriptor::getTypeServerIndex() const {
   return (Layout->Flags & ModInfoFlags::TypeServerIndexMask) >>
          ModInfoFlags::TypeServerIndexShift;
 }
 
-uint16_t ModInfo::getModuleStreamIndex() const { return Layout->ModDiStream; }
+uint16_t DbiModuleDescriptor::getModuleStreamIndex() const {
+  return Layout->ModDiStream;
+}
 
-uint32_t ModInfo::getSymbolDebugInfoByteSize() const {
+uint32_t DbiModuleDescriptor::getSymbolDebugInfoByteSize() const {
   return Layout->SymBytes;
 }
 
-uint32_t ModInfo::getLineInfoByteSize() const { return Layout->LineBytes; }
+uint32_t DbiModuleDescriptor::getC11LineInfoByteSize() const {
+  return Layout->C11Bytes;
+}
 
-uint32_t ModInfo::getC13LineInfoByteSize() const { return Layout->C13Bytes; }
+uint32_t DbiModuleDescriptor::getC13LineInfoByteSize() const {
+  return Layout->C13Bytes;
+}
 
-uint32_t ModInfo::getNumberOfFiles() const { return Layout->NumFiles; }
+uint32_t DbiModuleDescriptor::getNumberOfFiles() const {
+  return Layout->NumFiles;
+}
 
-uint32_t ModInfo::getSourceFileNameIndex() const {
+uint32_t DbiModuleDescriptor::getSourceFileNameIndex() const {
   return Layout->SrcFileNameNI;
 }
 
-uint32_t ModInfo::getPdbFilePathNameIndex() const {
+uint32_t DbiModuleDescriptor::getPdbFilePathNameIndex() const {
   return Layout->PdbFilePathNI;
 }
 
-StringRef ModInfo::getModuleName() const { return ModuleName; }
+StringRef DbiModuleDescriptor::getModuleName() const { return ModuleName; }
 
-StringRef ModInfo::getObjFileName() const { return ObjFileName; }
+StringRef DbiModuleDescriptor::getObjFileName() const { return ObjFileName; }
 
-uint32_t ModInfo::getRecordLength() const {
+uint32_t DbiModuleDescriptor::getRecordLength() const {
   uint32_t M = ModuleName.str().size() + 1;
   uint32_t O = ObjFileName.str().size() + 1;
   uint32_t Size = sizeof(ModuleInfoHeader) + M + O;
diff --git a/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
similarity index 51%
rename from lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp
rename to lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 73c45a95352..f994b4538ef 100644
--- a/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -1,4 +1,4 @@
-//===- ModInfoBuilder.cpp - PDB Module Info Stream Creation -----*- C++ -*-===//
+//===- DbiModuleDescriptorBuilder.cpp - PDB Mod Info Creation ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,13 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/MSF/MSFCommon.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Support/BinaryItemStream.h"
@@ -35,47 +36,64 @@ template <> struct BinaryItemTraits<CVSymbol> {
 };
 }
 
-static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize) {
+static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize,
+                                            uint32_t C13Size) {
   uint32_t Size = sizeof(uint32_t); // Signature
   Size += SymbolByteSize;           // Symbol Data
-  Size += 0;                        // TODO: Layout.LineBytes
-  Size += 0;                        // TODO: Layout.C13Bytes
+  Size += 0;                        // TODO: Layout.C11Bytes
+  Size += C13Size;                  // C13 Debug Info Size
   Size += sizeof(uint32_t);         // GlobalRefs substream size (always 0)
   Size += 0;                        // GlobalRefs substream bytes
   return Size;
 }
 
-ModInfoBuilder::ModInfoBuilder(StringRef ModuleName, uint32_t ModIndex,
-                               msf::MSFBuilder &Msf)
+DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName,
+                                                       uint32_t ModIndex,
+                                                       msf::MSFBuilder &Msf)
     : MSF(Msf), ModuleName(ModuleName) {
   Layout.Mod = ModIndex;
 }
 
-uint16_t ModInfoBuilder::getStreamIndex() const { return Layout.ModDiStream; }
+DbiModuleDescriptorBuilder::~DbiModuleDescriptorBuilder() {}
 
-void ModInfoBuilder::setObjFileName(StringRef Name) { ObjFileName = Name; }
+uint16_t DbiModuleDescriptorBuilder::getStreamIndex() const {
+  return Layout.ModDiStream;
+}
 
-void ModInfoBuilder::addSymbol(CVSymbol Symbol) {
+void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) {
+  ObjFileName = Name;
+}
+
+void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) {
   Symbols.push_back(Symbol);
   SymbolByteSize += Symbol.data().size();
 }
 
-void ModInfoBuilder::addSourceFile(StringRef Path) {
+void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
   SourceFiles.push_back(Path);
 }
 
-uint32_t ModInfoBuilder::calculateSerializedLength() const {
+uint32_t DbiModuleDescriptorBuilder::calculateC13DebugInfoSize() const {
+  uint32_t Result = 0;
+  for (const auto &Builder : C13Builders) {
+    assert(Builder && "Empty C13 Fragment Builder!");
+    Result += Builder->calculateSerializedLength();
+  }
+  return Result;
+}
+
+uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const {
   uint32_t L = sizeof(Layout);
   uint32_t M = ModuleName.size() + 1;
   uint32_t O = ObjFileName.size() + 1;
   return alignTo(L + M + O, sizeof(uint32_t));
 }
 
-void ModInfoBuilder::finalize() {
-  Layout.C13Bytes = 0;
+void DbiModuleDescriptorBuilder::finalize() {
   Layout.FileNameOffs = 0; // TODO: Fix this
   Layout.Flags = 0;        // TODO: Fix this
-  Layout.LineBytes = 0;
+  Layout.C11Bytes = 0;
+  Layout.C13Bytes = calculateC13DebugInfoSize();
   (void)Layout.Mod;         // Set in constructor
   (void)Layout.ModDiStream; // Set in finalizeMsfLayout
   Layout.NumFiles = SourceFiles.size();
@@ -87,18 +105,20 @@ void ModInfoBuilder::finalize() {
   Layout.SymBytes = SymbolByteSize + sizeof(uint32_t);
 }
 
-Error ModInfoBuilder::finalizeMsfLayout() {
+Error DbiModuleDescriptorBuilder::finalizeMsfLayout() {
   this->Layout.ModDiStream = kInvalidStreamIndex;
-  auto ExpectedSN = MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize));
+  uint32_t C13Size = calculateC13DebugInfoSize();
+  auto ExpectedSN =
+      MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize, C13Size));
   if (!ExpectedSN)
     return ExpectedSN.takeError();
   Layout.ModDiStream = *ExpectedSN;
   return Error::success();
 }
 
-Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter,
-                             const msf::MSFLayout &MsfLayout,
-                             WritableBinaryStreamRef MsfBuffer) {
+Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
+                                         const msf::MSFLayout &MsfLayout,
+                                         WritableBinaryStreamRef MsfBuffer) {
   // We write the Modi record to the `ModiWriter`, but we additionally write its
   // symbol stream to a brand new stream.
   if (auto EC = ModiWriter.writeObject(Layout))
@@ -125,7 +145,13 @@ Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter,
     if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
       return EC;
     // TODO: Write C11 Line data
-    // TODO: Write C13 Line data
+
+    for (const auto &Builder : C13Builders) {
+      assert(Builder && "Empty C13 Fragment Builder!");
+      if (auto EC = Builder->commit(SymbolWriter))
+        return EC;
+    }
+
     // TODO: Figure out what GlobalRefs substream actually is and populate it.
     if (auto EC = SymbolWriter.writeInteger<uint32_t>(0))
       return EC;
@@ -134,3 +160,43 @@ Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter,
   }
   return Error::success();
 }
+
+void DbiModuleDescriptorBuilder::addC13Fragment(
+    std::unique_ptr<ModuleDebugLineFragment> Lines) {
+  ModuleDebugLineFragment &Frag = *Lines;
+
+  // File Checksums have to come first, so push an empty entry on if this
+  // is the first.
+  if (C13Builders.empty())
+    C13Builders.push_back(nullptr);
+
+  this->LineInfo.push_back(std::move(Lines));
+  C13Builders.push_back(
+      llvm::make_unique<ModuleDebugFragmentRecordBuilder>(Frag.kind(), Frag));
+}
+
+void DbiModuleDescriptorBuilder::addC13Fragment(
+    std::unique_ptr<codeview::ModuleDebugInlineeLineFragment> Inlinees) {
+  ModuleDebugInlineeLineFragment &Frag = *Inlinees;
+
+  // File Checksums have to come first, so push an empty entry on if this
+  // is the first.
+  if (C13Builders.empty())
+    C13Builders.push_back(nullptr);
+
+  this->Inlinees.push_back(std::move(Inlinees));
+  C13Builders.push_back(
+      llvm::make_unique<ModuleDebugFragmentRecordBuilder>(Frag.kind(), Frag));
+}
+
+void DbiModuleDescriptorBuilder::setC13FileChecksums(
+    std::unique_ptr<ModuleDebugFileChecksumFragment> Checksums) {
+  assert(!ChecksumInfo && "Can't have more than one checksum info!");
+
+  if (C13Builders.empty())
+    C13Builders.push_back(nullptr);
+
+  ChecksumInfo = std::move(Checksums);
+  C13Builders[0] = llvm::make_unique<ModuleDebugFragmentRecordBuilder>(
+      ChecksumInfo->kind(), *ChecksumInfo);
+}
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index b9f53578d32..4802cc6e819 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -10,9 +10,9 @@
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -252,11 +252,12 @@ Error DbiStream::initializeModInfoArray() {
   if (ModInfoSubstream.getLength() == 0)
     return Error::success();
 
-  // Since each ModInfo in the stream is a variable length, we have to iterate
+  // Since each DbiModuleDescriptor in the stream is a variable length, we have
+  // to iterate
   // them to know how many there actually are.
   BinaryStreamReader Reader(ModInfoSubstream);
 
-  VarStreamArray<ModInfo> ModInfoArray;
+  VarStreamArray<DbiModuleDescriptor> ModInfoArray;
   if (auto EC = Reader.readArray(ModInfoArray, ModInfoSubstream.getLength()))
     return EC;
   for (auto &Info : ModInfoArray) {
@@ -371,10 +372,12 @@ Error DbiStream::initializeFileInfo() {
     NumSourceFiles += Count;
 
   // This is the array that in the reference implementation corresponds to
-  // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a
+  // `DbiModuleDescriptor::FileLayout::FileNameOffs`, which is commented there
+  // as being a
   // pointer. Due to the mentioned problems of pointers causing difficulty
   // when reading from the file on 64-bit systems, we continue to ignore that
-  // field in `ModInfo`, and instead build a vector of StringRefs and stores
+  // field in `DbiModuleDescriptor`, and instead build a vector of StringRefs
+  // and stores
   // them in `ModuleInfoEx`.  The value written to and read from the file is
   // not used anyway, it is only there as a way to store the offsets for the
   // purposes of later accessing the names at runtime.
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index a203aea60fe..c19a2f0d311 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -12,8 +12,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/BinaryStreamWriter.h"
@@ -74,10 +74,11 @@ uint32_t DbiStreamBuilder::calculateSerializedLength() const {
          calculateSectionMapStreamSize() + calculateDbgStreamsSize();
 }
 
-Expected<ModInfoBuilder &>
+Expected<DbiModuleDescriptorBuilder &>
 DbiStreamBuilder::addModuleInfo(StringRef ModuleName) {
   uint32_t Index = ModiList.size();
-  auto MIB = llvm::make_unique<ModInfoBuilder>(ModuleName, Index, Msf);
+  auto MIB =
+      llvm::make_unique<DbiModuleDescriptorBuilder>(ModuleName, Index, Msf);
   auto M = MIB.get();
   auto Result = ModiMap.insert(std::make_pair(ModuleName, std::move(MIB)));
 
@@ -100,6 +101,14 @@ Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) {
   return Error::success();
 }
 
+Expected<uint32_t> DbiStreamBuilder::getSourceFileNameIndex(StringRef File) {
+  auto NameIter = SourceFileNames.find(File);
+  if (NameIter == SourceFileNames.end())
+    return make_error<RawError>(raw_error_code::no_entry,
+                                "The specified source file was not found");
+  return NameIter->getValue();
+}
+
 uint32_t DbiStreamBuilder::calculateModiSubstreamSize() const {
   uint32_t Size = 0;
   for (const auto &M : ModiList)
diff --git a/lib/DebugInfo/PDB/Native/ModStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
similarity index 64%
rename from lib/DebugInfo/PDB/Native/ModStream.cpp
rename to lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index e87e2c40759..d7a203746a0 100644
--- a/lib/DebugInfo/PDB/Native/ModStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -1,4 +1,4 @@
-//===- ModStream.cpp - PDB Module Info Stream Access ----------------------===//
+//===- ModuleDebugStream.cpp - PDB Module Info Stream Access --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/PDB/Native/ModStream.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
@@ -21,20 +21,22 @@
 #include <cstdint>
 
 using namespace llvm;
+using namespace llvm::codeview;
 using namespace llvm::msf;
 using namespace llvm::pdb;
 
-ModStream::ModStream(const ModInfo &Module,
-                     std::unique_ptr<MappedBlockStream> Stream)
+ModuleDebugStreamRef::ModuleDebugStreamRef(
+    const DbiModuleDescriptor &Module,
+    std::unique_ptr<MappedBlockStream> Stream)
     : Mod(Module), Stream(std::move(Stream)) {}
 
-ModStream::~ModStream() = default;
+ModuleDebugStreamRef::~ModuleDebugStreamRef() = default;
 
-Error ModStream::reload() {
+Error ModuleDebugStreamRef::reload() {
   BinaryStreamReader Reader(*Stream);
 
   uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize();
-  uint32_t C11Size = Mod.getLineInfoByteSize();
+  uint32_t C11Size = Mod.getC11LineInfoByteSize();
   uint32_t C13Size = Mod.getC13LineInfoByteSize();
 
   if (C11Size > 0 && C13Size > 0)
@@ -48,13 +50,14 @@ Error ModStream::reload() {
   if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4))
     return EC;
 
-  if (auto EC = Reader.readStreamRef(LinesSubstream, C11Size))
+  if (auto EC = Reader.readStreamRef(C11LinesSubstream, C11Size))
     return EC;
   if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size))
     return EC;
 
   BinaryStreamReader LineReader(C13LinesSubstream);
-  if (auto EC = LineReader.readArray(LineInfo, LineReader.bytesRemaining()))
+  if (auto EC =
+          LineReader.readArray(LinesAndChecksums, LineReader.bytesRemaining()))
     return EC;
 
   uint32_t GlobalRefsSize;
@@ -70,20 +73,17 @@ Error ModStream::reload() {
 }
 
 iterator_range<codeview::CVSymbolArray::Iterator>
-ModStream::symbols(bool *HadError) const {
-  // It's OK if the stream is empty.
-  if (SymbolsSubstream.getUnderlyingStream().getLength() == 0)
-    return make_range(SymbolsSubstream.end(), SymbolsSubstream.end());
+ModuleDebugStreamRef::symbols(bool *HadError) const {
   return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end());
 }
 
-iterator_range<codeview::ModuleSubstreamArray::Iterator>
-ModStream::lines(bool *HadError) const {
-  return make_range(LineInfo.begin(HadError), LineInfo.end());
+llvm::iterator_range<ModuleDebugStreamRef::LinesAndChecksumsIterator>
+ModuleDebugStreamRef::linesAndChecksums() const {
+  return make_range(LinesAndChecksums.begin(), LinesAndChecksums.end());
 }
 
-bool ModStream::hasLineInfo() const {
-  return C13LinesSubstream.getLength() > 0 || LinesSubstream.getLength() > 0;
+bool ModuleDebugStreamRef::hasLineInfo() const {
+  return C13LinesSubstream.getLength() > 0;
 }
 
-Error ModStream::commit() { return Error::success(); }
+Error ModuleDebugStreamRef::commit() { return Error::success(); }
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp
index e0f8370ab60..40dc8e1bfcb 100644
--- a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp
@@ -29,6 +29,12 @@ uint32_t StringTableBuilder::insert(StringRef S) {
   return P.first->second;
 }
 
+uint32_t StringTableBuilder::getStringIndex(StringRef S) {
+  auto Iter = Strings.find(S);
+  assert(Iter != Strings.end());
+  return Iter->second;
+}
+
 static uint32_t computeBucketCount(uint32_t NumStrings) {
   // The /names stream is basically an on-disk open-addressing hash table.
   // Hash collisions are resolved by linear probing. We cannot make
diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp
index 1abb368127a..9de3ddc039d 100644
--- a/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -461,8 +461,9 @@ extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
                                 size_t *length, int *status);
 #endif
 
-std::string LLVMSymbolizer::DemangleName(const std::string &Name,
-                                         const SymbolizableModule *ModInfo) {
+std::string
+LLVMSymbolizer::DemangleName(const std::string &Name,
+                             const SymbolizableModule *DbiModuleDescriptor) {
 #if !defined(_MSC_VER)
   // We can spoil names of symbols with C linkage, so use an heuristic
   // approach to check if the name should be demangled.
@@ -490,7 +491,7 @@ std::string LLVMSymbolizer::DemangleName(const std::string &Name,
     return (result == 0) ? Name : std::string(DemangledName);
   }
 #endif
-  if (ModInfo && ModInfo->isWin32Module())
+  if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module())
     return std::string(demanglePE32ExternCFunc(Name));
   return Name;
 }
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 10b4e98b607..96844439e72 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1565,7 +1565,7 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy,
           Tmp = Tmp.zext(SrcBitSize);
           Tmp = TempSrc.AggregateVal[SrcElt++].IntVal;
           Tmp = Tmp.zext(DstBitSize);
-          Tmp = Tmp.shl(ShiftAmt);
+          Tmp <<= ShiftAmt;
           ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
           Elt.IntVal |= Tmp;
         }
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index c26615631ec..ad067ee2c0d 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -145,6 +145,6 @@ private:
   static thread_local bool IsMyThread;
 };
 
-}; // namespace fuzzer
+} // namespace fuzzer
 
 #endif // LLVM_FUZZER_INTERNAL_H
diff --git a/lib/Fuzzer/test/cxxstring.test b/lib/Fuzzer/test/cxxstring.test
index c60d7aee968..52168fc8c82 100644
--- a/lib/Fuzzer/test/cxxstring.test
+++ b/lib/Fuzzer/test/cxxstring.test
@@ -1,2 +1,4 @@
+UNSUPPORTED: windows
+
 RUN: not LLVMFuzzer-CxxStringEqTest -seed=1 -runs=1000000 2>&1 | FileCheck %s
 CHECK: BINGO
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index b7de07170de..4c6e3e3788b 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -332,6 +332,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::HHVM:          Out << "hhvmcc"; break;
   case CallingConv::HHVM_C:        Out << "hhvm_ccc"; break;
   case CallingConv::AMDGPU_VS:     Out << "amdgpu_vs"; break;
+  case CallingConv::AMDGPU_HS:     Out << "amdgpu_hs"; break;
   case CallingConv::AMDGPU_GS:     Out << "amdgpu_gs"; break;
   case CallingConv::AMDGPU_PS:     Out << "amdgpu_ps"; break;
   case CallingConv::AMDGPU_CS:     Out << "amdgpu_cs"; break;
@@ -1719,6 +1720,7 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
   Printer.printMetadata("templateParams", N->getRawTemplateParams());
   Printer.printMetadata("declaration", N->getRawDeclaration());
   Printer.printMetadata("variables", N->getRawVariables());
+  Printer.printMetadata("thrownTypes", N->getRawThrownTypes());
   Out << ")";
 }
 
@@ -1755,8 +1757,6 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
   MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
   Printer.printString("name", N->getName());
   Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
-  Printer.printMetadata("file", N->getRawFile());
-  Printer.printInt("line", N->getLine());
   Printer.printBool("exportSymbols", N->getExportSymbols(), false);
   Out << ")";
 }
@@ -2084,8 +2084,7 @@ public:
   void printModule(const Module *M);
 
   void writeOperand(const Value *Op, bool PrintType);
-  void writeParamOperand(const Value *Operand, AttributeList Attrs,
-                         unsigned Idx);
+  void writeParamOperand(const Value *Operand, AttributeSet Attrs);
   void writeOperandBundles(ImmutableCallSite CS);
   void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
   void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
@@ -2101,7 +2100,7 @@ public:
   void printIndirectSymbol(const GlobalIndirectSymbol *GIS);
   void printComdat(const Comdat *C);
   void printFunction(const Function *F);
-  void printArgument(const Argument *FA, AttributeList Attrs, unsigned Idx);
+  void printArgument(const Argument *FA, AttributeSet Attrs);
   void printBasicBlock(const BasicBlock *BB);
   void printInstructionLine(const Instruction &I);
   void printInstruction(const Instruction &I);
@@ -2180,7 +2179,7 @@ void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
 }
 
 void AssemblyWriter::writeParamOperand(const Value *Operand,
-                                       AttributeList Attrs, unsigned Idx) {
+                                       AttributeSet Attrs) {
   if (!Operand) {
     Out << "<null operand!>";
     return;
@@ -2189,8 +2188,8 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
   // Print the type
   TypePrinter.print(Operand->getType(), Out);
   // Print parameter attributes list
-  if (Attrs.hasAttributes(Idx))
-    Out << ' ' << Attrs.getAsString(Idx);
+  if (Attrs.hasAttributes())
+    Out << ' ' << Attrs.getAsString();
   Out << ' ';
   // Print the operand
   WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
@@ -2653,17 +2652,17 @@ void AssemblyWriter::printFunction(const Function *F) {
       // Output type...
       TypePrinter.print(FT->getParamType(I), Out);
 
-      if (Attrs.hasAttributes(I + 1))
-        Out << ' ' << Attrs.getAsString(I + 1);
+      AttributeSet ArgAttrs = Attrs.getParamAttributes(I);
+      if (ArgAttrs.hasAttributes())
+        Out << ' ' << ArgAttrs.getAsString();
     }
   } else {
     // The arguments are meaningful here, print them in detail.
-    unsigned Idx = 1;
     for (const Argument &Arg : F->args()) {
       // Insert commas as we go... the first arg doesn't get a comma
-      if (Idx != 1)
+      if (Arg.getArgNo() != 0)
         Out << ", ";
-      printArgument(&Arg, Attrs, Idx++);
+      printArgument(&Arg, Attrs.getParamAttributes(Arg.getArgNo()));
     }
   }
 
@@ -2725,14 +2724,13 @@ void AssemblyWriter::printFunction(const Function *F) {
 /// printArgument - This member is called for every argument that is passed into
 /// the function.  Simply print it out
 ///
-void AssemblyWriter::printArgument(const Argument *Arg, AttributeList Attrs,
-                                   unsigned Idx) {
+void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
   // Output type...
   TypePrinter.print(Arg->getType(), Out);
 
   // Output parameter attributes list
-  if (Attrs.hasAttributes(Idx))
-    Out << ' ' << Attrs.getAsString(Idx);
+  if (Attrs.hasAttributes())
+    Out << ' ' << Attrs.getAsString();
 
   // Output name, if available...
   if (Arg->hasName()) {
@@ -3026,7 +3024,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
       if (op > 0)
         Out << ", ";
-      writeParamOperand(CI->getArgOperand(op), PAL, op + 1);
+      writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op));
     }
 
     // Emit an ellipsis if this is a musttail call in a vararg function.  This
@@ -3069,7 +3067,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
       if (op)
         Out << ", ";
-      writeParamOperand(II->getArgOperand(op), PAL, op + 1);
+      writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op));
     }
 
     Out << ')';
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index e30414537a6..62f127bd02e 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -315,6 +315,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "returns_twice";
   if (hasAttribute(Attribute::SExt))
     return "signext";
+  if (hasAttribute(Attribute::Speculatable))
+    return "speculatable";
   if (hasAttribute(Attribute::StackProtect))
     return "ssp";
   if (hasAttribute(Attribute::StackProtectReq))
@@ -1189,8 +1191,12 @@ Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const {
   return getAttributes(Index).getAttribute(Kind);
 }
 
-unsigned AttributeList::getParamAlignment(unsigned Index) const {
-  return getAttributes(Index).getAlignment();
+unsigned AttributeList::getRetAlignment() const {
+  return getAttributes(ReturnIndex).getAlignment();
+}
+
+unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
+  return getAttributes(ArgNo + 1).getAlignment();
 }
 
 unsigned AttributeList::getStackAlignment(unsigned Index) const {
@@ -1363,15 +1369,7 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
 }
 
 AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) {
-  for (Attribute Attr : A.getAttributes(Index)) {
-    if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
-      removeAttribute(Attr.getKindAsEnum());
-    } else {
-      assert(Attr.isStringAttribute() && "Invalid attribute type!");
-      removeAttribute(Attr.getKindAsString());
-    }
-  }
-
+  remove(A.getAttributes(Index));
   return *this;
 }
 
@@ -1513,25 +1511,16 @@ bool AttrBuilder::hasAttributes() const {
   return !Attrs.none() || !TargetDepAttrs.empty();
 }
 
-bool AttrBuilder::hasAttributes(AttributeList A, uint64_t Index) const {
-  unsigned Slot = ~0U;
-  for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
-    if (A.getSlotIndex(I) == Index) {
-      Slot = I;
-      break;
-    }
+bool AttrBuilder::hasAttributes(AttributeList AL, uint64_t Index) const {
+  AttributeSet AS = AL.getAttributes(Index);
 
-  assert(Slot != ~0U && "Couldn't find the index!");
-
-  for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E;
-       ++I) {
-    Attribute Attr = *I;
+  for (Attribute Attr : AS) {
     if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
-      if (Attrs[I->getKindAsEnum()])
+      if (contains(Attr.getKindAsEnum()))
         return true;
     } else {
       assert(Attr.isStringAttribute() && "Invalid attribute kind!");
-      return TargetDepAttrs.find(Attr.getKindAsString())!=TargetDepAttrs.end();
+      return contains(Attr.getKindAsString());
     }
   }
 
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index 0cc38b02520..5425676e4ed 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -29,12 +29,9 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
-  if (Full)
-    Lower = Upper = APInt::getMaxValue(BitWidth);
-  else
-    Lower = Upper = APInt::getMinValue(BitWidth);
-}
+ConstantRange::ConstantRange(uint32_t BitWidth, bool Full)
+    : Lower(Full ? APInt::getMaxValue(BitWidth) : APInt::getMinValue(BitWidth)),
+      Upper(Lower) {}
 
 ConstantRange::ConstantRange(APInt V)
     : Lower(std::move(V)), Upper(Lower + 1) {}
@@ -66,49 +63,49 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
     APInt UMax(CR.getUnsignedMax());
     if (UMax.isMinValue())
       return ConstantRange(W, /* empty */ false);
-    return ConstantRange(APInt::getMinValue(W), UMax);
+    return ConstantRange(APInt::getMinValue(W), std::move(UMax));
   }
   case CmpInst::ICMP_SLT: {
     APInt SMax(CR.getSignedMax());
     if (SMax.isMinSignedValue())
       return ConstantRange(W, /* empty */ false);
-    return ConstantRange(APInt::getSignedMinValue(W), SMax);
+    return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax));
   }
   case CmpInst::ICMP_ULE: {
     APInt UMax(CR.getUnsignedMax());
     if (UMax.isMaxValue())
       return ConstantRange(W);
-    return ConstantRange(APInt::getMinValue(W), UMax + 1);
+    return ConstantRange(APInt::getMinValue(W), std::move(UMax) + 1);
   }
   case CmpInst::ICMP_SLE: {
     APInt SMax(CR.getSignedMax());
     if (SMax.isMaxSignedValue())
       return ConstantRange(W);
-    return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
+    return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax) + 1);
   }
   case CmpInst::ICMP_UGT: {
     APInt UMin(CR.getUnsignedMin());
     if (UMin.isMaxValue())
       return ConstantRange(W, /* empty */ false);
-    return ConstantRange(UMin + 1, APInt::getNullValue(W));
+    return ConstantRange(std::move(UMin) + 1, APInt::getNullValue(W));
   }
   case CmpInst::ICMP_SGT: {
     APInt SMin(CR.getSignedMin());
     if (SMin.isMaxSignedValue())
       return ConstantRange(W, /* empty */ false);
-    return ConstantRange(SMin + 1, APInt::getSignedMinValue(W));
+    return ConstantRange(std::move(SMin) + 1, APInt::getSignedMinValue(W));
   }
   case CmpInst::ICMP_UGE: {
     APInt UMin(CR.getUnsignedMin());
     if (UMin.isMinValue())
       return ConstantRange(W);
-    return ConstantRange(UMin, APInt::getNullValue(W));
+    return ConstantRange(std::move(UMin), APInt::getNullValue(W));
   }
   case CmpInst::ICMP_SGE: {
     APInt SMin(CR.getSignedMin());
     if (SMin.isMinSignedValue())
       return ConstantRange(W);
-    return ConstantRange(SMin, APInt::getSignedMinValue(W));
+    return ConstantRange(std::move(SMin), APInt::getSignedMinValue(W));
   }
   }
 }
@@ -198,7 +195,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
     return ConstantRange(BitWidth, false);
 
   if (auto *C = Other.getSingleElement())
-    if (C->isMinValue())
+    if (C->isNullValue())
       // Full set: nothing signed / unsigned wraps when added to 0.
       return ConstantRange(BitWidth);
 
@@ -210,8 +207,8 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
                                               -Other.getUnsignedMax()));
 
   if (NoWrapKind & OBO::NoSignedWrap) {
-    APInt SignedMin = Other.getSignedMin();
-    APInt SignedMax = Other.getSignedMax();
+    const APInt &SignedMin = Other.getSignedMin();
+    const APInt &SignedMax = Other.getSignedMax();
 
     if (SignedMax.isStrictlyPositive())
       Result = SubsetIntersect(
@@ -246,11 +243,8 @@ bool ConstantRange::isSignWrappedSet() const {
 }
 
 APInt ConstantRange::getSetSize() const {
-  if (isFullSet()) {
-    APInt Size(getBitWidth()+1, 0);
-    Size.setBit(getBitWidth());
-    return Size;
-  }
+  if (isFullSet())
+    return APInt::getOneBitSet(getBitWidth()+1, getBitWidth());
 
   // This is also correct for wrapped sets.
   return (Upper - Lower).zext(getBitWidth()+1);
@@ -279,7 +273,6 @@ APInt ConstantRange::getUnsignedMin() const {
 }
 
 APInt ConstantRange::getSignedMax() const {
-  APInt SignedMax(APInt::getSignedMaxValue(getBitWidth()));
   if (!isWrappedSet()) {
     APInt UpperMinusOne = getUpper() - 1;
     if (getLower().sle(UpperMinusOne))
@@ -435,16 +428,13 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
       return ConstantRange(CR.Lower, Upper);
     }
 
-    APInt L = Lower, U = Upper;
-    if (CR.Lower.ult(L))
-      L = CR.Lower;
-    if ((CR.Upper - 1).ugt(U - 1))
-      U = CR.Upper;
+    APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
+    APInt U = (CR.Upper - 1).ugt(Upper - 1) ? CR.Upper : Upper;
 
     if (L == 0 && U == 0)
       return ConstantRange(getBitWidth());
 
-    return ConstantRange(L, U);
+    return ConstantRange(std::move(L), std::move(U));
   }
 
   if (!CR.isWrappedSet()) {
@@ -485,13 +475,10 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
   if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
     return ConstantRange(getBitWidth());
 
-  APInt L = Lower, U = Upper;
-  if (CR.Upper.ugt(U))
-    U = CR.Upper;
-  if (CR.Lower.ult(L))
-    L = CR.Lower;
+  APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
+  APInt U = CR.Upper.ugt(Upper) ? CR.Upper : Upper;
 
-  return ConstantRange(L, U);
+  return ConstantRange(std::move(L), std::move(U));
 }
 
 ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
@@ -518,14 +505,14 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
     auto BW = getBitWidth();
     APInt Min = APInt::getMinValue(BW).zextOrSelf(ResultBitWidth);
     APInt Max = APInt::getMaxValue(BW).zextOrSelf(ResultBitWidth);
-    return ConstantRange(Min, Max);
+    return ConstantRange(std::move(Min), std::move(Max));
   }
   case Instruction::SIToFP: {
     // TODO: use input range if available
     auto BW = getBitWidth();
     APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(ResultBitWidth);
     APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(ResultBitWidth);
-    return ConstantRange(SMin, SMax);
+    return ConstantRange(std::move(SMin), std::move(SMax));
   }
   case Instruction::FPTrunc:
   case Instruction::FPExt:
@@ -547,7 +534,8 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
     APInt LowerExt(DstTySize, 0);
     if (!Upper) // special case: [X, 0) -- not really wrapping around
       LowerExt = Lower.zext(DstTySize);
-    return ConstantRange(LowerExt, APInt::getOneBitSet(DstTySize, SrcTySize));
+    return ConstantRange(std::move(LowerExt),
+                         APInt::getOneBitSet(DstTySize, SrcTySize));
   }
 
   return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize));
@@ -578,9 +566,8 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   if (isFullSet())
     return ConstantRange(DstTySize, /*isFullSet=*/true);
 
-  APInt MaxValue = APInt::getMaxValue(DstTySize).zext(getBitWidth());
-  APInt MaxBitValue(getBitWidth(), 0);
-  MaxBitValue.setBit(DstTySize);
+  APInt MaxValue = APInt::getLowBitsSet(getBitWidth(), DstTySize);
+  APInt MaxBitValue = APInt::getOneBitSet(getBitWidth(), DstTySize);
 
   APInt LowerDiv(Lower), UpperDiv(Upper);
   ConstantRange Union(DstTySize, /*isFullSet=*/false);
@@ -594,7 +581,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
       return ConstantRange(DstTySize, /*isFullSet=*/true);
 
     Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize));
-    UpperDiv = APInt::getMaxValue(getBitWidth());
+    UpperDiv.setAllBits();
 
     // Union covers the MaxValue case, so return if the remaining range is just
     // MaxValue.
@@ -606,7 +593,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   if (LowerDiv.uge(MaxValue)) {
     APInt Div(getBitWidth(), 0);
     APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv);
-    UpperDiv = UpperDiv - MaxBitValue * Div;
+    UpperDiv -= MaxBitValue * Div;
   }
 
   if (UpperDiv.ule(MaxValue))
@@ -614,10 +601,10 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
                          UpperDiv.trunc(DstTySize)).unionWith(Union);
 
   // The truncated value wraps around. Check if we can do better than fullset.
-  APInt UpperModulo = UpperDiv - MaxBitValue;
-  if (UpperModulo.ult(LowerDiv))
+  UpperDiv -= MaxBitValue;
+  if (UpperDiv.ult(LowerDiv))
     return ConstantRange(LowerDiv.trunc(DstTySize),
-                         UpperModulo.trunc(DstTySize)).unionWith(Union);
+                         UpperDiv.trunc(DstTySize)).unionWith(Union);
 
   return ConstantRange(DstTySize, /*isFullSet=*/true);
 }
@@ -688,7 +675,7 @@ ConstantRange::add(const ConstantRange &Other) const {
   if (NewLower == NewUpper)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
-  ConstantRange X = ConstantRange(NewLower, NewUpper);
+  ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper));
   if (X.isSizeStrictlySmallerThanOf(*this) ||
       X.isSizeStrictlySmallerThanOf(Other))
     // We've wrapped, therefore, full set.
@@ -721,7 +708,7 @@ ConstantRange::sub(const ConstantRange &Other) const {
   if (NewLower == NewUpper)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
-  ConstantRange X = ConstantRange(NewLower, NewUpper);
+  ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper));
   if (X.isSizeStrictlySmallerThanOf(*this) ||
       X.isSizeStrictlySmallerThanOf(Other))
     // We've wrapped, therefore, full set.
@@ -792,7 +779,7 @@ ConstantRange::smax(const ConstantRange &Other) const {
   APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1;
   if (NewU == NewL)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(NewL, NewU);
+  return ConstantRange(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
@@ -805,7 +792,7 @@ ConstantRange::umax(const ConstantRange &Other) const {
   APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1;
   if (NewU == NewL)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(NewL, NewU);
+  return ConstantRange(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
@@ -818,7 +805,7 @@ ConstantRange::smin(const ConstantRange &Other) const {
   APInt NewU = APIntOps::smin(getSignedMax(), Other.getSignedMax()) + 1;
   if (NewU == NewL)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(NewL, NewU);
+  return ConstantRange(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
@@ -831,7 +818,7 @@ ConstantRange::umin(const ConstantRange &Other) const {
   APInt NewU = APIntOps::umin(getUnsignedMax(), Other.getUnsignedMax()) + 1;
   if (NewU == NewL)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(NewL, NewU);
+  return ConstantRange(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
@@ -850,7 +837,7 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
     if (RHS.getUpper() == 1)
       RHS_umin = RHS.getLower();
     else
-      RHS_umin = APInt(getBitWidth(), 1);
+      RHS_umin = 1;
   }
 
   APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1;
@@ -860,7 +847,7 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
   if (Lower == Upper)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
-  return ConstantRange(Lower, Upper);
+  return ConstantRange(std::move(Lower), std::move(Upper));
 }
 
 ConstantRange
@@ -873,7 +860,7 @@ ConstantRange::binaryAnd(const ConstantRange &Other) const {
   APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
   if (umin.isAllOnesValue())
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1);
+  return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(umin) + 1);
 }
 
 ConstantRange
@@ -884,9 +871,9 @@ ConstantRange::binaryOr(const ConstantRange &Other) const {
   // TODO: replace this with something less conservative
 
   APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
-  if (umax.isMinValue())
+  if (umax.isNullValue())
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(umax, APInt::getNullValue(getBitWidth()));
+  return ConstantRange(std::move(umax), APInt::getNullValue(getBitWidth()));
 }
 
 ConstantRange
@@ -900,7 +887,7 @@ ConstantRange::shl(const ConstantRange &Other) const {
   // there's no overflow!
   APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
   if (Zeros.ugt(Other.getUnsignedMax()))
-    return ConstantRange(min, max + 1);
+    return ConstantRange(std::move(min), std::move(max) + 1);
 
   // FIXME: implement the other tricky cases
   return ConstantRange(getBitWidth(), /*isFullSet=*/true);
@@ -916,7 +903,7 @@ ConstantRange::lshr(const ConstantRange &Other) const {
   if (min == max + 1)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
-  return ConstantRange(min, max + 1);
+  return ConstantRange(std::move(min), std::move(max) + 1);
 }
 
 ConstantRange ConstantRange::inverse() const {
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 9407c805b92..7e6f9a7804b 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -676,13 +676,14 @@ DISubprogram *DIBuilder::createFunction(
     DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
     unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
     bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
-    bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) {
+    bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl,
+    DITypeArray ThrownTypes) {
   auto *Node = getSubprogram(
       /* IsDistinct = */ isDefinition, VMContext,
       getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty,
       isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, 0, Flags,
       isOptimized, isDefinition ? CUNode : nullptr, TParams, Decl,
-      MDTuple::getTemporary(VMContext, None).release());
+      MDTuple::getTemporary(VMContext, None).release(), ThrownTypes);
 
   if (isDefinition)
     AllSubprograms.push_back(Node);
@@ -694,23 +695,22 @@ DISubprogram *DIBuilder::createTempFunctionFwdDecl(
     DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
     unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
     bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
-    bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) {
+    bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl,
+    DITypeArray ThrownTypes) {
   return DISubprogram::getTemporary(
              VMContext, getNonCompileUnitScope(Context), Name, LinkageName,
              File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr,
              0, 0, 0, Flags, isOptimized, isDefinition ? CUNode : nullptr,
-             TParams, Decl, nullptr)
+             TParams, Decl, nullptr, ThrownTypes)
       .release();
 }
 
-DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name,
-                                      StringRef LinkageName, DIFile *F,
-                                      unsigned LineNo, DISubroutineType *Ty,
-                                      bool isLocalToUnit, bool isDefinition,
-                                      unsigned VK, unsigned VIndex,
-                                      int ThisAdjustment, DIType *VTableHolder,
-                                      DINode::DIFlags Flags, bool isOptimized,
-                                      DITemplateParameterArray TParams) {
+DISubprogram *DIBuilder::createMethod(
+    DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
+    unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
+    bool isDefinition, unsigned VK, unsigned VIndex, int ThisAdjustment,
+    DIType *VTableHolder, DINode::DIFlags Flags, bool isOptimized,
+    DITemplateParameterArray TParams, DITypeArray ThrownTypes) {
   assert(getNonCompileUnitScope(Context) &&
          "Methods should have both a Context and a context that isn't "
          "the compile unit.");
@@ -719,7 +719,7 @@ DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name,
       /* IsDistinct = */ isDefinition, VMContext, cast<DIScope>(Context), Name,
       LinkageName, F, LineNo, Ty, isLocalToUnit, isDefinition, LineNo,
       VTableHolder, VK, VIndex, ThisAdjustment, Flags, isOptimized,
-      isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr);
+      isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr, ThrownTypes);
 
   if (isDefinition)
     AllSubprograms.push_back(SP);
@@ -728,10 +728,15 @@ DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name,
 }
 
 DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
-                                        DIFile *File, unsigned LineNo,
                                         bool ExportSymbols) {
-  return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name,
-                          LineNo, ExportSymbols);
+
+  // It is okay to *not* make anonymous top-level namespaces distinct, because
+  // all nodes that have an anonymous namespace as their parent scope are
+  // guaranteed to be unique and/or are linked to their containing
+  // DICompileUnit. This decision is an explicit tradeoff of link time versus
+  // memory usage versus code simplicity and may get revisited in the future.
+  return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), Name,
+                          ExportSymbols);
 }
 
 DIModule *DIBuilder::createModule(DIScope *Scope, StringRef Name,
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index d14c6018d40..cdbe237766a 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -15,6 +15,7 @@
 #include "LLVMContextImpl.h"
 #include "MetadataImpl.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Function.h"
 
 using namespace llvm;
@@ -214,6 +215,10 @@ void GenericDINode::recalculateHash() {
 #define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS)                   \
   return storeImpl(new (array_lengthof(OPS)) CLASS(Context, Storage, OPS),     \
                    Storage, Context.pImpl->CLASS##s)
+#define DEFINE_GETIMPL_STORE_N(CLASS, ARGS, OPS, NUM_OPS)                      \
+  return storeImpl(new (NUM_OPS)                                               \
+                       CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS),        \
+                   Storage, Context.pImpl->CLASS##s)
 
 DISubrange *DISubrange::getImpl(LLVMContext &Context, int64_t Count, int64_t Lo,
                                 StorageType Storage, bool ShouldCreate) {
@@ -441,21 +446,30 @@ DISubprogram *DISubprogram::getImpl(
     Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
     int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
     Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
-    StorageType Storage, bool ShouldCreate) {
+    Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate) {
   assert(isCanonical(Name) && "Expected canonical MDString");
   assert(isCanonical(LinkageName) && "Expected canonical MDString");
   DEFINE_GETIMPL_LOOKUP(
-      DISubprogram,
-      (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
-       ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment,
-       Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables));
-  Metadata *Ops[] = {File,           Scope,       Name,           Name,
-                     LinkageName,    Type,        ContainingType, Unit,
-                     TemplateParams, Declaration, Variables};
-  DEFINE_GETIMPL_STORE(DISubprogram, (Line, ScopeLine, Virtuality, VirtualIndex,
-                                      ThisAdjustment, Flags, IsLocalToUnit,
-                                      IsDefinition, IsOptimized),
-                       Ops);
+      DISubprogram, (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
+                     IsDefinition, ScopeLine, ContainingType, Virtuality,
+                     VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit,
+                     TemplateParams, Declaration, Variables, ThrownTypes));
+  SmallVector<Metadata *, 11> Ops = {
+      File,        Scope,     Name,           LinkageName,    Type,       Unit,
+      Declaration, Variables, ContainingType, TemplateParams, ThrownTypes};
+  if (!ThrownTypes) {
+    Ops.pop_back();
+    if (!TemplateParams) {
+      Ops.pop_back();
+      if (!ContainingType)
+        Ops.pop_back();
+    }
+  }
+  DEFINE_GETIMPL_STORE_N(DISubprogram,
+                         (Line, ScopeLine, Virtuality, VirtualIndex,
+                          ThisAdjustment, Flags, IsLocalToUnit, IsDefinition,
+                          IsOptimized),
+                         Ops, Ops.size());
 }
 
 bool DISubprogram::describes(const Function *F) const {
@@ -493,13 +507,13 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context,
 }
 
 DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
-                                  Metadata *File, MDString *Name, unsigned Line,
-                                  bool ExportSymbols, StorageType Storage,
-                                  bool ShouldCreate) {
+                                  MDString *Name, bool ExportSymbols,
+                                  StorageType Storage, bool ShouldCreate) {
   assert(isCanonical(Name) && "Expected canonical MDString");
-  DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, File, Name, Line, ExportSymbols));
-  Metadata *Ops[] = {File, Scope, Name};
-  DEFINE_GETIMPL_STORE(DINamespace, (Line, ExportSymbols), Ops);
+  DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, Name, ExportSymbols));
+  // The nullptr is for DIScope's File operand. This should be refactored.
+  Metadata *Ops[] = {nullptr, Scope, Name};
+  DEFINE_GETIMPL_STORE(DINamespace, (ExportSymbols), Ops);
 }
 
 DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope,
@@ -647,6 +661,43 @@ DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) {
   return None;
 }
 
+void DIExpression::appendOffset(SmallVectorImpl<uint64_t> &Ops,
+                                int64_t Offset) {
+  if (Offset > 0) {
+    Ops.push_back(dwarf::DW_OP_plus);
+    Ops.push_back(Offset);
+  } else if (Offset < 0) {
+    Ops.push_back(dwarf::DW_OP_minus);
+    Ops.push_back(-Offset);
+  }
+}
+
+DIExpression *DIExpression::prepend(const DIExpression *Expr, bool Deref,
+                                    int64_t Offset, bool StackValue) {
+  SmallVector<uint64_t, 8> Ops;
+  appendOffset(Ops, Offset);
+  if (Deref)
+    Ops.push_back(dwarf::DW_OP_deref);
+  if (Expr)
+    for (auto Op : Expr->expr_ops()) {
+      // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment.
+      if (StackValue) {
+        if (Op.getOp() == dwarf::DW_OP_stack_value)
+          StackValue = false;
+        else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
+          Ops.push_back(dwarf::DW_OP_stack_value);
+          StackValue = false;
+        }
+      }
+      Ops.push_back(Op.getOp());
+      for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+        Ops.push_back(Op.getArg(I));
+    }
+  if (StackValue)
+    Ops.push_back(dwarf::DW_OP_stack_value);
+  return DIExpression::get(Expr->getContext(), Ops);
+}
+
 bool DIExpression::isConstant() const {
   // Recognize DW_OP_constu C DW_OP_stack_value (DW_OP_LLVM_fragment Len Ofs)?.
   if (getNumElements() != 3 && getNumElements() != 6)
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index e1f5fdea44e..fc61ba7439b 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -84,8 +84,7 @@ bool Argument::hasByValOrInAllocaAttr() const {
 
 unsigned Argument::getParamAlignment() const {
   assert(getType()->isPointerTy() && "Only pointers have alignments");
-  return getParent()->getParamAlignment(getArgNo()+1);
-
+  return getParent()->getParamAlignment(getArgNo());
 }
 
 uint64_t Argument::getDereferenceableBytes() const {
@@ -152,15 +151,6 @@ void Argument::addAttr(Attribute Attr) {
   getParent()->addAttribute(getArgNo() + 1, Attr);
 }
 
-void Argument::removeAttr(AttributeList AS) {
-  assert(AS.getNumSlots() <= 1 &&
-         "Trying to remove more than one attribute set from an argument!");
-  AttrBuilder B(AS, AS.getSlotIndex(0));
-  getParent()->removeAttributes(
-      getArgNo() + 1,
-      AttributeList::get(Parent->getContext(), getArgNo() + 1, B));
-}
-
 void Argument::removeAttr(Attribute::AttrKind Kind) {
   getParent()->removeAttribute(getArgNo() + 1, Kind);
 }
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 76582e334d1..59500992abb 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -501,7 +501,8 @@ static Instruction *createMalloc(Instruction *InsertBefore,
   MCall->setTailCall();
   if (Function *F = dyn_cast<Function>(MallocFunc)) {
     MCall->setCallingConv(F->getCallingConv());
-    if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
+    if (!F->doesNotAlias(AttributeList::ReturnIndex))
+      F->setDoesNotAlias(AttributeList::ReturnIndex);
   }
   assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
 
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 0ee0b9c0da2..7185736fa2e 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -552,6 +552,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
   Metadata *TemplateParams;
   Metadata *Declaration;
   Metadata *Variables;
+  Metadata *ThrownTypes;
 
   MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
                 Metadata *File, unsigned Line, Metadata *Type,
@@ -559,7 +560,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
                 Metadata *ContainingType, unsigned Virtuality,
                 unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
                 bool IsOptimized, Metadata *Unit, Metadata *TemplateParams,
-                Metadata *Declaration, Metadata *Variables)
+                Metadata *Declaration, Metadata *Variables,
+                Metadata *ThrownTypes)
       : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
         Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
         IsDefinition(IsDefinition), ScopeLine(ScopeLine),
@@ -567,7 +569,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
         VirtualIndex(VirtualIndex), ThisAdjustment(ThisAdjustment),
         Flags(Flags), IsOptimized(IsOptimized), Unit(Unit),
         TemplateParams(TemplateParams), Declaration(Declaration),
-        Variables(Variables) {}
+        Variables(Variables), ThrownTypes(ThrownTypes) {}
   MDNodeKeyImpl(const DISubprogram *N)
       : Scope(N->getRawScope()), Name(N->getRawName()),
         LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
@@ -578,7 +580,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
         ThisAdjustment(N->getThisAdjustment()), Flags(N->getFlags()),
         IsOptimized(N->isOptimized()), Unit(N->getRawUnit()),
         TemplateParams(N->getRawTemplateParams()),
-        Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {}
+        Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()),
+        ThrownTypes(N->getRawThrownTypes()) {}
 
   bool isKeyOf(const DISubprogram *RHS) const {
     return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
@@ -595,7 +598,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
            Unit == RHS->getUnit() &&
            TemplateParams == RHS->getRawTemplateParams() &&
            Declaration == RHS->getRawDeclaration() &&
-           Variables == RHS->getRawVariables();
+           Variables == RHS->getRawVariables() &&
+           ThrownTypes == RHS->getRawThrownTypes();
   }
   unsigned getHashValue() const {
     // If this is a declaration inside an ODR type, only hash the type and the
@@ -695,26 +699,21 @@ template <> struct MDNodeKeyImpl<DILexicalBlockFile> {
 
 template <> struct MDNodeKeyImpl<DINamespace> {
   Metadata *Scope;
-  Metadata *File;
   MDString *Name;
-  unsigned Line;
   bool ExportSymbols;
 
-  MDNodeKeyImpl(Metadata *Scope, Metadata *File, MDString *Name, unsigned Line,
-                bool ExportSymbols)
-      : Scope(Scope), File(File), Name(Name), Line(Line),
-        ExportSymbols(ExportSymbols) {}
+  MDNodeKeyImpl(Metadata *Scope, MDString *Name, bool ExportSymbols)
+      : Scope(Scope), Name(Name), ExportSymbols(ExportSymbols) {}
   MDNodeKeyImpl(const DINamespace *N)
-      : Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getRawName()),
-        Line(N->getLine()), ExportSymbols(N->getExportSymbols()) {}
+      : Scope(N->getRawScope()), Name(N->getRawName()),
+        ExportSymbols(N->getExportSymbols()) {}
 
   bool isKeyOf(const DINamespace *RHS) const {
-    return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
-           Name == RHS->getRawName() && Line == RHS->getLine() &&
+    return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
            ExportSymbols == RHS->getExportSymbols();
   }
   unsigned getHashValue() const {
-    return hash_combine(Scope, File, Name, Line);
+    return hash_combine(Scope, Name);
   }
 };
 
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 7228de3d237..2411dc5ce7d 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -967,7 +967,7 @@ static void addRange(SmallVectorImpl<ConstantInt *> &EndPoints,
 
 MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
   // Given two ranges, we want to compute the union of the ranges. This
-  // is slightly complitade by having to combine the intervals and merge
+  // is slightly complicated by having to combine the intervals and merge
   // the ones that overlap.
 
   if (!A || !B)
@@ -976,7 +976,7 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
   if (A == B)
     return A;
 
-  // First, walk both lists in older of the lower boundary of each interval.
+  // First, walk both lists in order of the lower boundary of each interval.
   // At each step, try to merge the new interval to the last one we adedd.
   SmallVector<ConstantInt *, 4> EndPoints;
   int AI = 0;
diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp
index 9072f4bc7b1..01e1b8168af 100644
--- a/lib/IR/ModuleSummaryIndex.cpp
+++ b/lib/IR/ModuleSummaryIndex.cpp
@@ -16,54 +16,6 @@
 #include "llvm/ADT/StringMap.h"
 using namespace llvm;
 
-// Create the combined module index/summary from multiple
-// per-module instances.
-void ModuleSummaryIndex::mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other,
-                                   uint64_t NextModuleId) {
-  if (Other->modulePaths().empty())
-    return;
-
-  assert(Other->modulePaths().size() == 1 &&
-         "Can only merge from an single-module index at that time");
-
-  StringRef OtherModPath = Other->modulePaths().begin()->first();
-  StringRef ModPath = addModulePath(OtherModPath, NextModuleId,
-                                    Other->getModuleHash(OtherModPath))
-                          ->first();
-
-  for (auto &OtherGlobalValSummaryLists : *Other) {
-    GlobalValue::GUID ValueGUID = OtherGlobalValSummaryLists.first;
-    GlobalValueSummaryList &List = OtherGlobalValSummaryLists.second;
-
-    // Assert that the value summary list only has one entry, since we shouldn't
-    // have duplicate names within a single per-module index.
-    assert(List.size() == 1);
-    std::unique_ptr<GlobalValueSummary> Summary = std::move(List.front());
-
-    // Note the module path string ref was copied above and is still owned by
-    // the original per-module index. Reset it to the new module path
-    // string reference owned by the combined index.
-    Summary->setModulePath(ModPath);
-
-    // Add new value summary to existing list. There may be duplicates when
-    // combining GlobalValueMap entries, due to COMDAT values. Any local
-    // values were given unique global IDs.
-    addGlobalValueSummary(ValueGUID, std::move(Summary));
-  }
-}
-
-void ModuleSummaryIndex::removeEmptySummaryEntries() {
-  for (auto MI = begin(), MIE = end(); MI != MIE;) {
-    // Only expect this to be called on a per-module index, which has a single
-    // entry per value entry list.
-    assert(MI->second.size() == 1);
-    if (!MI->second[0])
-      MI = GlobalValueMap.erase(MI);
-    else
-      ++MI;
-  }
-}
-
 // Collect for the given module the list of function it defines
 // (GUID -> Summary).
 void ModuleSummaryIndex::collectDefinedFunctionsForModule(
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index d83bdf2acd4..02b40c93b5d 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -578,9 +578,9 @@ unsigned Value::getPointerDereferenceableBytes(const DataLayout &DL,
       CanBeNull = true;
     }
   } else if (auto CS = ImmutableCallSite(this)) {
-    DerefBytes = CS.getDereferenceableBytes(0);
+    DerefBytes = CS.getDereferenceableBytes(AttributeList::ReturnIndex);
     if (DerefBytes == 0) {
-      DerefBytes = CS.getDereferenceableOrNullBytes(0);
+      DerefBytes = CS.getDereferenceableOrNullBytes(AttributeList::ReturnIndex);
       CanBeNull = true;
     }
   } else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) {
@@ -649,7 +649,7 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
         Align = DL.getPrefTypeAlignment(AllocatedType);
     }
   } else if (auto CS = ImmutableCallSite(this))
-    Align = CS.getAttributes().getParamAlignment(AttributeList::ReturnIndex);
+    Align = CS.getAttributes().getRetAlignment();
   else if (const LoadInst *LI = dyn_cast<LoadInst>(this))
     if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
       ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
@@ -711,7 +711,7 @@ void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
   setPrevPtr(List);
   if (Next) {
     Next->setPrevPtr(&Next);
-    assert(V == Next->V && "Added to wrong list?");
+    assert(getValPtr() == Next->getValPtr() && "Added to wrong list?");
   }
 }
 
@@ -726,14 +726,14 @@ void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
 }
 
 void ValueHandleBase::AddToUseList() {
-  assert(V && "Null pointer doesn't have a use list!");
+  assert(getValPtr() && "Null pointer doesn't have a use list!");
 
-  LLVMContextImpl *pImpl = V->getContext().pImpl;
+  LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl;
 
-  if (V->HasValueHandle) {
+  if (getValPtr()->HasValueHandle) {
     // If this value already has a ValueHandle, then it must be in the
     // ValueHandles map already.
-    ValueHandleBase *&Entry = pImpl->ValueHandles[V];
+    ValueHandleBase *&Entry = pImpl->ValueHandles[getValPtr()];
     assert(Entry && "Value doesn't have any handles?");
     AddToExistingUseList(&Entry);
     return;
@@ -747,10 +747,10 @@ void ValueHandleBase::AddToUseList() {
   DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
   const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
 
-  ValueHandleBase *&Entry = Handles[V];
+  ValueHandleBase *&Entry = Handles[getValPtr()];
   assert(!Entry && "Value really did already have handles?");
   AddToExistingUseList(&Entry);
-  V->HasValueHandle = true;
+  getValPtr()->HasValueHandle = true;
 
   // If reallocation didn't happen or if this was the first insertion, don't
   // walk the table.
@@ -762,14 +762,14 @@ void ValueHandleBase::AddToUseList() {
   // Okay, reallocation did happen.  Fix the Prev Pointers.
   for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
        E = Handles.end(); I != E; ++I) {
-    assert(I->second && I->first == I->second->V &&
+    assert(I->second && I->first == I->second->getValPtr() &&
            "List invariant broken!");
     I->second->setPrevPtr(&I->second);
   }
 }
 
 void ValueHandleBase::RemoveFromUseList() {
-  assert(V && V->HasValueHandle &&
+  assert(getValPtr() && getValPtr()->HasValueHandle &&
          "Pointer doesn't have a use list!");
 
   // Unlink this from its use list.
@@ -786,11 +786,11 @@ void ValueHandleBase::RemoveFromUseList() {
   // If the Next pointer was null, then it is possible that this was the last
   // ValueHandle watching VP.  If so, delete its entry from the ValueHandles
   // map.
-  LLVMContextImpl *pImpl = V->getContext().pImpl;
+  LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl;
   DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
   if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
-    Handles.erase(V);
-    V->HasValueHandle = false;
+    Handles.erase(getValPtr());
+    getValPtr()->HasValueHandle = false;
   }
 }
 
@@ -820,13 +820,10 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
     switch (Entry->getKind()) {
     case Assert:
       break;
-    case Tracking:
-      // Mark that this value has been deleted by setting it to an invalid Value
-      // pointer.
-      Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey());
-      break;
     case Weak:
-      // Weak just goes to null, which will unlink it from the list.
+    case WeakTracking:
+      // WeakTracking and Weak just go to null, which unlinks them
+      // from the list.
       Entry->operator=(nullptr);
       break;
     case Callback:
@@ -874,16 +871,10 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
 
     switch (Entry->getKind()) {
     case Assert:
-      // Asserting handle does not follow RAUW implicitly.
-      break;
-    case Tracking:
-      // Tracking goes to new value like a WeakVH. Note that this may make it
-      // something incompatible with its templated type. We don't want to have a
-      // virtual (or inline) interface to handle this though, so instead we make
-      // the TrackingVH accessors guarantee that a client never sees this value.
-
-      LLVM_FALLTHROUGH;
     case Weak:
+      // Asserting and Weak handles do not follow RAUW implicitly.
+      break;
+    case WeakTracking:
       // Weak goes to the new value, which will unlink it from Old's list.
       Entry->operator=(New);
       break;
@@ -895,18 +886,17 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
   }
 
 #ifndef NDEBUG
-  // If any new tracking or weak value handles were added while processing the
+  // If any new weak value handles were added while processing the
   // list, then complain about it now.
   if (Old->HasValueHandle)
     for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
       switch (Entry->getKind()) {
-      case Tracking:
-      case Weak:
+      case WeakTracking:
         dbgs() << "After RAUW from " << *Old->getType() << " %"
                << Old->getName() << " to " << *New->getType() << " %"
                << New->getName() << "\n";
-        llvm_unreachable("A tracking or weak value handle still pointed to the"
-                         " old value!\n");
+        llvm_unreachable(
+            "A weak tracking value handle still pointed to the  old value!\n");
       default:
         break;
       }
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 4e04020f206..65e12456249 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1050,6 +1050,14 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
     // Subprogram declarations (part of the type hierarchy).
     AssertDI(!Unit, "subprogram declarations must not have a compile unit", &N);
   }
+
+  if (auto *RawThrownTypes = N.getRawThrownTypes()) {
+    auto *ThrownTypes = dyn_cast<MDTuple>(RawThrownTypes);
+    AssertDI(ThrownTypes, "invalid thrown types list", &N, RawThrownTypes);
+    for (Metadata *Op : ThrownTypes->operands())
+      AssertDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes,
+               Op);
+  }
 }
 
 void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) {
@@ -1195,9 +1203,9 @@ void Verifier::visitComdat(const Comdat &C) {
 
 void Verifier::visitModuleIdents(const Module &M) {
   const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident");
-  if (!Idents) 
+  if (!Idents)
     return;
-  
+
   // llvm.ident takes a list of metadata entry. Each entry has only one string.
   // Scan each llvm.ident entry and make sure that this requirement is met.
   for (const MDNode *N : Idents->operands()) {
@@ -1207,7 +1215,7 @@ void Verifier::visitModuleIdents(const Module &M) {
            ("invalid value for llvm.ident metadata entry operand"
             "(the operand should be a string)"),
            N->getOperand(0));
-  } 
+  }
 }
 
 void Verifier::visitModuleFlags(const Module &M) {
@@ -1344,6 +1352,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
   case Attribute::InaccessibleMemOnly:
   case Attribute::InaccessibleMemOrArgMemOnly:
   case Attribute::AllocSize:
+  case Attribute::Speculatable:
     return true;
   default:
     break;
@@ -1829,7 +1838,7 @@ void Verifier::verifyStatepoint(ImmutableCallSite CS) {
   Assert(ExpectedNumArgs <= (int)CS.arg_size(),
          "gc.statepoint too few arguments according to length fields", &CI);
 
-  // Check that the only uses of this gc.statepoint are gc.result or 
+  // Check that the only uses of this gc.statepoint are gc.result or
   // gc.relocate calls which are tied to this statepoint and thus part
   // of the same statepoint sequence
   for (const User *U : CI.users()) {
@@ -1975,6 +1984,7 @@ void Verifier::visitFunction(const Function &F) {
            "Calling convention requires void return type", &F);
     LLVM_FALLTHROUGH;
   case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_HS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
   case CallingConv::AMDGPU_CS:
@@ -2602,6 +2612,15 @@ void Verifier::verifyCallSite(CallSite CS) {
   Assert(verifyAttributeCount(Attrs, CS.arg_size()),
          "Attribute after last parameter!", I);
 
+  if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) {
+    // Don't allow speculatable on call sites, unless the underlying function
+    // declaration is also speculatable.
+    Function *Callee
+      = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+    Assert(Callee && Callee->isSpeculatable(),
+           "speculatable attribute may not apply to call sites", I);
+  }
+
   // Verify call attributes.
   verifyFunctionAttrs(FTy, Attrs, I);
 
@@ -2754,7 +2773,7 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
       Copy.addAttribute(AK);
   }
   if (Attrs.hasParamAttribute(I, Attribute::Alignment))
-    Copy.addAlignmentAttr(Attrs.getParamAlignment(I + 1));
+    Copy.addAlignmentAttr(Attrs.getParamAlignment(I));
   return Copy;
 }
 
@@ -3900,7 +3919,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
 
   // If the intrinsic takes MDNode arguments, verify that they are either global
   // or are local to *this* function.
-  for (Value *V : CS.args()) 
+  for (Value *V : CS.args())
     if (auto *MD = dyn_cast<MetadataAsValue>(V))
       visitMetadataAsValue(*MD, CS.getCaller());
 
@@ -3973,9 +3992,9 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
     auto IsValidAlignment = [&](uint64_t Alignment) {
       return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
     };
-    
-    uint64_t DstAlignment = CS.getParamAlignment(1),
-             SrcAlignment = CS.getParamAlignment(2);
+
+    uint64_t DstAlignment = CS.getParamAlignment(0),
+             SrcAlignment = CS.getParamAlignment(1);
 
     Assert(IsValidAlignment(DstAlignment),
            "incorrect alignment of the destination argument",
@@ -4212,7 +4231,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
   }
   case Intrinsic::masked_load: {
     Assert(CS.getType()->isVectorTy(), "masked_load: must return a vector", CS);
-    
+
     Value *Ptr = CS.getArgOperand(0);
     //Value *Alignment = CS.getArgOperand(1);
     Value *Mask = CS.getArgOperand(2);
@@ -4222,12 +4241,12 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
 
     // DataTy is the overloaded type
     Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
-    Assert(DataTy == CS.getType(), 
+    Assert(DataTy == CS.getType(),
            "masked_load: return must match pointer type", CS);
     Assert(PassThru->getType() == DataTy,
            "masked_load: pass through and data type must match", CS);
     Assert(Mask->getType()->getVectorNumElements() ==
-           DataTy->getVectorNumElements(), 
+           DataTy->getVectorNumElements(),
            "masked_load: vector mask must be same length as data", CS);
     break;
   }
@@ -4241,10 +4260,10 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
 
     // DataTy is the overloaded type
     Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
-    Assert(DataTy == Val->getType(), 
+    Assert(DataTy == Val->getType(),
            "masked_store: storee must match pointer type", CS);
     Assert(Mask->getType()->getVectorNumElements() ==
-           DataTy->getVectorNumElements(), 
+           DataTy->getVectorNumElements(),
            "masked_store: vector mask must be same length as data", CS);
     break;
   }
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 1bc0d7361d4..0afa1ba6ecd 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -25,7 +25,6 @@
 #include "llvm/LTO/LTOBackend.h"
 #include "llvm/Linker/IRMover.h"
 #include "llvm/Object/IRObjectFile.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -592,11 +591,9 @@ Error LTO::addThinLTO(BitcodeModule BM,
                       ArrayRef<InputFile::Symbol> Syms,
                       const SymbolResolution *&ResI,
                       const SymbolResolution *ResE) {
-  Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr = BM.getSummary();
-  if (!SummaryOrErr)
-    return SummaryOrErr.takeError();
-  ThinLTO.CombinedIndex.mergeFrom(std::move(*SummaryOrErr),
-                                  ThinLTO.ModuleMap.size());
+  if (Error Err =
+          BM.readSummary(ThinLTO.CombinedIndex, ThinLTO.ModuleMap.size()))
+    return Err;
 
   for (const InputFile::Symbol &Sym : Syms) {
     assert(ResI != ResE);
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 0d845a26d0c..440275c3425 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -33,7 +33,6 @@
 #include "llvm/Linker/Linker.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/IRObjectFile.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Support/CachePruning.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
@@ -566,25 +565,18 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
  * "thin-link".
  */
 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
-  std::unique_ptr<ModuleSummaryIndex> CombinedIndex;
+  std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
+      llvm::make_unique<ModuleSummaryIndex>();
   uint64_t NextModuleId = 0;
   for (auto &ModuleBuffer : Modules) {
-    Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
-        object::ModuleSummaryIndexObjectFile::create(
-            ModuleBuffer.getMemBuffer());
-    if (!ObjOrErr) {
+    if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(),
+                                           *CombinedIndex, NextModuleId++)) {
       // FIXME diagnose
       logAllUnhandledErrors(
-          ObjOrErr.takeError(), errs(),
-          "error: can't create ModuleSummaryIndexObjectFile for buffer: ");
+          std::move(Err), errs(),
+          "error: can't create module summary index for buffer: ");
       return nullptr;
     }
-    auto Index = (*ObjOrErr)->takeIndex();
-    if (CombinedIndex) {
-      CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
-    } else {
-      CombinedIndex = std::move(Index);
-    }
   }
   return CombinedIndex;
 }
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index ee9c25cda94..e86db933af3 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -63,7 +63,7 @@ using namespace llvm;
 
 namespace {
 
-typedef DenseMap<const MCSectionELF *, uint32_t> SectionIndexMapTy;
+using SectionIndexMapTy = DenseMap<const MCSectionELF *, uint32_t>;
 
 class ELFObjectWriter;
 
@@ -194,8 +194,8 @@ public:
                    ELFSymbolData &MSD, const MCAsmLayout &Layout);
 
   // Start and end offset of each section
-  typedef std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>>
-      SectionOffsetsTy;
+  using SectionOffsetsTy =
+      std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>>;
 
   bool shouldRelocateWithSymbol(const MCAssembler &Asm,
                                 const MCSymbolRefExpr *RefA,
@@ -208,7 +208,7 @@ public:
                         uint64_t &FixedValue) override;
 
   // Map from a signature symbol to the group section index
-  typedef DenseMap<const MCSymbol *, unsigned> RevGroupMapTy;
+  using RevGroupMapTy = DenseMap<const MCSymbol *, unsigned>;
 
   /// Compute the symbol table data
   ///
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 99a5c11a498..2b97ecc0fd2 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -145,7 +145,7 @@ void CodeViewContext::emitStringTable(MCObjectStreamer &OS) {
   MCSymbol *StringBegin = Ctx.createTempSymbol("strtab_begin", false),
            *StringEnd = Ctx.createTempSymbol("strtab_end", false);
 
-  OS.EmitIntValue(unsigned(ModuleSubstreamKind::StringTable), 4);
+  OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::StringTable), 4);
   OS.emitAbsoluteSymbolDiff(StringEnd, StringBegin, 4);
   OS.EmitLabel(StringBegin);
 
@@ -172,7 +172,7 @@ void CodeViewContext::emitFileChecksums(MCObjectStreamer &OS) {
   MCSymbol *FileBegin = Ctx.createTempSymbol("filechecksums_begin", false),
            *FileEnd = Ctx.createTempSymbol("filechecksums_end", false);
 
-  OS.EmitIntValue(unsigned(ModuleSubstreamKind::FileChecksums), 4);
+  OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::FileChecksums), 4);
   OS.emitAbsoluteSymbolDiff(FileEnd, FileBegin, 4);
   OS.EmitLabel(FileBegin);
 
@@ -197,7 +197,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
   MCSymbol *LineBegin = Ctx.createTempSymbol("linetable_begin", false),
            *LineEnd = Ctx.createTempSymbol("linetable_end", false);
 
-  OS.EmitIntValue(unsigned(ModuleSubstreamKind::Lines), 4);
+  OS.EmitIntValue(unsigned(ModuleDebugFragmentKind::Lines), 4);
   OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4);
   OS.EmitLabel(LineBegin);
   OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0);
@@ -208,7 +208,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
   bool HaveColumns = any_of(Locs, [](const MCCVLineEntry &LineEntry) {
     return LineEntry.getColumn() != 0;
   });
-  OS.EmitIntValue(HaveColumns ? int(LineFlags::HaveColumns) : 0, 2);
+  OS.EmitIntValue(HaveColumns ? int(LF_HaveColumns) : 0, 2);
   OS.emitAbsoluteSymbolDiff(FuncEnd, FuncBegin, 4);
 
   for (auto I = Locs.begin(), E = Locs.end(); I != E;) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 2fa9c03b608..f36a21bf112 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -412,7 +412,7 @@ private:
     DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE,
     DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED,
     DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE,
-    DK_MACROS_ON, DK_MACROS_OFF,
+    DK_MACROS_ON, DK_MACROS_OFF, DK_ALTMACRO, DK_NOALTMACRO,
     DK_MACRO, DK_EXITM, DK_ENDM, DK_ENDMACRO, DK_PURGEM,
     DK_SLEB128, DK_ULEB128,
     DK_ERR, DK_ERROR, DK_WARNING,
@@ -484,7 +484,8 @@ private:
   bool parseDirectiveEndMacro(StringRef Directive);
   bool parseDirectiveMacro(SMLoc DirectiveLoc);
   bool parseDirectiveMacrosOnOff(StringRef Directive);
-
+  // alternate macro mode directives
+  bool parseDirectiveAltmacro(StringRef Directive);
   // ".bundle_align_mode"
   bool parseDirectiveBundleAlignMode();
   // ".bundle_lock"
@@ -1922,6 +1923,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
       return parseDirectiveMacrosOnOff(IDVal);
     case DK_MACRO:
       return parseDirectiveMacro(IDLoc);
+    case DK_ALTMACRO:
+    case DK_NOALTMACRO:
+      return parseDirectiveAltmacro(IDVal);
     case DK_EXITM:
       return parseDirectiveExitMacro(IDVal);
     case DK_ENDM:
@@ -2270,9 +2274,18 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
         } else {
           bool VarargParameter = HasVararg && Index == (NParameters - 1);
           for (const AsmToken &Token : A[Index])
+            // For altmacro mode, you can write '%expr'.
+            // The prefix '%' evaluates the expression 'expr'
+            // and uses the result as a string (e.g. replace %(1+2) with the string "3").
+            // Here, we identify the integer token which is the result of the
+            // absolute expression evaluation and replace it with its string representation.
+            if ((Lexer.IsaAltMacroMode()) &&
+                 (*(Token.getString().begin()) == '%') && Token.is(AsmToken::Integer))
+              // Emit an integer value to the buffer.
+              OS << Token.getIntVal();
             // We expect no quotes around the string's contents when
             // parsing for varargs.
-            if (Token.getKind() != AsmToken::String || VarargParameter)
+            else if (Token.isNot(AsmToken::String) || VarargParameter)
               OS << Token.getString();
             else
               OS << Token.getStringContents();
@@ -2443,13 +2456,29 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
 
       NamedParametersFound = true;
     }
+    bool Vararg = HasVararg && Parameter == (NParameters - 1);
 
     if (NamedParametersFound && FA.Name.empty())
       return Error(IDLoc, "cannot mix positional and keyword arguments");
 
-    bool Vararg = HasVararg && Parameter == (NParameters - 1);
-    if (parseMacroArgument(FA.Value, Vararg))
-      return true;
+    if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Percent)) {
+        SMLoc StrLoc = Lexer.getLoc();
+        SMLoc EndLoc;
+        const MCExpr *AbsoluteExp;
+        int64_t Value;
+        /// Eat '%'
+        Lex();
+        if (parseExpression(AbsoluteExp, EndLoc))
+          return false;
+        if (!AbsoluteExp->evaluateAsAbsolute(Value))
+          return Error(StrLoc, "expected absolute expression");
+        const char *StrChar = StrLoc.getPointer();
+        const char *EndChar = EndLoc.getPointer();
+        AsmToken newToken(AsmToken::Integer, StringRef(StrChar , EndChar - StrChar), Value);
+        FA.Value.push_back(newToken);
+    }
+    else if(parseMacroArgument(FA.Value, Vararg))
+        return true;
 
     unsigned PI = Parameter;
     if (!FA.Name.empty()) {
@@ -3841,6 +3870,19 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
   return false;
 }
 
+/// parseDirectiveAltmacro
+/// ::= .altmacro
+/// ::= .noaltmacro
+bool AsmParser::parseDirectiveAltmacro(StringRef Directive) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '" + Directive + "' directive");
+  if (Directive == ".altmacro")
+    getLexer().SetAltMacroMode(true);
+  else
+    getLexer().SetAltMacroMode(false);
+  return false;
+}
+
 /// parseDirectiveMacrosOnOff
 /// ::= .macros_on
 /// ::= .macros_off
@@ -4938,6 +4980,8 @@ void AsmParser::initializeDirectiveKindMap() {
   DirectiveKindMap[".err"] = DK_ERR;
   DirectiveKindMap[".error"] = DK_ERROR;
   DirectiveKindMap[".warning"] = DK_WARNING;
+  DirectiveKindMap[".altmacro"] = DK_ALTMACRO;
+  DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO;
   DirectiveKindMap[".reloc"] = DK_RELOC;
   DirectiveKindMap[".dc"] = DK_DC;
   DirectiveKindMap[".dc.a"] = DK_DC_A;
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index f8fe78aece0..1d12ab85828 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -13,7 +13,7 @@
 
 using namespace llvm;
 
-MCAsmLexer::MCAsmLexer() {
+MCAsmLexer::MCAsmLexer() : AltMacroMode(false) {
   CurTok.emplace_back(AsmToken::Space, StringRef());
 }
 
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index fbd7ba60bc9..a0fb33846fc 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -58,7 +58,7 @@ void StringTableBuilder::write(raw_ostream &OS) const {
   OS << Data;
 }
 
-typedef std::pair<CachedHashStringRef, size_t> StringPair;
+using StringPair = std::pair<CachedHashStringRef, size_t>;
 
 void StringTableBuilder::write(uint8_t *Buf) const {
   assert(isFinalized());
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 6444046a30d..0540c4c47a3 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -468,16 +468,16 @@ static void ApplyRelocations(
 
 // Write out the portions of the relocation records that the linker will
 // need to handle.
-static void WriteRelocations(
-    ArrayRef<WasmRelocationEntry> Relocations,
-    raw_pwrite_stream &Stream,
-    DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices)
-{
+static void
+WriteRelocations(ArrayRef<WasmRelocationEntry> Relocations,
+                 raw_pwrite_stream &Stream,
+                 DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+                 uint64_t HeaderSize) {
   for (const WasmRelocationEntry RelEntry : Relocations) {
     encodeULEB128(RelEntry.Type, Stream);
 
     uint64_t Offset = RelEntry.Offset +
-                      RelEntry.FixupSection->getSectionOffset();
+                      RelEntry.FixupSection->getSectionOffset() + HeaderSize;
     uint32_t Index = SymbolIndices[RelEntry.Symbol];
     int64_t Addend = RelEntry.Addend;
 
@@ -913,12 +913,14 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
   // For now, always emit the memory section, since loads and stores are not
   // valid without it. In the future, we could perhaps be more clever and omit
   // it if there are no loads or stores.
-  startSection(Section, wasm::WASM_SEC_MEMORY);
+  uint32_t NumPages =
+      (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize;
 
+  startSection(Section, wasm::WASM_SEC_MEMORY);
   encodeULEB128(1, getStream()); // number of memory spaces
 
   encodeULEB128(0, getStream()); // flags
-  encodeULEB128(DataBytes.size(), getStream()); // initial
+  encodeULEB128(NumPages, getStream()); // initial
 
   endSection(Section);
 
@@ -1050,6 +1052,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
   }
 
   // === Data Section ==========================================================
+  uint32_t DataSectionHeaderSize = 0;
   if (!DataBytes.empty()) {
     startSection(Section, wasm::WASM_SEC_DATA);
 
@@ -1059,11 +1062,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
     encodeSLEB128(0, getStream()); // offset
     write8(wasm::WASM_OPCODE_END);
     encodeULEB128(DataBytes.size(), getStream()); // size
+    DataSectionHeaderSize = getStream().tell() - Section.ContentsOffset;
     writeBytes(DataBytes); // data
 
     // Apply fixups.
     ApplyRelocations(DataRelocations, getStream(), SymbolIndices,
-                     Section.ContentsOffset);
+                     Section.ContentsOffset + DataSectionHeaderSize);
 
     endSection(Section);
   }
@@ -1107,7 +1111,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
 
     encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream());
 
-    WriteRelocations(CodeRelocations, getStream(), SymbolIndices);
+    WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0);
     WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
 
     endSection(Section);
@@ -1121,7 +1125,8 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
 
     encodeULEB128(DataRelocations.size(), getStream());
 
-    WriteRelocations(DataRelocations, getStream(), SymbolIndices);
+    WriteRelocations(DataRelocations, getStream(), SymbolIndices,
+                     DataSectionHeaderSize);
 
     endSection(Section);
   }
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index da8fe73f823..e99a548ac00 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Support/JamCRC.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm> 
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -54,7 +55,7 @@ using llvm::support::endian::write32le;
 
 namespace {
 
-typedef SmallString<COFF::NameSize> name;
+using name = SmallString<COFF::NameSize>;
 
 enum AuxiliaryType {
   ATFunctionDefinition,
@@ -75,7 +76,7 @@ class COFFSymbol {
 public:
   COFF::symbol Data = {};
 
-  typedef SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+  using AuxiliarySymbols = SmallVector<AuxSymbol, 1>;
 
   name Name;
   int Index;
@@ -107,7 +108,7 @@ struct COFFRelocation {
   static size_t size() { return COFF::RelocationSize; }
 };
 
-typedef std::vector<COFFRelocation> relocations;
+using relocations = std::vector<COFFRelocation>;
 
 class COFFSection {
 public:
@@ -124,11 +125,11 @@ public:
 
 class WinCOFFObjectWriter : public MCObjectWriter {
 public:
-  typedef std::vector<std::unique_ptr<COFFSymbol>> symbols;
-  typedef std::vector<std::unique_ptr<COFFSection>> sections;
+  using symbols = std::vector<std::unique_ptr<COFFSymbol>>;
+  using sections = std::vector<std::unique_ptr<COFFSection>>;
 
-  typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
-  typedef DenseMap<MCSection const *, COFFSection *> section_map;
+  using symbol_map = DenseMap<MCSymbol const *, COFFSymbol *>;
+  using section_map = DenseMap<MCSection const *, COFFSection *>;
 
   std::unique_ptr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
 
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 2007f560c16..08365e71c2f 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -11,7 +11,6 @@ add_llvm_library(LLVMObject
   IRSymtab.cpp
   MachOObjectFile.cpp
   MachOUniversal.cpp
-  ModuleSummaryIndexObjectFile.cpp
   ModuleSymbolTable.cpp
   Object.cpp
   ObjectFile.cpp
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index e89a4a315c4..5798a3540f5 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -13,9 +13,11 @@
 using namespace llvm;
 using namespace object;
 
-#define ELF_RELOC(name, value)                                          \
-  case ELF::name:                                                       \
-    return #name;                                                       \
+#define STRINGIFY_ENUM_CASE(ns, name)                                          \
+  case ns::name:                                                               \
+    return #name;
+
+#define ELF_RELOC(name, value) STRINGIFY_ENUM_CASE(ELF, name)
 
 StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
                                                  uint32_t Type) {
@@ -141,3 +143,61 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
 }
 
 #undef ELF_RELOC
+
+StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
+  switch (Machine) {
+  case ELF::EM_ARM:
+    switch (Type) {
+      STRINGIFY_ENUM_CASE(ELF, SHT_ARM_EXIDX);
+      STRINGIFY_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP);
+      STRINGIFY_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES);
+      STRINGIFY_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY);
+      STRINGIFY_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION);
+    }
+    break;
+  case ELF::EM_HEXAGON:
+    switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_HEX_ORDERED); }
+    break;
+  case ELF::EM_X86_64:
+    switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_X86_64_UNWIND); }
+    break;
+  case ELF::EM_MIPS:
+  case ELF::EM_MIPS_RS3_LE:
+    switch (Type) {
+      STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO);
+      STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS);
+      STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
+      STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF);
+    }
+    break;
+  default:
+    break;
+  }
+
+  switch (Type) {
+    STRINGIFY_ENUM_CASE(ELF, SHT_NULL);
+    STRINGIFY_ENUM_CASE(ELF, SHT_PROGBITS);
+    STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB);
+    STRINGIFY_ENUM_CASE(ELF, SHT_STRTAB);
+    STRINGIFY_ENUM_CASE(ELF, SHT_RELA);
+    STRINGIFY_ENUM_CASE(ELF, SHT_HASH);
+    STRINGIFY_ENUM_CASE(ELF, SHT_DYNAMIC);
+    STRINGIFY_ENUM_CASE(ELF, SHT_NOTE);
+    STRINGIFY_ENUM_CASE(ELF, SHT_NOBITS);
+    STRINGIFY_ENUM_CASE(ELF, SHT_REL);
+    STRINGIFY_ENUM_CASE(ELF, SHT_SHLIB);
+    STRINGIFY_ENUM_CASE(ELF, SHT_DYNSYM);
+    STRINGIFY_ENUM_CASE(ELF, SHT_INIT_ARRAY);
+    STRINGIFY_ENUM_CASE(ELF, SHT_FINI_ARRAY);
+    STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY);
+    STRINGIFY_ENUM_CASE(ELF, SHT_GROUP);
+    STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX);
+    STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
+    STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
+    STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
+    STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verneed);
+    STRINGIFY_ENUM_CASE(ELF, SHT_GNU_versym);
+  default:
+    return "Unknown";
+  }
+}
diff --git a/lib/Object/ModuleSummaryIndexObjectFile.cpp b/lib/Object/ModuleSummaryIndexObjectFile.cpp
deleted file mode 100644
index 91f93a41032..00000000000
--- a/lib/Object/ModuleSummaryIndexObjectFile.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-//==- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation -==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Part of the ModuleSummaryIndexObjectFile class implementation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/IR/ModuleSummaryIndex.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/Error.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
-#include <memory>
-#include <system_error>
-
-using namespace llvm;
-using namespace object;
-
-static cl::opt<bool> IgnoreEmptyThinLTOIndexFile(
-    "ignore-empty-index-file", cl::ZeroOrMore,
-    cl::desc(
-        "Ignore an empty index file and perform non-ThinLTO compilation"),
-    cl::init(false));
-
-ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile(
-    MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I)
-    : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) {
-}
-
-ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() = default;
-
-std::unique_ptr<ModuleSummaryIndex> ModuleSummaryIndexObjectFile::takeIndex() {
-  return std::move(Index);
-}
-
-ErrorOr<MemoryBufferRef>
-ModuleSummaryIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) {
-  for (const SectionRef &Sec : Obj.sections()) {
-    if (Sec.isBitcode()) {
-      StringRef SecContents;
-      if (std::error_code EC = Sec.getContents(SecContents))
-        return EC;
-      return MemoryBufferRef(SecContents, Obj.getFileName());
-    }
-  }
-
-  return object_error::bitcode_section_not_found;
-}
-
-ErrorOr<MemoryBufferRef>
-ModuleSummaryIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) {
-  sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer());
-  switch (Type) {
-  case sys::fs::file_magic::bitcode:
-    return Object;
-  case sys::fs::file_magic::elf_relocatable:
-  case sys::fs::file_magic::macho_object:
-  case sys::fs::file_magic::coff_object: {
-    Expected<std::unique_ptr<ObjectFile>> ObjFile =
-        ObjectFile::createObjectFile(Object, Type);
-    if (!ObjFile)
-      return errorToErrorCode(ObjFile.takeError());
-    return findBitcodeInObject(*ObjFile->get());
-  }
-  default:
-    return object_error::invalid_file_type;
-  }
-}
-
-// Parse module summary index in the given memory buffer.
-// Return new ModuleSummaryIndexObjectFile instance containing parsed
-// module summary/index.
-Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>>
-ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) {
-  ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
-  if (!BCOrErr)
-    return errorCodeToError(BCOrErr.getError());
-
-  Expected<std::unique_ptr<ModuleSummaryIndex>> IOrErr =
-      getModuleSummaryIndex(BCOrErr.get());
-
-  if (!IOrErr)
-    return IOrErr.takeError();
-
-  std::unique_ptr<ModuleSummaryIndex> Index = std::move(IOrErr.get());
-  return llvm::make_unique<ModuleSummaryIndexObjectFile>(Object,
-                                                         std::move(Index));
-}
-
-// Parse the module summary index out of an IR file and return the summary
-// index object if found, or nullptr if not.
-Expected<std::unique_ptr<ModuleSummaryIndex>>
-llvm::getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier) {
-  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
-      MemoryBuffer::getFileOrSTDIN(Path);
-  std::error_code EC = FileOrErr.getError();
-  if (EC)
-    return errorCodeToError(EC);
-  std::unique_ptr<MemoryBuffer> MemBuffer = std::move(FileOrErr.get());
-  // If Identifier is non-empty, use it as the buffer identifier, which
-  // will become the module path in the index.
-  if (Identifier.empty())
-    Identifier = MemBuffer->getBufferIdentifier();
-  MemoryBufferRef BufferRef(MemBuffer->getBuffer(), Identifier);
-  if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize())
-    return nullptr;
-  Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
-      object::ModuleSummaryIndexObjectFile::create(BufferRef);
-  if (!ObjOrErr)
-    return ObjOrErr.takeError();
-
-  object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr;
-  return Obj.takeIndex();
-}
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 55ac2541948..8db65f7f0e8 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -125,6 +125,7 @@
 #include "llvm/Transforms/Scalar/Reassociate.h"
 #include "llvm/Transforms/Scalar/SCCP.h"
 #include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
 #include "llvm/Transforms/Scalar/Sink.h"
 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
@@ -150,6 +151,10 @@ using namespace llvm;
 static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations",
                                              cl::ReallyHidden, cl::init(4));
 
+static cl::opt<bool> EnableGVNHoist(
+    "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
+    cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
+
 static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$");
 
 static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
@@ -454,7 +459,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
   EarlyFPM.addPass(SROA());
   EarlyFPM.addPass(EarlyCSEPass());
   EarlyFPM.addPass(LowerExpectIntrinsicPass());
-  EarlyFPM.addPass(GVNHoistPass());
+  if (EnableGVNHoist)
+    EarlyFPM.addPass(GVNHoistPass());
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
 
   // Interprocedural constant propagation now that basic cleanup has occured
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
index efd4c097a67..d59ec7f8584 100644
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -229,6 +229,7 @@ LOOP_PASS("strength-reduce", LoopStrengthReducePass())
 LOOP_PASS("indvars", IndVarSimplifyPass())
 LOOP_PASS("unroll", LoopUnrollPass::create())
 LOOP_PASS("unroll-full", LoopUnrollPass::createFull())
+LOOP_PASS("unswitch", SimpleLoopUnswitchPass())
 LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
 LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
 LOOP_PASS("loop-predication", LoopPredicationPass())
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 1227d7528c8..b6c8cbee66d 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -392,13 +392,6 @@ int APInt::compareSigned(const APInt& RHS) const {
   return tcCompare(pVal, RHS.pVal, getNumWords());
 }
 
-void APInt::setBit(unsigned bitPosition) {
-  if (isSingleWord())
-    VAL |= maskBit(bitPosition);
-  else
-    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
-}
-
 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
   unsigned loWord = whichWord(loBit);
   unsigned hiWord = whichWord(hiBit);
@@ -426,15 +419,6 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
     pVal[word] = WORD_MAX;
 }
 
-/// Set the given bit to 0 whose position is given as "bitPosition".
-/// @brief Set a given bit to 0.
-void APInt::clearBit(unsigned bitPosition) {
-  if (isSingleWord())
-    VAL &= ~maskBit(bitPosition);
-  else
-    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
-}
-
 /// @brief Toggle every bit to its opposite value.
 void APInt::flipAllBitsSlowCase() {
   tcComplement(pVal, getNumWords());
@@ -625,6 +609,17 @@ APInt APInt::getLoBits(unsigned numBits) const {
   return Result;
 }
 
+/// Return a value containing V broadcasted over NewLen bits.
+APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
+  assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
+
+  APInt Val = V.zextOrSelf(NewLen);
+  for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
+    Val |= Val << I;
+
+  return Val;
+}
+
 unsigned APInt::countLeadingZerosSlowCase() const {
   unsigned Count = 0;
   for (int i = getNumWords()-1; i >= 0; --i) {
@@ -844,7 +839,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 
   // Otherwise, we have to shift the mantissa bits up to the right location
   APInt Tmp(width, mantissa);
-  Tmp = Tmp.shl((unsigned)exp - 52);
+  Tmp <<= (unsigned)exp - 52;
   return isNeg ? -Tmp : Tmp;
 }
 
@@ -1072,9 +1067,10 @@ void APInt::lshrSlowCase(unsigned ShiftAmt) {
 
 /// Left-shift this APInt by shiftAmt.
 /// @brief Left-shift function.
-APInt APInt::shl(const APInt &shiftAmt) const {
+APInt &APInt::operator<<=(const APInt &shiftAmt) {
   // It's undefined behavior in C to shift by BitWidth or greater.
-  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
+  *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
+  return *this;
 }
 
 void APInt::shlSlowCase(unsigned ShiftAmt) {
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 491614b4bf6..63c440037c2 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -130,7 +130,6 @@ add_llvm_library(LLVMSupport
   Process.cpp
   Program.cpp
   RWMutex.cpp
-  SearchForAddressOfSpecialSymbol.cpp
   Signals.cpp
   TargetRegistry.cpp
   ThreadLocal.cpp
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 22fb3f2cb9c..1541a572630 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -20,169 +20,164 @@
 #include "llvm/Support/Mutex.h"
 #include <cstdio>
 #include <cstring>
+#include <vector>
 
+using namespace llvm;
+using namespace llvm::sys;
+
+// All methods for HandleSet should be used holding SymbolsMutex.
+class DynamicLibrary::HandleSet {
+  typedef std::vector<void *> HandleList;
+  HandleList Handles;
+  void *Process;
+
+public:
+  static void *DLOpen(const char *Filename, std::string *Err);
+  static void DLClose(void *Handle);
+  static void *DLSym(void *Handle, const char *Symbol);
+
+  HandleSet() : Process(nullptr) {}
+  ~HandleSet();
+
+  HandleList::iterator Find(void *Handle) {
+    return std::find(Handles.begin(), Handles.end(), Handle);
+  }
+
+  bool Contains(void *Handle) {
+    return Handle == Process || Find(Handle) != Handles.end();
+  }
+
+  bool AddLibrary(void *Handle, bool IsProcess = false, bool CanClose = true) {
+#ifdef LLVM_ON_WIN32
+    assert((Handle == this ? IsProcess : !IsProcess) && "Bad Handle.");
+#endif
+
+    if (LLVM_LIKELY(!IsProcess)) {
+      if (Find(Handle) != Handles.end()) {
+        if (CanClose)
+          DLClose(Handle);
+        return false;
+      }
+      Handles.push_back(Handle);
+    } else {
+#ifndef LLVM_ON_WIN32
+      if (Process) {
+        if (CanClose)
+          DLClose(Process);
+        if (Process == Handle)
+          return false;
+      }
+#endif
+      Process = Handle;
+    }
+    return true;
+  }
+
+  void *Lookup(const char *Symbol) {
+    // Process handle gets first try.
+    if (Process) {
+      if (void *Ptr = DLSym(Process, Symbol))
+        return Ptr;
+#ifndef NDEBUG
+      for (void *Handle : Handles)
+        assert(!DLSym(Handle, Symbol) && "Symbol exists in non process handle");
+#endif
+    } else {
+      // Iterate in reverse, so newer libraries/symbols override older.
+      for (auto &&I = Handles.rbegin(), E = Handles.rend(); I != E; ++I) {
+        if (void *Ptr = DLSym(*I, Symbol))
+          return Ptr;
+      }
+    }
+    return nullptr;
+  }
+};
+
+namespace {
 // Collection of symbol name/value pairs to be searched prior to any libraries.
-static llvm::ManagedStatic<llvm::StringMap<void *> > ExplicitSymbols;
-static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > SymbolsMutex;
-
-void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName,
-                                          void *symbolValue) {
-  SmartScopedLock<true> lock(*SymbolsMutex);
-  (*ExplicitSymbols)[symbolName] = symbolValue;
+static llvm::ManagedStatic<llvm::StringMap<void *>> ExplicitSymbols;
+// Collection of known library handles.
+static llvm::ManagedStatic<DynamicLibrary::HandleSet> OpenedHandles;
+// Lock for ExplicitSymbols and OpenedHandles.
+static llvm::ManagedStatic<llvm::sys::SmartMutex<true>> SymbolsMutex;
 }
 
-char llvm::sys::DynamicLibrary::Invalid = 0;
-
 #ifdef LLVM_ON_WIN32
 
 #include "Windows/DynamicLibrary.inc"
 
 #else
 
-#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
-#include <dlfcn.h>
-using namespace llvm;
-using namespace llvm::sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//===          independent code.
-//===----------------------------------------------------------------------===//
-
-static llvm::ManagedStatic<DenseSet<void *> > OpenedHandles;
-
-DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
-                                                   std::string *errMsg) {
-  SmartScopedLock<true> lock(*SymbolsMutex);
-
-  void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL);
-  if (!handle) {
-    if (errMsg) *errMsg = dlerror();
-    return DynamicLibrary();
-  }
-
-#ifdef __CYGWIN__
-  // Cygwin searches symbols only in the main
-  // with the handle of dlopen(NULL, RTLD_GLOBAL).
-  if (!filename)
-    handle = RTLD_DEFAULT;
-#endif
-
-  // If we've already loaded this library, dlclose() the handle in order to
-  // keep the internal refcount at +1.
-  if (!OpenedHandles->insert(handle).second)
-    dlclose(handle);
-
-  return DynamicLibrary(handle);
-}
-
-DynamicLibrary DynamicLibrary::addPermanentLibrary(void *handle,
-                                                   std::string *errMsg) {
-  SmartScopedLock<true> lock(*SymbolsMutex);
-  // If we've already loaded this library, tell the caller.
-  if (!OpenedHandles->insert(handle).second) {
-    if (errMsg) *errMsg = "Library already loaded";
-    return DynamicLibrary();
-  }
-
-  return DynamicLibrary(handle);
-}
-
-void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
-  if (!isValid())
-    return nullptr;
-  return dlsym(Data, symbolName);
-}
-
-#else
-
-using namespace llvm;
-using namespace llvm::sys;
-
-DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
-                                                   std::string *errMsg) {
-  if (errMsg) *errMsg = "dlopen() not supported on this platform";
-  return DynamicLibrary();
-}
-
-void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
-  return NULL;
-}
+#include "Unix/DynamicLibrary.inc"
 
 #endif
 
+char DynamicLibrary::Invalid;
+
 namespace llvm {
-void *SearchForAddressOfSpecialSymbol(const char* symbolName);
+void *SearchForAddressOfSpecialSymbol(const char *SymbolName) {
+  return DoSearch(SymbolName); // DynamicLibrary.inc
+}
 }
 
-void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
+void DynamicLibrary::AddSymbol(StringRef SymbolName, void *SymbolValue) {
   SmartScopedLock<true> Lock(*SymbolsMutex);
+  (*ExplicitSymbols)[SymbolName] = SymbolValue;
+}
 
-  // First check symbols added via AddSymbol().
-  if (ExplicitSymbols.isConstructed()) {
-    StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
+DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName,
+                                                   std::string *Err) {
+  SmartScopedLock<true> Lock(*SymbolsMutex);
+  void *Handle = HandleSet::DLOpen(FileName, Err);
+  if (Handle != &Invalid)
+    OpenedHandles->AddLibrary(Handle, /*IsProcess*/ FileName == nullptr);
 
-    if (i != ExplicitSymbols->end())
-      return i->second;
-  }
+  return DynamicLibrary(Handle);
+}
 
-#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
-  // Now search the libraries.
-  if (OpenedHandles.isConstructed()) {
-    for (DenseSet<void *>::iterator I = OpenedHandles->begin(),
-         E = OpenedHandles->end(); I != E; ++I) {
-      //lt_ptr ptr = lt_dlsym(*I, symbolName);
-      void *ptr = dlsym(*I, symbolName);
-      if (ptr) {
-        return ptr;
-      }
+DynamicLibrary DynamicLibrary::addPermanentLibrary(void *Handle,
+                                                   std::string *Err) {
+  SmartScopedLock<true> Lock(*SymbolsMutex);
+  // If we've already loaded this library, tell the caller.
+  if (!OpenedHandles->AddLibrary(Handle, /*IsProcess*/false, /*CanClose*/false))
+    *Err = "Library already loaded";
+
+  return DynamicLibrary(Handle);
+}
+
+void *DynamicLibrary::getAddressOfSymbol(const char *SymbolName) {
+  if (!isValid())
+    return nullptr;
+  return HandleSet::DLSym(Data, SymbolName);
+}
+
+void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) {
+  {
+    SmartScopedLock<true> Lock(*SymbolsMutex);
+
+    // First check symbols added via AddSymbol().
+    if (ExplicitSymbols.isConstructed()) {
+      StringMap<void *>::iterator i = ExplicitSymbols->find(SymbolName);
+
+      if (i != ExplicitSymbols->end())
+        return i->second;
+    }
+
+    // Now search the libraries.
+    if (OpenedHandles.isConstructed()) {
+      if (void *Ptr = OpenedHandles->Lookup(SymbolName))
+        return Ptr;
     }
   }
-#endif
 
-  if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
-    return Result;
-
-// This macro returns the address of a well-known, explicit symbol
-#define EXPLICIT_SYMBOL(SYM) \
-   if (!strcmp(symbolName, #SYM)) return &SYM
-
-// On linux we have a weird situation. The stderr/out/in symbols are both
-// macros and global variables because of standards requirements. So, we
-// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
-#if defined(__linux__) and !defined(__ANDROID__)
-  {
-    EXPLICIT_SYMBOL(stderr);
-    EXPLICIT_SYMBOL(stdout);
-    EXPLICIT_SYMBOL(stdin);
-  }
-#else
-  // For everything else, we want to check to make sure the symbol isn't defined
-  // as a macro before using EXPLICIT_SYMBOL.
-  {
-#ifndef stdin
-    EXPLICIT_SYMBOL(stdin);
-#endif
-#ifndef stdout
-    EXPLICIT_SYMBOL(stdout);
-#endif
-#ifndef stderr
-    EXPLICIT_SYMBOL(stderr);
-#endif
-  }
-#endif
-#undef EXPLICIT_SYMBOL
-
-  return nullptr;
+  return llvm::SearchForAddressOfSpecialSymbol(SymbolName);
 }
 
-#endif // LLVM_ON_WIN32
-
 //===----------------------------------------------------------------------===//
 // C API.
 //===----------------------------------------------------------------------===//
 
-LLVMBool LLVMLoadLibraryPermanently(const char* Filename) {
+LLVMBool LLVMLoadLibraryPermanently(const char *Filename) {
   return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename);
 }
 
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 5b079ff211f..abf61b73a70 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/raw_ostream.h"
 
 #include <cstdarg>
+#include <cstdio>
 #include <tuple>
 
 #ifdef HAVE_CRASHREPORTERCLIENT_H
diff --git a/lib/Support/ScopedPrinter.cpp b/lib/Support/ScopedPrinter.cpp
index d8ee1efd8f3..537ff62c7b0 100644
--- a/lib/Support/ScopedPrinter.cpp
+++ b/lib/Support/ScopedPrinter.cpp
@@ -21,7 +21,8 @@ const std::string to_hexString(uint64_t Value, bool UpperCase) {
 }
 
 void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str,
-                                    ArrayRef<uint8_t> Data, bool Block) {
+                                    ArrayRef<uint8_t> Data, bool Block,
+                                    uint32_t StartOffset) {
   if (Data.size() > 16)
     Block = true;
 
@@ -31,7 +32,8 @@ void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str,
       OS << ": " << Str;
     OS << " (\n";
     if (!Data.empty())
-      OS << format_bytes_with_ascii(Data, 0, 16, 4, (IndentLevel + 1) * 2, true)
+      OS << format_bytes_with_ascii(Data, StartOffset, 16, 4,
+                                    (IndentLevel + 1) * 2, true)
          << "\n";
     startLine() << ")\n";
   } else {
diff --git a/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/lib/Support/SearchForAddressOfSpecialSymbol.cpp
deleted file mode 100644
index 55f3320f640..00000000000
--- a/lib/Support/SearchForAddressOfSpecialSymbol.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This file pulls the addresses of certain symbols out of the linker.  It must
-//  include as few header files as possible because it declares the symbols as
-//  void*, which would conflict with the actual symbol type if any header
-//  declared it.
-//
-//===----------------------------------------------------------------------===//
-
-#include <string.h>
-
-// Must declare the symbols in the global namespace.
-static void *DoSearch(const char* symbolName) {
-#define EXPLICIT_SYMBOL(SYM) \
-   extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
-
-  // If this is darwin, it has some funky issues, try to solve them here.  Some
-  // important symbols are marked 'private external' which doesn't allow
-  // SearchForAddressOfSymbol to find them.  As such, we special case them here,
-  // there is only a small handful of them.
-
-#ifdef __APPLE__
-  {
-    // __eprintf is sometimes used for assert() handling on x86.
-    //
-    // FIXME: Currently disabled when using Clang, as we don't always have our
-    // runtime support libraries available.
-#ifndef __clang__
-#ifdef __i386__
-    EXPLICIT_SYMBOL(__eprintf);
-#endif
-#endif
-  }
-#endif
-
-#ifdef __CYGWIN__
-  {
-    EXPLICIT_SYMBOL(_alloca);
-    EXPLICIT_SYMBOL(__main);
-  }
-#endif
-
-#undef EXPLICIT_SYMBOL
-  return nullptr;
-}
-
-namespace llvm {
-void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
-  return DoSearch(symbolName);
-}
-}  // namespace llvm
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index ca2391c10ff..5199fad7d9e 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -51,9 +51,7 @@ static LineNoCacheTy *getCache(void *Ptr) {
 }
 
 SourceMgr::~SourceMgr() {
-  // Delete the line # cache if allocated.
-  if (LineNoCacheTy *Cache = getCache(LineNoCache))
-    delete Cache;
+  delete getCache(LineNoCache);
 }
 
 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc
new file mode 100644
index 00000000000..a0110e7044e
--- /dev/null
+++ b/lib/Support/Unix/DynamicLibrary.inc
@@ -0,0 +1,131 @@
+//===- Unix/DynamicLibrary.cpp - Unix DL Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the UNIX specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
+#include <dlfcn.h>
+
+DynamicLibrary::HandleSet::~HandleSet() {
+  for (void *Handle : Handles)
+    ::dlclose(Handle);
+  if (Process)
+    ::dlclose(Process);
+}
+
+void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
+  void *Handle = ::dlopen(File, RTLD_LAZY|RTLD_GLOBAL);
+  if (!Handle) {
+    if (Err) *Err = ::dlerror();
+    return &DynamicLibrary::Invalid;
+  }
+
+#ifdef __CYGWIN__
+  // Cygwin searches symbols only in the main
+  // with the handle of dlopen(NULL, RTLD_GLOBAL).
+  if (!Filename)
+    Handle = RTLD_DEFAULT;
+#endif
+
+  return Handle;
+}
+
+void DynamicLibrary::HandleSet::DLClose(void *Handle) {
+  ::dlclose(Handle);
+}
+
+void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) {
+  return ::dlsym(Handle, Symbol);
+}
+
+#else // !HAVE_DLOPEN
+
+DynamicLibrary::HandleSet::~HandleSet() {}
+
+void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
+  if (Err) *Err = "dlopen() not supported on this platform";
+  return &Invalid;
+}
+
+void DynamicLibrary::HandleSet::DLClose(void *Handle) {
+}
+
+void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) {
+  return nullptr;
+}
+
+#endif
+
+// Must declare the symbols in the global namespace.
+static void *DoSearch(const char* SymbolName) {
+#define EXPLICIT_SYMBOL(SYM) \
+   extern void *SYM; if (!strcmp(SymbolName, #SYM)) return &SYM
+
+  // If this is darwin, it has some funky issues, try to solve them here.  Some
+  // important symbols are marked 'private external' which doesn't allow
+  // SearchForAddressOfSymbol to find them.  As such, we special case them here,
+  // there is only a small handful of them.
+
+#ifdef __APPLE__
+  {
+    // __eprintf is sometimes used for assert() handling on x86.
+    //
+    // FIXME: Currently disabled when using Clang, as we don't always have our
+    // runtime support libraries available.
+#ifndef __clang__
+#ifdef __i386__
+    EXPLICIT_SYMBOL(__eprintf);
+#endif
+#endif
+  }
+#endif
+
+#ifdef __CYGWIN__
+  {
+    EXPLICIT_SYMBOL(_alloca);
+    EXPLICIT_SYMBOL(__main);
+  }
+#endif
+
+#undef EXPLICIT_SYMBOL
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+   if (!strcmp(SymbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__) and !defined(__ANDROID__)
+  {
+    EXPLICIT_SYMBOL(stderr);
+    EXPLICIT_SYMBOL(stdout);
+    EXPLICIT_SYMBOL(stdin);
+  }
+#else
+  // For everything else, we want to check to make sure the symbol isn't defined
+  // as a macro before using EXPLICIT_SYMBOL.
+  {
+#ifndef stdin
+    EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+    EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+    EXPLICIT_SYMBOL(stderr);
+#endif
+  }
+#endif
+#undef EXPLICIT_SYMBOL
+
+  return nullptr;
+}
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 709499deeaf..0b54b5dfdbc 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -12,98 +12,140 @@
 //===----------------------------------------------------------------------===//
 
 #include "WindowsSupport.h"
+#include "llvm/Support/raw_ostream.h"
 
-#ifdef __MINGW32__
- #include <imagehlp.h>
-#else
- #include <dbghelp.h>
-#endif
-
-#ifdef _MSC_VER
- #include <ntverp.h>
-#endif
-
-namespace llvm {
+#include <psapi.h>
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only Win32 specific code
 //===          and must not be UNIX code.
 //===----------------------------------------------------------------------===//
 
-typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID);
-static fpEnumerateLoadedModules fEnumerateLoadedModules;
-static llvm::ManagedStatic<DenseSet<HMODULE> > OpenedHandles;
 
-static bool loadDebugHelp(void) {
-  HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
-  if (hLib) {
-    fEnumerateLoadedModules = (fpEnumerateLoadedModules)
-      ::GetProcAddress(hLib, "EnumerateLoadedModules64");
-  }
-  return fEnumerateLoadedModules != 0;
+DynamicLibrary::HandleSet::~HandleSet() {
+  for (void *Handle : Handles)
+    FreeLibrary(HMODULE(Handle));
+
+  // 'Process' should not be released on Windows.
+  assert((!Process || Process==this) && "Bad Handle");
 }
 
-static BOOL CALLBACK
-ELM_Callback(PCSTR ModuleName, DWORD64 ModuleBase,
-             ULONG ModuleSize, PVOID UserContext) {
-  OpenedHandles->insert((HMODULE)ModuleBase);
-  return TRUE;
-}
+void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) {
+  // Create the instance and return it to be the *Process* handle
+  // simillar to dlopen(NULL, RTLD_LAZY|RTLD_GLOBAL)
+  if (!File)
+    return &(*OpenedHandles);
 
-sys::DynamicLibrary
-sys::DynamicLibrary::getPermanentLibrary(const char *filename,
-                                         std::string *errMsg) {
-  SmartScopedLock<true> lock(*SymbolsMutex);
-
-  if (!filename) {
-    // When no file is specified, enumerate all DLLs and EXEs in the process.
-    if (!fEnumerateLoadedModules) {
-      if (!loadDebugHelp()) {
-        assert(false && "These APIs should always be available");
-        return DynamicLibrary();
-      }
-    }
-
-    fEnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
-    // Dummy library that represents "search all handles".
-    // This is mostly to ensure that the return value still shows up as "valid".
-    return DynamicLibrary(&OpenedHandles);
-  }
-
-  SmallVector<wchar_t, MAX_PATH> filenameUnicode;
-  if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
+  SmallVector<wchar_t, MAX_PATH> FileUnicode;
+  if (std::error_code ec = windows::UTF8ToUTF16(File, FileUnicode)) {
     SetLastError(ec.value());
-    MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16");
-    return DynamicLibrary();
+    MakeErrMsg(Err, std::string(File) + ": Can't convert to UTF-16");
+    return &DynamicLibrary::Invalid;
   }
 
-  HMODULE a_handle = LoadLibraryW(filenameUnicode.data());
-
-  if (a_handle == 0) {
-    MakeErrMsg(errMsg, std::string(filename) + ": Can't open");
-    return DynamicLibrary();
+  HMODULE Handle = LoadLibraryW(FileUnicode.data());
+  if (Handle == NULL) {
+    MakeErrMsg(Err, std::string(File) + ": Can't open");
+    return &DynamicLibrary::Invalid;
   }
 
-  // If we've already loaded this library, FreeLibrary() the handle in order to
-  // keep the internal refcount at +1.
-  if (!OpenedHandles->insert(a_handle).second)
-    FreeLibrary(a_handle);
-
-  return DynamicLibrary(a_handle);
+  return reinterpret_cast<void*>(Handle);
 }
 
-sys::DynamicLibrary
-sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) {
-  SmartScopedLock<true> lock(*SymbolsMutex);
-  // If we've already loaded this library, tell the caller.
-  if (!OpenedHandles->insert((HMODULE)handle).second) {
-    MakeErrMsg(errMsg, "Library already loaded");
-    return DynamicLibrary();
-  }
-
-  return DynamicLibrary(handle);
+static DynamicLibrary::HandleSet *IsOpenedHandlesInstance(void *Handle) {
+  if (!OpenedHandles.isConstructed())
+    return nullptr;
+  DynamicLibrary::HandleSet &Inst = *OpenedHandles;
+  return Handle == &Inst ? &Inst : nullptr;
 }
 
+void DynamicLibrary::HandleSet::DLClose(void *Handle) {
+  if (HandleSet* HS = IsOpenedHandlesInstance(Handle))
+    HS->Process = nullptr; // Just drop the *Process* handle.
+  else
+    FreeLibrary((HMODULE)Handle);
+}
+
+static bool GetProcessModules(HANDLE H, DWORD &Bytes, HMODULE *Data = nullptr) {
+  // EnumProcessModules will fail on Windows 64 while some versions of
+  // MingW-32 don't have EnumProcessModulesEx.
+  if (
+#ifdef _WIN64
+      !EnumProcessModulesEx(H, Data, Bytes, &Bytes, LIST_MODULES_64BIT)
+#else
+      !EnumProcessModules(H, Data, Bytes, &Bytes)
+#endif
+     ) {
+    std::string Err;
+    if (MakeErrMsg(&Err, "EnumProcessModules failure"))
+      llvm::errs() << Err << "\n";
+    return false;
+  }
+  return true;
+}
+
+void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) {
+  HandleSet* HS = IsOpenedHandlesInstance(Handle);
+  if (!HS)
+    return (void *)uintptr_t(GetProcAddress((HMODULE)Handle, Symbol));
+
+  // Could have done a dlclose on the *Process* handle
+  if (!HS->Process)
+    return nullptr;
+
+  // Trials indicate EnumProcessModulesEx is consistantly faster than using
+  // EnumerateLoadedModules64 or CreateToolhelp32Snapshot.
+  //
+  // | Handles | DbgHelp.dll | CreateSnapshot | EnumProcessModulesEx
+  // |=========|=============|========================================
+  // | 37      | 0.0000585 * | 0.0003031      | 0.0000152
+  // | 1020    | 0.0026310 * | 0.0121598      | 0.0002683
+  // | 2084    | 0.0149418 * | 0.0369936      | 0.0005610
+  //
+  // * Not including the load time of Dbghelp.dll (~.005 sec)
+  //
+  // There's still a case to somehow cache the result of EnumProcessModulesEx
+  // across invocations, but the complication of doing that properly...
+  // Possibly using LdrRegisterDllNotification to invalidate the cache?
+
+  DWORD Bytes = 0;
+  HMODULE Self = HMODULE(GetCurrentProcess());
+  if (!GetProcessModules(Self, Bytes))
+    return nullptr;
+
+  // Get the most recent list in case any modules added/removed between calls
+  // to EnumProcessModulesEx that gets the amount of, then copies the HMODULES.
+  // MSDN is pretty clear that if the module list changes during the call to
+  // EnumProcessModulesEx the results should not be used.
+  std::vector<HMODULE> Handles;
+  do {
+    assert(Bytes && ((Bytes % sizeof(HMODULE)) == 0) &&
+           "Should have at least one module and be aligned");
+    Handles.resize(Bytes / sizeof(HMODULE));
+    if (!GetProcessModules(Self, Bytes, Handles.data()))
+      return nullptr;
+  } while (Bytes != (Handles.size() * sizeof(HMODULE)));
+
+  // Try EXE first, mirroring what dlsym(dlopen(NULL)) does.
+  if (FARPROC Ptr = GetProcAddress(HMODULE(Handles.front()), Symbol))
+    return (void *) uintptr_t(Ptr);
+
+  if (Handles.size() > 1) {
+    // This is different behaviour than what Posix dlsym(dlopen(NULL)) does.
+    // Doing that here is causing real problems for the JIT where msvc.dll
+    // and ucrt.dll can define the same symbols. The runtime linker will choose
+    // symbols from ucrt.dll first, but iterating NOT in reverse here would
+    // mean that the msvc.dll versions would be returned.
+
+    for (auto I = Handles.rbegin(), E = Handles.rend()-1; I != E; ++I) {
+      if (FARPROC Ptr = GetProcAddress(HMODULE(*I), Symbol))
+        return (void *) uintptr_t(Ptr);
+    }
+  }
+  return nullptr;
+}
+
+
 // Stack probing routines are in the support library (e.g. libgcc), but we don't
 // have dynamic linking on windows. Provide a hook.
 #define EXPLICIT_SYMBOL(SYM)                    \
@@ -129,38 +171,18 @@ sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) {
 #undef INLINE_DEF_SYMBOL1
 #undef INLINE_DEF_SYMBOL2
 
-void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
-  SmartScopedLock<true> Lock(*SymbolsMutex);
-
-  // First check symbols added via AddSymbol().
-  if (ExplicitSymbols.isConstructed()) {
-    StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
-
-    if (i != ExplicitSymbols->end())
-      return i->second;
-  }
-
-  // Now search the libraries.
-  if (OpenedHandles.isConstructed()) {
-    for (DenseSet<HMODULE>::iterator I = OpenedHandles->begin(),
-         E = OpenedHandles->end(); I != E; ++I) {
-      FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
-      if (ptr) {
-        return (void *)(intptr_t)ptr;
-      }
-    }
-  }
+static void *DoSearch(const char *SymbolName) {
 
 #define EXPLICIT_SYMBOL(SYM)                                                   \
-  if (!strcmp(symbolName, #SYM))                                               \
+  if (!strcmp(SymbolName, #SYM))                                               \
     return (void *)&SYM;
 #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO)                                       \
-  if (!strcmp(symbolName, #SYMFROM))                                           \
+  if (!strcmp(SymbolName, #SYMFROM))                                           \
     return (void *)&SYMTO;
 
 #ifdef _M_IX86
 #define INLINE_DEF_SYMBOL1(TYP, SYM)                                           \
-  if (!strcmp(symbolName, #SYM))                                               \
+  if (!strcmp(SymbolName, #SYM))                                               \
     return (void *)&inline_##SYM;
 #define INLINE_DEF_SYMBOL2(TYP, SYM) INLINE_DEF_SYMBOL1(TYP, SYM)
 #endif
@@ -174,15 +196,5 @@ void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
 #undef INLINE_DEF_SYMBOL1
 #undef INLINE_DEF_SYMBOL2
 
-  return 0;
-}
-
-void *sys::DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
-  if (!isValid())
-    return NULL;
-  if (Data == &OpenedHandles)
-    return SearchForAddressOfSymbol(symbolName);
-  return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName);
-}
-
+  return nullptr;
 }
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index efc22189378..056ffd58b52 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -580,8 +580,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const MachineOperand &MO_Sym = MI->getOperand(0);
     MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
     MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
-    MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
-                                   AArch64II::MO_NC);
+    MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
     MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
     MCInstLowering.lowerOperand(MO_Sym, Sym);
     MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 4e5e3e43a46..08370800175 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2907,16 +2907,13 @@ bool AArch64FastISel::fastLowerArguments() {
   // Only handle simple cases of up to 8 GPR and FPR each.
   unsigned GPRCnt = 0;
   unsigned FPRCnt = 0;
-  unsigned Idx = 0;
   for (auto const &Arg : F->args()) {
-    // The first argument is at index 1.
-    ++Idx;
-    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+    if (Arg.hasAttribute(Attribute::ByVal) ||
+        Arg.hasAttribute(Attribute::InReg) ||
+        Arg.hasAttribute(Attribute::StructRet) ||
+        Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftError) ||
+        Arg.hasAttribute(Attribute::Nest))
       return false;
 
     Type *ArgTy = Arg.getType();
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7141e77fcd2..b18fb30eb2d 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -1852,17 +1853,17 @@ static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
   OpUsefulBits = 1;
 
   if (MSB >= Imm) {
-    OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
+    OpUsefulBits <<= MSB - Imm + 1;
     --OpUsefulBits;
     // The interesting part will be in the lower part of the result
     getUsefulBits(Op, OpUsefulBits, Depth + 1);
     // The interesting part was starting at Imm in the argument
-    OpUsefulBits = OpUsefulBits.shl(Imm);
+    OpUsefulBits <<= Imm;
   } else {
-    OpUsefulBits = OpUsefulBits.shl(MSB + 1);
+    OpUsefulBits <<= MSB + 1;
     --OpUsefulBits;
     // The interesting part will be shifted in the result
-    OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
+    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
     getUsefulBits(Op, OpUsefulBits, Depth + 1);
     // The interesting part was at zero in the argument
     OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
@@ -1892,7 +1893,7 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
     // Shift Left
     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
-    Mask = Mask.shl(ShiftAmt);
+    Mask <<= ShiftAmt;
     getUsefulBits(Op, Mask, Depth + 1);
     Mask.lshrInPlace(ShiftAmt);
   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
@@ -1902,7 +1903,7 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
     Mask.lshrInPlace(ShiftAmt);
     getUsefulBits(Op, Mask, Depth + 1);
-    Mask = Mask.shl(ShiftAmt);
+    Mask <<= ShiftAmt;
   } else
     return;
 
@@ -1930,13 +1931,13 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
     uint64_t Width = MSB - Imm + 1;
     uint64_t LSB = Imm;
 
-    OpUsefulBits = OpUsefulBits.shl(Width);
+    OpUsefulBits <<= Width;
     --OpUsefulBits;
 
     if (Op.getOperand(1) == Orig) {
       // Copy the low bits from the result to bits starting from LSB.
       Mask = ResultUsefulBits & OpUsefulBits;
-      Mask = Mask.shl(LSB);
+      Mask <<= LSB;
     }
 
     if (Op.getOperand(0) == Orig)
@@ -1947,9 +1948,9 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
     uint64_t Width = MSB + 1;
     uint64_t LSB = UsefulBits.getBitWidth() - Imm;
 
-    OpUsefulBits = OpUsefulBits.shl(Width);
+    OpUsefulBits <<= Width;
     --OpUsefulBits;
-    OpUsefulBits = OpUsefulBits.shl(LSB);
+    OpUsefulBits <<= LSB;
 
     if (Op.getOperand(1) == Orig) {
       // Copy the bits from the result to the zero bits.
@@ -2078,18 +2079,18 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
   (void)BitWidth;
   assert(BitWidth == 32 || BitWidth == 64);
 
-  APInt KnownZero, KnownOne;
-  CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
+  KnownBits Known;
+  CurDAG->computeKnownBits(Op, Known);
 
   // Non-zero in the sense that they're not provably zero, which is the key
   // point if we want to use this value
-  uint64_t NonZeroBits = (~KnownZero).getZExtValue();
+  uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
 
   // Discard a constant AND mask if present. It's safe because the node will
   // already have been factored into the computeKnownBits calculation above.
   uint64_t AndImm;
   if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
-    assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
+    assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
     Op = Op.getOperand(0);
   }
 
@@ -2158,15 +2159,15 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
 
   // Compute the Known Zero for the AND as this allows us to catch more general
   // cases than just looking for AND with imm.
-  APInt KnownZero, KnownOne;
-  CurDAG->computeKnownBits(And, KnownZero, KnownOne);
+  KnownBits Known;
+  CurDAG->computeKnownBits(And, Known);
 
   // Non-zero in the sense that they're not provably zero, which is the key
   // point if we want to use this value.
-  uint64_t NotKnownZero = (~KnownZero).getZExtValue();
+  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
 
   // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
-  if (!isShiftedMask(KnownZero.getZExtValue(), VT))
+  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
     return false;
 
   // The bits being inserted must only set those bits that are known to be zero.
@@ -2300,15 +2301,15 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
     // This allows to catch more general case than just looking for
     // AND with imm. Indeed, simplify-demanded-bits may have removed
     // the AND instruction because it proves it was useless.
-    APInt KnownZero, KnownOne;
-    CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
+    KnownBits Known;
+    CurDAG->computeKnownBits(OrOpd1Val, Known);
 
     // Check if there is enough room for the second operand to appear
     // in the first one
     APInt BitsToBeInserted =
-        APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
+        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
 
-    if ((BitsToBeInserted & ~KnownZero) != 0)
+    if ((BitsToBeInserted & ~Known.Zero) != 0)
       continue;
 
     // Set the first operand
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index a7c98fbb425..eb1bbcafe6e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -67,6 +67,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetCallingConv.h"
@@ -929,20 +930,19 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
 }
 
 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
-/// Mask are known to be either zero or one and return them in the
-/// KnownZero/KnownOne bitsets.
+/// Mask are known to be either zero or one and return them Known.
 void AArch64TargetLowering::computeKnownBitsForTargetNode(
-    const SDValue Op, APInt &KnownZero, APInt &KnownOne,
+    const SDValue Op, KnownBits &Known,
     const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
   switch (Op.getOpcode()) {
   default:
     break;
   case AArch64ISD::CSEL: {
-    APInt KnownZero2, KnownOne2;
-    DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
-    DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
-    KnownZero &= KnownZero2;
-    KnownOne &= KnownOne2;
+    KnownBits Known2;
+    DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
+    DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+    Known.Zero &= Known2.Zero;
+    Known.One &= Known2.One;
     break;
   }
   case ISD::INTRINSIC_W_CHAIN: {
@@ -952,10 +952,10 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
     default: return;
     case Intrinsic::aarch64_ldaxr:
     case Intrinsic::aarch64_ldxr: {
-      unsigned BitWidth = KnownOne.getBitWidth();
+      unsigned BitWidth = Known.getBitWidth();
       EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
       unsigned MemBits = VT.getScalarSizeInBits();
-      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+      Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
       return;
     }
     }
@@ -974,15 +974,15 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
       // bits larger than the element datatype. 32-bit or larget doesn't need
       // this as those are legal types and will be handled by isel directly.
       MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
-      unsigned BitWidth = KnownZero.getBitWidth();
+      unsigned BitWidth = Known.getBitWidth();
       if (VT == MVT::v8i8 || VT == MVT::v16i8) {
         assert(BitWidth >= 8 && "Unexpected width!");
         APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
-        KnownZero |= Mask;
+        Known.Zero |= Mask;
       } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
         assert(BitWidth >= 16 && "Unexpected width!");
         APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
-        KnownZero |= Mask;
+        Known.Zero |= Mask;
       }
       break;
     } break;
@@ -4847,9 +4847,9 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
       // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
       for (int i = ExtraSteps; i > 0; --i) {
         SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
-                                   &Flags);
-        Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags);
-        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
+                                   Flags);
+        Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
+        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
       }
 
       if (!Reciprocal) {
@@ -4858,7 +4858,7 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
         SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
 
-        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags);
+        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
         // Correct the result if the operand is 0.0.
         Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
                                VT, Eq, Operand, Estimate);
@@ -4887,8 +4887,8 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
       // AArch64 reciprocal iteration instruction: (2 - M * N)
       for (int i = ExtraSteps; i > 0; --i) {
         SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
-                                   Estimate, &Flags);
-        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
+                                   Estimate, Flags);
+        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
       }
 
       ExtraSteps = 0;
@@ -9461,11 +9461,11 @@ static bool performTBISimplification(SDValue Addr,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      SelectionDAG &DAG) {
   APInt DemandedMask = APInt::getLowBitsSet(64, 56);
-  APInt KnownZero, KnownOne;
+  KnownBits Known;
   TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
                                         DCI.isBeforeLegalizeOps());
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) {
+  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
     DCI.CommitTargetLoweringOpt(TLO);
     return true;
   }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 6081b07479b..89db566c219 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -250,8 +250,8 @@ public:
 
   /// Determine which of the bits specified in Mask are known to be either zero
   /// or one and return them in the KnownZero/KnownOne bitsets.
-  void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
-                                     APInt &KnownOne, const APInt &DemandedElts,
+  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
+                                     const APInt &DemandedElts,
                                      const SelectionDAG &DAG,
                                      unsigned Depth = 0) const override;
 
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 82e9c5a88e3..ce401206e51 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -314,8 +314,8 @@ def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 // AArch64 Instruction Predicate Definitions.
 def IsDarwin  : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
-def ForCodeSize   : Predicate<"ForCodeSize">;
-def NotForCodeSize   : Predicate<"!ForCodeSize">;
+def ForCodeSize   : Predicate<"Subtarget->getForCodeSize()">;
+def NotForCodeSize   : Predicate<"!Subtarget->getForCodeSize()">;
 
 include "AArch64InstrFormats.td"
 
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index b0e0e3eb4ba..9bfd570e9a8 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -51,7 +51,6 @@ public:
                              const AArch64Subtarget &STI,
                              const AArch64RegisterBankInfo &RBI);
 
-  void beginFunction(const MachineFunction &MF) override;
   bool select(MachineInstr &I) const override;
 
 private:
@@ -74,12 +73,10 @@ private:
   const AArch64InstrInfo &TII;
   const AArch64RegisterInfo &TRI;
   const AArch64RegisterBankInfo &RBI;
-  bool ForCodeSize;
 
-  PredicateBitset AvailableFeatures;
-  PredicateBitset
-  computeAvailableFeatures(const MachineFunction *MF,
-                           const AArch64Subtarget *Subtarget) const;
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
 
 // We declare the temporaries used by selectImpl() in the class to minimize the
 // cost of constructing placeholder values.
@@ -98,7 +95,10 @@ AArch64InstructionSelector::AArch64InstructionSelector(
     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
     const AArch64RegisterBankInfo &RBI)
     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
-      TRI(*STI.getRegisterInfo()), RBI(RBI), ForCodeSize(), AvailableFeatures()
+      TRI(*STI.getRegisterInfo()), RBI(RBI),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
 #define GET_GLOBALISEL_TEMPORARIES_INIT
 #include "AArch64GenGlobalISel.inc"
 #undef GET_GLOBALISEL_TEMPORARIES_INIT
@@ -577,12 +577,6 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
   return true;
 }
 
-void AArch64InstructionSelector::beginFunction(
-    const MachineFunction &MF) {
-  ForCodeSize = MF.getFunction()->optForSize();
-  AvailableFeatures = computeAvailableFeatures(&MF, &STI);
-}
-
 bool AArch64InstructionSelector::select(MachineInstr &I) const {
   assert(I.getParent() && "Instruction should be in a basic block!");
   assert(I.getParent()->getParent() && "Instruction should be in a function!");
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 042755bd36d..abdeac019a1 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -12,8 +12,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64Subtarget.h"
+
+#include "AArch64.h"
 #include "AArch64InstrInfo.h"
 #include "AArch64PBQPRegAlloc.h"
+#include "AArch64TargetMachine.h"
+
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "AArch64CallLowering.h"
+#include "AArch64LegalizerInfo.h"
+#include "AArch64RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#endif
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -111,13 +125,63 @@ void AArch64Subtarget::initializeProperties() {
   }
 }
 
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+
+struct AArch64GISelActualAccessor : public GISelAccessor {
+  std::unique_ptr<CallLowering> CallLoweringInfo;
+  std::unique_ptr<InstructionSelector> InstSelector;
+  std::unique_ptr<LegalizerInfo> Legalizer;
+  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
+  const CallLowering *getCallLowering() const override {
+    return CallLoweringInfo.get();
+  }
+
+  const InstructionSelector *getInstructionSelector() const override {
+    return InstSelector.get();
+  }
+
+  const LegalizerInfo *getLegalizerInfo() const override {
+    return Legalizer.get();
+  }
+
+  const RegisterBankInfo *getRegBankInfo() const override {
+    return RegBankInfo.get();
+  }
+};
+
+} // end anonymous namespace
+#endif
+
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                    const std::string &FS,
-                                   const TargetMachine &TM, bool LittleEndian)
+                                   const TargetMachine &TM, bool LittleEndian,
+                                   bool ForCodeSize)
     : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()),
       IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(),
       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
-      TLInfo(TM, *this), GISel() {}
+      TLInfo(TM, *this), GISel(), ForCodeSize(ForCodeSize) {
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+  GISelAccessor *AArch64GISel = new GISelAccessor();
+#else
+  AArch64GISelActualAccessor *AArch64GISel = new AArch64GISelActualAccessor();
+  AArch64GISel->CallLoweringInfo.reset(
+      new AArch64CallLowering(*getTargetLowering()));
+  AArch64GISel->Legalizer.reset(new AArch64LegalizerInfo());
+
+  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
+
+  // FIXME: At this point, we can't rely on Subtarget having RBI.
+  // It's awkward to mix passing RBI and the Subtarget; should we pass
+  // TII/TRI as well?
+  AArch64GISel->InstSelector.reset(createAArch64InstructionSelector(
+      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
+
+  AArch64GISel->RegBankInfo.reset(RBI);
+#endif
+  setGISelAccessor(*AArch64GISel);
+}
 
 const CallLowering *AArch64Subtarget::getCallLowering() const {
   assert(GISel && "Access to GlobalISel APIs not set");
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 3d66a9ea8ce..5b9bee6e41b 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -124,6 +124,8 @@ protected:
   /// an optional library.
   std::unique_ptr<GISelAccessor> GISel;
 
+  bool ForCodeSize;
+
 private:
   /// initializeSubtargetDependencies - Initializes using CPUString and the
   /// passed in feature string so that we can use initializer lists for
@@ -139,7 +141,7 @@ public:
   /// of the specified triple.
   AArch64Subtarget(const Triple &TT, const std::string &CPU,
                    const std::string &FS, const TargetMachine &TM,
-                   bool LittleEndian);
+                   bool LittleEndian, bool ForCodeSize);
 
   /// This object will take onwership of \p GISelAccessor.
   void setGISelAccessor(GISelAccessor &GISel) {
@@ -262,6 +264,8 @@ public:
     }
   }
 
+  bool getForCodeSize() const { return ForCodeSize; }
+
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index dcc51bf0232..de7108d302d 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -11,12 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
-#include "AArch64CallLowering.h"
-#include "AArch64LegalizerInfo.h"
 #include "AArch64MacroFusion.h"
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-#include "AArch64RegisterBankInfo.h"
-#endif
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
 #include "AArch64TargetObjectFile.h"
@@ -25,7 +20,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -222,39 +216,11 @@ AArch64TargetMachine::AArch64TargetMachine(
 
 AArch64TargetMachine::~AArch64TargetMachine() = default;
 
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-namespace {
-
-struct AArch64GISelActualAccessor : public GISelAccessor {
-  std::unique_ptr<CallLowering> CallLoweringInfo;
-  std::unique_ptr<InstructionSelector> InstSelector;
-  std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
-
-  const CallLowering *getCallLowering() const override {
-    return CallLoweringInfo.get();
-  }
-
-  const InstructionSelector *getInstructionSelector() const override {
-    return InstSelector.get();
-  }
-
-  const LegalizerInfo *getLegalizerInfo() const override {
-    return Legalizer.get();
-  }
-
-  const RegisterBankInfo *getRegBankInfo() const override {
-    return RegBankInfo.get();
-  }
-};
-
-} // end anonymous namespace
-#endif
-
 const AArch64Subtarget *
 AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
   Attribute CPUAttr = F.getFnAttribute("target-cpu");
   Attribute FSAttr = F.getFnAttribute("target-features");
+  bool ForCodeSize = F.optForSize();
 
   std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
                         ? CPUAttr.getValueAsString().str()
@@ -262,35 +228,17 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
   std::string FS = !FSAttr.hasAttribute(Attribute::None)
                        ? FSAttr.getValueAsString().str()
                        : TargetFS;
+  std::string ForCodeSizeStr =
+      std::string(ForCodeSize ? "+" : "-") + "forcodesize";
 
-  auto &I = SubtargetMap[CPU + FS];
+  auto &I = SubtargetMap[CPU + FS + ForCodeSizeStr];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
     I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
-                                            isLittle);
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-    GISelAccessor *GISel = new GISelAccessor();
-#else
-    AArch64GISelActualAccessor *GISel =
-        new AArch64GISelActualAccessor();
-    GISel->CallLoweringInfo.reset(
-        new AArch64CallLowering(*I->getTargetLowering()));
-    GISel->Legalizer.reset(new AArch64LegalizerInfo());
-
-    auto *RBI = new AArch64RegisterBankInfo(*I->getRegisterInfo());
-
-    // FIXME: At this point, we can't rely on Subtarget having RBI.
-    // It's awkward to mix passing RBI and the Subtarget; should we pass
-    // TII/TRI as well?
-    GISel->InstSelector.reset(
-        createAArch64InstructionSelector(*this, *I, *RBI));
-
-    GISel->RegBankInfo.reset(RBI);
-#endif
-    I->setGISelAccessor(*GISel);
+                                            isLittle, ForCodeSize);
   }
   return I.get();
 }
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index c954c0eb2c6..10e7241da70 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -69,34 +69,34 @@ static bool isNonILP32reloc(const MCFixup &Fixup,
       return true;
     case AArch64MCExpr::VK_ABS_G2_S:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_ABS_G2_NC:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_ABS_G1_S:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_ABS_G1_NC:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_DTPREL_G2:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_DTPREL_G1_NC:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_TPREL_G2:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_TPREL_G1_NC:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_GOTTPREL_G1:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
-      return ELF::R_AARCH64_NONE;
+      return true;
     case AArch64MCExpr::VK_GOTTPREL_G0_NC:
       Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
-      return ELF::R_AARCH64_NONE;
+      return true;
     default: return false;
   }
   return false;
@@ -141,6 +141,16 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
     case AArch64::fixup_aarch64_pcrel_adrp_imm21:
       if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC)
         return R_CLS(ADR_PREL_PG_HI21);
+      if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) {
+        if (IsILP32) {
+          Ctx.reportError(Fixup.getLoc(),
+                          "invalid fixup for 32-bit pcrel ADRP instruction "
+                          "VK_ABS VK_NC");
+          return ELF::R_AARCH64_NONE;
+        } else {
+          return ELF::R_AARCH64_ADR_PREL_PG_HI21_NC;
+        }
+      }
       if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC)
         return R_CLS(ADR_GOT_PAGE);
       if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC)
@@ -179,7 +189,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
       return R_CLS(ABS32);
     case FK_Data_8:
       if (IsILP32) {
-        Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(ABS64));
+        Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data "
+			"relocation not supported (LP64 eqv: ABS64)");
         return ELF::R_AARCH64_NONE;
       } else
         return ELF::R_AARCH64_ABS64;
@@ -197,7 +208,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
       if (RefKind == AArch64MCExpr::VK_TPREL_LO12)
         return R_CLS(TLSLE_ADD_TPREL_LO12);
       if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12)
-        return R_CLS(TLSDESC_ADD_LO12_NC);
+        return R_CLS(TLSDESC_ADD_LO12);
       if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
         return R_CLS(ADD_ABS_LO12_NC);
 
@@ -245,15 +256,67 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
         return R_CLS(TLSLE_LDST32_TPREL_LO12);
       if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
         return R_CLS(TLSLE_LDST32_TPREL_LO12_NC);
+      if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) {
+        if (IsILP32) {
+          return ELF::R_AARCH64_P32_LD32_GOT_LO12_NC;
+        } else {
+          Ctx.reportError(Fixup.getLoc(),
+                          "LP64 4 byte unchecked GOT load/store relocation "
+			  "not supported (ILP32 eqv: LD32_GOT_LO12_NC");
+          return ELF::R_AARCH64_NONE;
+        }
+      }
+      if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) {
+        if (IsILP32) {
+          Ctx.reportError(Fixup.getLoc(),
+                          "ILP32 4 byte checked GOT load/store relocation "
+			  "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
+        } else {
+          Ctx.reportError(Fixup.getLoc(),
+                          "LP64 4 byte checked GOT load/store relocation "
+			  "not supported (unchecked/ILP32 eqv: "
+			  "LD32_GOT_LO12_NC)");
+        }
+        return ELF::R_AARCH64_NONE;
+      }
+      if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) {
+        if (IsILP32) {
+          return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC;
+        } else {
+          Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store "
+                          "relocation not supported (ILP32 eqv: "
+                          "TLSIE_LD32_GOTTPREL_LO12_NC)");
+          return ELF::R_AARCH64_NONE;
+        }
+      }
+      if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC) {
+        if (IsILP32) {
+          return ELF::R_AARCH64_P32_TLSDESC_LD32_LO12;
+        } else {
+          Ctx.reportError(Fixup.getLoc(),
+                          "LP64 4 byte TLSDESC load/store relocation "
+			  "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
+          return ELF::R_AARCH64_NONE;
+        }
+      }
 
       Ctx.reportError(Fixup.getLoc(),
-                      "invalid fixup for 32-bit load/store instruction");
+                      "invalid fixup for 32-bit load/store instruction "
+		      "fixup_aarch64_ldst_imm12_scale4");
       return ELF::R_AARCH64_NONE;
     case AArch64::fixup_aarch64_ldst_imm12_scale8:
       if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
         return R_CLS(LDST64_ABS_LO12_NC);
-      if (SymLoc == AArch64MCExpr::VK_GOT && IsNC)
-        return R_CLS(LD64_GOT_LO12_NC);
+      if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) {
+        if (!IsILP32) {
+          return ELF::R_AARCH64_LD64_GOT_LO12_NC;
+        } else {
+          Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
+                          "relocation not supported (LP64 eqv: "
+                          "LD64_GOT_LO12_NC)");
+          return ELF::R_AARCH64_NONE;
+        }
+      }
       if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
         return R_CLS(TLSLD_LDST64_DTPREL_LO12);
       if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
@@ -262,19 +325,40 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
         return R_CLS(TLSLE_LDST64_TPREL_LO12);
       if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
         return R_CLS(TLSLE_LDST64_TPREL_LO12_NC);
-      if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC)
-        return IsILP32 ? ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC
-                       : ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
-      if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC)
-        return IsILP32 ? ELF::R_AARCH64_P32_TLSDESC_LD32_LO12_NC
-                       : ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
-
+      if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) {
+        if (!IsILP32) {
+          return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+        } else {
+          Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
+                          "relocation not supported (LP64 eqv: "
+                          "TLSIE_LD64_GOTTPREL_LO12_NC)");
+          return ELF::R_AARCH64_NONE;
+        }
+      }
+      if (SymLoc == AArch64MCExpr::VK_TLSDESC) {
+        if (!IsILP32) {
+          return ELF::R_AARCH64_TLSDESC_LD64_LO12;
+        } else {
+          Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
+                          "relocation not supported (LP64 eqv: "
+                          "TLSDESC_LD64_LO12)");
+          return ELF::R_AARCH64_NONE;
+        }
+      }
       Ctx.reportError(Fixup.getLoc(),
                       "invalid fixup for 64-bit load/store instruction");
       return ELF::R_AARCH64_NONE;
     case AArch64::fixup_aarch64_ldst_imm12_scale16:
       if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
         return R_CLS(LDST128_ABS_LO12_NC);
+      if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
+        return R_CLS(TLSLD_LDST128_DTPREL_LO12);
+      if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
+        return R_CLS(TLSLD_LDST128_DTPREL_LO12_NC);
+      if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
+        return R_CLS(TLSLE_LDST128_TPREL_LO12);
+      if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
+        return R_CLS(TLSLE_LDST128_TPREL_LO12_NC);
 
       Ctx.reportError(Fixup.getLoc(),
                       "invalid fixup for 128-bit load/store instruction");
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index a540f49866a..97c92fa0778 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -62,6 +62,7 @@ StringRef AArch64MCExpr::getVariantKindName() const {
   case VK_TPREL_LO12_NC:       return ":tprel_lo12_nc:";
   case VK_TLSDESC_LO12:        return ":tlsdesc_lo12:";
   case VK_ABS_PAGE:            return "";
+  case VK_ABS_PAGE_NC:         return ":pg_hi21_nc:";
   case VK_GOT_PAGE:            return ":got:";
   case VK_GOT_LO12:            return ":got_lo12:";
   case VK_GOTTPREL_PAGE:       return ":gottprel:";
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index db36a65564c..3dbf0f84a66 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -62,6 +62,7 @@ public:
     // since a user would write ":lo12:").
     VK_CALL              = VK_ABS,
     VK_ABS_PAGE          = VK_ABS      | VK_PAGE,
+    VK_ABS_PAGE_NC       = VK_ABS      | VK_PAGE    | VK_NC,
     VK_ABS_G3            = VK_ABS      | VK_G3,
     VK_ABS_G2            = VK_ABS      | VK_G2,
     VK_ABS_G2_S          = VK_SABS     | VK_G2,
@@ -95,7 +96,7 @@ public:
     VK_TPREL_HI12        = VK_TPREL    | VK_HI12,
     VK_TPREL_LO12        = VK_TPREL    | VK_PAGEOFF,
     VK_TPREL_LO12_NC     = VK_TPREL    | VK_PAGEOFF | VK_NC,
-    VK_TLSDESC_LO12      = VK_TLSDESC  | VK_PAGEOFF | VK_NC,
+    VK_TLSDESC_LO12      = VK_TLSDESC  | VK_PAGEOFF,
     VK_TLSDESC_PAGE      = VK_TLSDESC  | VK_PAGE,
 
     VK_INVALID  = 0xfff
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 0f331486d0f..2e5b78bbf7e 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -407,7 +407,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
    FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
-   FeatureFastFMAF32
+   FeatureFastFMAF32, FeatureDPP
   ]
 >;
 
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index a81bcb56dfd..2ce23dbf08e 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -149,11 +149,9 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
     return;
 
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
-  SIProgramInfo KernelInfo;
   amd_kernel_code_t KernelCode;
   if (STM.isAmdCodeObjectV2(*MF)) {
-    getSIProgramInfo(KernelInfo, *MF);
-    getAmdKernelCode(KernelCode, KernelInfo, *MF);
+    getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
 
     OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
     getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
@@ -187,7 +185,26 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   AsmPrinter::EmitGlobalVariable(GV);
 }
 
+bool AMDGPUAsmPrinter::doFinalization(Module &M) {
+  CallGraphResourceInfo.clear();
+  return AsmPrinter::doFinalization(M);
+}
+
+// Print comments that apply to both callable functions and entry points.
+void AMDGPUAsmPrinter::emitCommonFunctionComments(
+  uint32_t NumVGPR,
+  uint32_t NumSGPR,
+  uint32_t ScratchSize,
+  uint64_t CodeSize) {
+  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
+  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
+  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
+  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
+}
+
 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  CurrentProgramInfo = SIProgramInfo();
+
   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
 
   // The starting address of all shader programs must be 256 bytes aligned.
@@ -204,11 +221,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     OutStreamer->SwitchSection(ConfigSection);
   }
 
-  SIProgramInfo KernelInfo;
   if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    getSIProgramInfo(KernelInfo, MF);
+    if (MFI->isEntryFunction()) {
+      getSIProgramInfo(CurrentProgramInfo, MF);
+    } else {
+      auto I = CallGraphResourceInfo.insert(
+        std::make_pair(MF.getFunction(), SIFunctionResourceInfo()));
+      SIFunctionResourceInfo &Info = I.first->second;
+      assert(I.second && "should only be called once per function");
+      Info = analyzeResourceUsage(MF);
+    }
+
     if (!STM.isAmdHsaOS()) {
-      EmitProgramInfoSI(MF, KernelInfo);
+      EmitProgramInfoSI(MF, CurrentProgramInfo);
     }
   } else {
     EmitProgramInfoR600(MF);
@@ -226,72 +251,87 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     OutStreamer->SwitchSection(CommentSection);
 
     if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-      if (MFI->isEntryFunction()) {
-        OutStreamer->emitRawComment(" Kernel info:", false);
-      } else {
+      if (!MFI->isEntryFunction()) {
         OutStreamer->emitRawComment(" Function info:", false);
+        SIFunctionResourceInfo &Info = CallGraphResourceInfo[MF.getFunction()];
+        emitCommonFunctionComments(
+          Info.NumVGPR,
+          Info.getTotalNumSGPRs(MF.getSubtarget<SISubtarget>()),
+          Info.PrivateSegmentSize,
+          getFunctionCodeSize(MF));
+        return false;
       }
 
+      OutStreamer->emitRawComment(" Kernel info:", false);
+      emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
+                                 CurrentProgramInfo.NumSGPR,
+                                 CurrentProgramInfo.ScratchSize,
+                                 getFunctionCodeSize(MF));
+
       OutStreamer->emitRawComment(" codeLenInByte = " +
                                   Twine(getFunctionCodeSize(MF)), false);
-      OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
-                                  false);
-      OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
-                                  false);
+      OutStreamer->emitRawComment(
+        " NumSgprs: " + Twine(CurrentProgramInfo.NumSGPR), false);
+      OutStreamer->emitRawComment(
+        " NumVgprs: " + Twine(CurrentProgramInfo.NumVGPR), false);
 
-      OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
-                                  false);
-      OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
-                                  false);
-      OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
-                                  false);
-      OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
-                                  " bytes/workgroup (compile time only)", false);
+      OutStreamer->emitRawComment(
+        " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
+      OutStreamer->emitRawComment(
+        " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
+      OutStreamer->emitRawComment(
+        " ScratchSize: " + Twine(CurrentProgramInfo.ScratchSize), false);
+      OutStreamer->emitRawComment(
+        " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
+        " bytes/workgroup (compile time only)", false);
 
-      if (!MFI->isEntryFunction())
-        return false;
+      OutStreamer->emitRawComment(
+        " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
+      OutStreamer->emitRawComment(
+        " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
 
-      OutStreamer->emitRawComment(" SGPRBlocks: " +
-                                  Twine(KernelInfo.SGPRBlocks), false);
-      OutStreamer->emitRawComment(" VGPRBlocks: " +
-                                  Twine(KernelInfo.VGPRBlocks), false);
+      OutStreamer->emitRawComment(
+        " NumSGPRsForWavesPerEU: " +
+        Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
+      OutStreamer->emitRawComment(
+        " NumVGPRsForWavesPerEU: " +
+        Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
 
-      OutStreamer->emitRawComment(" NumSGPRsForWavesPerEU: " +
-                                  Twine(KernelInfo.NumSGPRsForWavesPerEU), false);
-      OutStreamer->emitRawComment(" NumVGPRsForWavesPerEU: " +
-                                  Twine(KernelInfo.NumVGPRsForWavesPerEU), false);
-
-      OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
-                                  false);
-      OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
-                                  false);
+      OutStreamer->emitRawComment(
+        " ReservedVGPRFirst: " + Twine(CurrentProgramInfo.ReservedVGPRFirst),
+        false);
+      OutStreamer->emitRawComment(
+        " ReservedVGPRCount: " + Twine(CurrentProgramInfo.ReservedVGPRCount),
+        false);
 
       if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
-        OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
-                                    Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
-        OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" +
-                                    Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false);
+        OutStreamer->emitRawComment(
+          " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
+          Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
+        OutStreamer->emitRawComment(
+          " DebuggerPrivateSegmentBufferSGPR: s" +
+          Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
       }
 
-      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
-                                  Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
-                                  false);
-      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
-                                  Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
-                                  false);
-      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
-                                  Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
-                                  false);
-      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
-                                  Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)),
-                                  false);
-      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
-                                  Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)),
-                                  false);
-      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
-                                  Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)),
-                                  false);
-
+      OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:USER_SGPR: " +
+        Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
+      OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+        Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
+      OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
+        Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
+      OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
+        Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
+      OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
+        Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
+      OutStreamer->emitRawComment(
+        " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
+        Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
+        false);
     } else {
       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
       OutStreamer->emitRawComment(
@@ -407,71 +447,117 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
   return false;
 }
 
-void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
-                                        const MachineFunction &MF) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SIInstrInfo *TII = STM.getInstrInfo();
-  const SIRegisterInfo *RI = &TII->getRegisterInfo();
-
-
-  MCPhysReg NumVGPRReg = AMDGPU::NoRegister;
-  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
-    if (MRI.isPhysRegUsed(Reg)) {
-      NumVGPRReg = Reg;
-      break;
-    }
-  }
-
-  MCPhysReg NumSGPRReg = AMDGPU::NoRegister;
-  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
-    if (MRI.isPhysRegUsed(Reg)) {
-      NumSGPRReg = Reg;
-      break;
-    }
-  }
-
-  // We found the maximum register index. They start at 0, so add one to get the
-  // number of registers.
-  ProgInfo.NumVGPR = NumVGPRReg == AMDGPU::NoRegister ? 0 :
-    RI->getHWRegIndex(NumVGPRReg) + 1;
-  ProgInfo.NumSGPR = NumSGPRReg == AMDGPU::NoRegister ? 0 :
-    RI->getHWRegIndex(NumSGPRReg) + 1;
+static unsigned getNumExtraSGPRs(const SISubtarget &ST,
+                                 bool VCCUsed,
+                                 bool FlatScrUsed) {
   unsigned ExtraSGPRs = 0;
-
-  ProgInfo.VCCUsed = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
-                     MRI.isPhysRegUsed(AMDGPU::VCC_HI);
-  if (ProgInfo.VCCUsed)
+  if (VCCUsed)
     ExtraSGPRs = 2;
 
-  ProgInfo.FlatUsed = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
-                      MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
-
-  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
-  // instructions aren't used to access the scratch buffer. Inline assembly
-  // may need it though.
-  //
-  // If we only have implicit uses of flat_scr on flat instructions, it is not
-  // really needed.
-  if (ProgInfo.FlatUsed && !MFI->hasFlatScratchInit() &&
-      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
-       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
-       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
-    ProgInfo.FlatUsed = false;
-  }
-
-  if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
-    if (ProgInfo.FlatUsed)
+  if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
+    if (FlatScrUsed)
       ExtraSGPRs = 4;
   } else {
-    if (STM.isXNACKEnabled())
+    if (ST.isXNACKEnabled())
       ExtraSGPRs = 4;
 
-    if (ProgInfo.FlatUsed)
+    if (FlatScrUsed)
       ExtraSGPRs = 6;
   }
 
+  return ExtraSGPRs;
+}
+
+int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
+  const SISubtarget &ST) const {
+  return NumExplicitSGPR + getNumExtraSGPRs(ST, UsesVCC, UsesFlatScratch);
+}
+
+AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
+  const MachineFunction &MF) const {
+  SIFunctionResourceInfo Info;
+
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
+                         MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
+
+  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
+  // instructions aren't used to access the scratch buffer. Inline assembly may
+  // need it though.
+  //
+  // If we only have implicit uses of flat_scr on flat instructions, it is not
+  // really needed.
+  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
+      (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
+       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
+       !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
+    Info.UsesFlatScratch = false;
+  }
+
+  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
+  Info.PrivateSegmentSize = FrameInfo.getStackSize();
+
+  if (!FrameInfo.hasCalls()) {
+    Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
+                   MRI.isPhysRegUsed(AMDGPU::VCC_HI);
+
+    // If there are no calls, MachineRegisterInfo can tell us the used register
+    // count easily.
+
+    MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
+    for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
+      if (MRI.isPhysRegUsed(Reg)) {
+        HighestVGPRReg = Reg;
+        break;
+      }
+    }
+
+    MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
+    for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
+      if (MRI.isPhysRegUsed(Reg)) {
+        HighestSGPRReg = Reg;
+        break;
+      }
+    }
+
+    // We found the maximum register index. They start at 0, so add one to get the
+    // number of registers.
+    Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
+      TRI.getHWRegIndex(HighestVGPRReg) + 1;
+    Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
+      TRI.getHWRegIndex(HighestSGPRReg) + 1;
+
+    return Info;
+  }
+
+  llvm_unreachable("calls not implemented");
+}
+
+void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
+                                        const MachineFunction &MF) {
+  SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
+
+  ProgInfo.NumVGPR = Info.NumVGPR;
+  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
+  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
+  ProgInfo.VCCUsed = Info.UsesVCC;
+  ProgInfo.FlatUsed = Info.UsesFlatScratch;
+  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
+
+  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const SIInstrInfo *TII = STM.getInstrInfo();
+  const SIRegisterInfo *RI = &TII->getRegisterInfo();
+
+  unsigned ExtraSGPRs = getNumExtraSGPRs(STM,
+                                         ProgInfo.VCCUsed,
+                                         ProgInfo.FlatUsed);
   unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
 
   // Check the addressable register limit before we add ExtraSGPRs.
@@ -574,9 +660,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   // Make clamp modifier on NaN input returns 0.
   ProgInfo.DX10Clamp = STM.enableDX10Clamp();
 
-  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-  ProgInfo.ScratchSize = FrameInfo.getStackSize();
-
   unsigned LDSAlignShift;
   if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
     // LDS is allocated in 64 dword blocks.
@@ -638,6 +721,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
   switch (CallConv) {
   default: LLVM_FALLTHROUGH;
   case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
+  case CallingConv::AMDGPU_HS: return R_00B428_SPI_SHADER_PGM_RSRC1_HS;
   case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
   case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
   case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
@@ -645,7 +729,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
 }
 
 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
-                                         const SIProgramInfo &KernelInfo) {
+                                         const SIProgramInfo &CurrentProgramInfo) {
   const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
@@ -653,29 +737,29 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
   if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
     OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
 
-    OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
+    OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4);
 
     OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
-    OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
+    OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4);
 
     OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
-    OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+    OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
 
     // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
     // 0" comment but I don't see a corresponding field in the register spec.
   } else {
     OutStreamer->EmitIntValue(RsrcReg, 4);
-    OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
-                              S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
+    OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
+                              S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
     if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
       OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
-      OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
+      OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
     }
   }
 
   if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
     OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
-    OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
+    OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4);
     OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
     OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
     OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
@@ -703,7 +787,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
 }
 
 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
-                                        const SIProgramInfo &KernelInfo,
+                                        const SIProgramInfo &CurrentProgramInfo,
                                         const MachineFunction &MF) const {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
@@ -711,10 +795,13 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
 
   Out.compute_pgm_resource_registers =
-      KernelInfo.ComputePGMRSrc1 |
-      (KernelInfo.ComputePGMRSrc2 << 32);
+      CurrentProgramInfo.ComputePGMRSrc1 |
+      (CurrentProgramInfo.ComputePGMRSrc2 << 32);
   Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
 
+  if (CurrentProgramInfo.DynamicCallStack)
+    Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
+
   AMD_HSA_BITS_SET(Out.code_properties,
                    AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
                    getElementByteSizeValue(STM.getMaxPrivateElementSize()));
@@ -766,12 +853,12 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   // FIXME: Should use getKernArgSize
   Out.kernarg_segment_byte_size =
     STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
-  Out.wavefront_sgpr_count = KernelInfo.NumSGPR;
-  Out.workitem_vgpr_count = KernelInfo.NumVGPR;
-  Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
-  Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
-  Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
-  Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
+  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
+  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
+  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
+  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
+  Out.reserved_vgpr_first = CurrentProgramInfo.ReservedVGPRFirst;
+  Out.reserved_vgpr_count = CurrentProgramInfo.ReservedVGPRCount;
 
   // These alignment values are specified in powers of two, so alignment =
   // 2^n.  The minimum alignment is 2^4 = 16.
@@ -780,9 +867,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
 
   if (STM.debuggerEmitPrologue()) {
     Out.debug_wavefront_private_segment_offset_sgpr =
-      KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
+      CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
     Out.debug_private_segment_buffer_sgpr =
-      KernelInfo.DebuggerPrivateSegmentBufferSGPR;
+      CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR;
   }
 }
 
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 8c86dea4b88..e5adeeb465e 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -30,9 +30,26 @@ namespace llvm {
 
 class AMDGPUTargetStreamer;
 class MCOperand;
+class SISubtarget;
 
 class AMDGPUAsmPrinter final : public AsmPrinter {
 private:
+  // Track resource usage for callee functions.
+  struct SIFunctionResourceInfo {
+    // Track the number of explicitly used VGPRs. Special registers reserved at
+    // the end are tracked separately.
+    int32_t NumVGPR = 0;
+    int32_t NumExplicitSGPR = 0;
+    uint32_t PrivateSegmentSize = 0;
+    bool UsesVCC = false;
+    bool UsesFlatScratch = false;
+    bool HasDynamicallySizedStack = false;
+    bool HasRecursion = false;
+
+    int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
+  };
+
+  // Track resource usage for kernels / entry functions.
   struct SIProgramInfo {
     // Fields set in PGM_RSRC1 pm4 packet.
     uint32_t VGPRBlocks = 0;
@@ -83,14 +100,23 @@ private:
     uint16_t DebuggerPrivateSegmentBufferSGPR =
         std::numeric_limits<uint16_t>::max();
 
+    // Whether there is recursion, dynamic allocas, indirect calls or some other
+    // reason there may be statically unknown stack usage.
+    bool DynamicCallStack = false;
+
     // Bonus information for debugging.
     bool VCCUsed = false;
 
     SIProgramInfo() = default;
   };
 
+  SIProgramInfo CurrentProgramInfo;
+  DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
+
   uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
-  void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
+  SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const;
+
+  void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
   void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
                         const MachineFunction &MF) const;
   void findNumUsedRegistersSI(const MachineFunction &MF,
@@ -101,6 +127,10 @@ private:
   /// can correctly setup the GPU state.
   void EmitProgramInfoR600(const MachineFunction &MF);
   void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
+  void emitCommonFunctionComments(uint32_t NumVGPR,
+                                  uint32_t NumSGPR,
+                                  uint32_t ScratchSize,
+                                  uint64_t CodeSize);
 
 public:
   explicit AMDGPUAsmPrinter(TargetMachine &TM,
@@ -112,6 +142,7 @@ public:
 
   AMDGPUTargetStreamer& getTargetStreamer() const;
 
+  bool doFinalization(Module &M) override;
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index f5110857da8..ccae36ced1f 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -207,8 +207,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
     return true;
 
   // TODO: Move into isKnownNeverNaN
-  if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N))
-    return BO->Flags.hasNoNaNs();
+  if (N->getFlags().isDefined())
+    return N->getFlags().hasNoNaNs();
 
   return CurDAG->isKnownNeverNaN(N);
 }
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e21775e61dd..64e1b8f0d7f 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/KnownBits.h"
 #include "SIInstrInfo.h"
 using namespace llvm;
 
@@ -895,6 +896,7 @@ CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::SPIR_KERNEL:
     return CC_AMDGPU_Kernel;
   case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_HS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
   case CallingConv::AMDGPU_CS:
@@ -2293,11 +2295,11 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
 //===----------------------------------------------------------------------===//
 
 static bool isU24(SDValue Op, SelectionDAG &DAG) {
-  APInt KnownZero, KnownOne;
+  KnownBits Known;
   EVT VT = Op.getValueType();
-  DAG.computeKnownBits(Op, KnownZero, KnownOne);
+  DAG.computeKnownBits(Op, Known);
 
-  return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
+  return (VT.getSizeInBits() - Known.Zero.countLeadingOnes()) <= 24;
 }
 
 static bool isI24(SDValue Op, SelectionDAG &DAG) {
@@ -3358,13 +3360,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
                                          OffsetVal,
                                          OffsetVal + WidthVal);
 
-      APInt KnownZero, KnownOne;
+      KnownBits Known;
       TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                             !DCI.isBeforeLegalizeOps());
       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
       if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) ||
-          TLI.SimplifyDemandedBits(BitsFrom, Demanded,
-                                   KnownZero, KnownOne, TLO)) {
+          TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) {
         DCI.CommitTargetLoweringOpt(TLO);
       }
     }
@@ -3516,6 +3517,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(KILL)
   NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
+  NODE_NAME_CASE(INIT_EXEC)
+  NODE_NAME_CASE(INIT_EXEC_FROM_INPUT)
   NODE_NAME_CASE(SENDMSG)
   NODE_NAME_CASE(SENDMSGHALT)
   NODE_NAME_CASE(INTERP_MOV)
@@ -3574,14 +3577,12 @@ SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
 }
 
 void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
-    const SDValue Op, APInt &KnownZero, APInt &KnownOne,
+    const SDValue Op, KnownBits &Known,
     const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
 
-  unsigned BitWidth = KnownZero.getBitWidth();
-  KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+  Known.Zero.clearAllBits(); Known.One.clearAllBits(); // Don't know anything.
 
-  APInt KnownZero2;
-  APInt KnownOne2;
+  KnownBits Known2;
   unsigned Opc = Op.getOpcode();
 
   switch (Opc) {
@@ -3589,7 +3590,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
     break;
   case AMDGPUISD::CARRY:
   case AMDGPUISD::BORROW: {
-    KnownZero = APInt::getHighBitsSet(32, 31);
+    Known.Zero = APInt::getHighBitsSet(32, 31);
     break;
   }
 
@@ -3602,16 +3603,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
     uint32_t Width = CWidth->getZExtValue() & 0x1f;
 
     if (Opc == AMDGPUISD::BFE_U32)
-      KnownZero = APInt::getHighBitsSet(32, 32 - Width);
+      Known.Zero = APInt::getHighBitsSet(32, 32 - Width);
 
     break;
   }
   case AMDGPUISD::FP_TO_FP16:
   case AMDGPUISD::FP16_ZEXT: {
-    unsigned BitWidth = KnownZero.getBitWidth();
+    unsigned BitWidth = Known.getBitWidth();
 
     // High bits are zero.
-    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+    Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
     break;
   }
   }
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 13cbfe26793..e1a5a207241 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -125,8 +125,9 @@ public:
     if (getTargetMachine().Options.NoSignedZerosFPMath)
       return true;
 
-    if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
-      return BO->Flags.hasNoSignedZeros();
+    const auto Flags = Op.getNode()->getFlags();
+    if (Flags.isDefined())
+      return Flags.hasNoSignedZeros();
 
     return false;
   }
@@ -199,8 +200,7 @@ public:
   /// either zero or one and return them in the \p KnownZero and \p KnownOne
   /// bitsets.
   void computeKnownBitsForTargetNode(const SDValue Op,
-                                     APInt &KnownZero,
-                                     APInt &KnownOne,
+                                     KnownBits &Known,
                                      const APInt &DemandedElts,
                                      const SelectionDAG &DAG,
                                      unsigned Depth = 0) const override;
@@ -370,6 +370,8 @@ enum NodeType : unsigned {
   BUILD_VERTICAL_VECTOR,
   /// Pointer to the start of the shader's constant data.
   CONST_DATA_PTR,
+  INIT_EXEC,
+  INIT_EXEC_FROM_INPUT,
   SENDMSG,
   SENDMSGHALT,
   INTERP_MOV,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index c1706d12a2e..353cc574279 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -299,6 +299,15 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
 
 def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
 
+def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC",
+                      SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                      [SDNPHasChain, SDNPInGlue]>;
+
+def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT",
+                                 SDTypeProfile<0, 2,
+                                 [SDTCisInt<0>, SDTCisInt<1>]>,
+                                 [SDNPHasChain, SDNPInGlue]>;
+
 def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
                     SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                     [SDNPHasChain, SDNPInGlue]>;
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 27fe639e3d4..fe7283ccf7d 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -17,6 +17,7 @@ static bool isEntryFunctionCC(CallingConv::ID CC) {
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
   case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_HS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
   case CallingConv::AMDGPU_CS:
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0202220b801..cd5bad04d0b 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -309,6 +309,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
             default:
               return false;
             case CallingConv::AMDGPU_VS:
+            case CallingConv::AMDGPU_HS:
             case CallingConv::AMDGPU_GS:
             case CallingConv::AMDGPU_PS:
             case CallingConv::AMDGPU_CS:
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6edd3e923ba..c9482c37ec8 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -432,6 +432,7 @@ static bool isArgPassedInSGPR(const Argument *A) {
   case CallingConv::SPIR_KERNEL:
     return true;
   case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_HS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
   case CallingConv::AMDGPU_CS:
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ea305a92fc6..630442625aa 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -422,8 +422,9 @@ void GCNScheduleDAGMILive::discoverLiveIns() {
   unsigned SGPRs = 0;
   unsigned VGPRs = 0;
 
+  auto &MI = *begin()->getParent()->getFirstNonDebugInstr();
   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
-  SlotIndex SI = LIS->getInstructionIndex(*begin()).getBaseIndex();
+  SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex();
   assert (SI.isValid());
 
   DEBUG(dbgs() << "Region live-ins:");
diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td
index a5310e9fd6d..4c9e1e8a543 100644
--- a/lib/Target/AMDGPU/R600Intrinsics.td
+++ b/lib/Target/AMDGPU/R600Intrinsics.td
@@ -61,7 +61,7 @@ def int_r600_ddx : TextureIntrinsicFloatInput;
 def int_r600_ddy : TextureIntrinsicFloatInput;
 
 def int_r600_dot4 : Intrinsic<[llvm_float_ty],
-  [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]
+  [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
 >;
 
 } // End TargetPrefix = "r600", isTarget = 1
diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index b7e62075244..d8cb98fe1b1 100644
--- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -77,9 +77,10 @@ class SIAnnotateControlFlow : public FunctionPass {
 
   void insertElse(BranchInst *Term);
 
-  Value *handleLoopCondition(Value *Cond, PHINode *Broken,
-                             llvm::Loop *L, BranchInst *Term,
-                             SmallVectorImpl<WeakVH> &LoopPhiConditions);
+  Value *
+  handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
+                      BranchInst *Term,
+                      SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions);
 
   void handleLoop(BranchInst *Term);
 
@@ -212,9 +213,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
 
 /// \brief Recursively handle the condition leading to a loop
 Value *SIAnnotateControlFlow::handleLoopCondition(
-  Value *Cond, PHINode *Broken,
-  llvm::Loop *L, BranchInst *Term,
-  SmallVectorImpl<WeakVH> &LoopPhiConditions) {
+    Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term,
+    SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions) {
 
   // Only search through PHI nodes which are inside the loop.  If we try this
   // with PHI nodes that are outside of the loop, we end up inserting new PHI
@@ -281,7 +281,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
       NewPhi->setIncomingValue(i, PhiArg);
     }
 
-    LoopPhiConditions.push_back(WeakVH(Phi));
+    LoopPhiConditions.push_back(WeakTrackingVH(Phi));
     return Ret;
   }
 
@@ -323,7 +323,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
   BasicBlock *Target = Term->getSuccessor(1);
   PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
 
-  SmallVector<WeakVH, 8> LoopPhiConditions;
+  SmallVector<WeakTrackingVH, 8> LoopPhiConditions;
   Value *Cond = Term->getCondition();
   Term->setCondition(BoolTrue);
   Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions);
@@ -333,7 +333,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
 
   Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
 
-  for (WeakVH Val : reverse(LoopPhiConditions)) {
+  for (WeakTrackingVH Val : reverse(LoopPhiConditions)) {
     if (PHINode *Cond = cast_or_null<PHINode>(Val))
       eraseIfUnused(Cond);
   }
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index 3dd372b3286..a01330cb917 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -302,6 +302,7 @@ enum DstUnused {
 #define   S_00B02C_EXTRA_LDS_SIZE(x)                                  (((x) & 0xFF) << 8)
 #define R_00B128_SPI_SHADER_PGM_RSRC1_VS                                0x00B128
 #define R_00B228_SPI_SHADER_PGM_RSRC1_GS                                0x00B228
+#define R_00B428_SPI_SHADER_PGM_RSRC1_HS                                0x00B428
 #define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
 #define   S_00B028_VGPRS(x)                                           (((x) & 0x3F) << 0)
 #define   S_00B028_SGPRS(x)                                           (((x) & 0x0F) << 6)
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index b0f0bf04a89..3cca815d877 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -278,8 +278,7 @@ static bool phiHasBreakDef(const MachineInstr &PHI,
 
     Visited.insert(Reg);
 
-    MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
-    assert(DefInstr);
+    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
     switch (DefInstr->getOpcode()) {
     default:
       break;
@@ -346,7 +345,7 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
     return false;
 
   DenseSet<const MachineBasicBlock*> Visited;
-  SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(), 
+  SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
                                               MBB->pred_end());
 
   while (!Worklist.empty()) {
@@ -546,7 +545,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         const TargetRegisterClass *SrcRC, *DstRC;
         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
-          MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+          unsigned SrcReg = MI.getOperand(1).getReg();
+          if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+            TII->moveToVALU(MI);
+            break;
+          }
+
+          MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
           unsigned SMovOp;
           int64_t Imm;
           // If we are just copying an immediate, we can replace the copy with
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index ce74a7cd8b0..853c8737b46 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -68,6 +68,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetCallingConv.h"
 #include "llvm/Target/TargetOptions.h"
@@ -1956,6 +1957,63 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     MI.eraseFromParent();
     return BB;
 
+  case AMDGPU::SI_INIT_EXEC:
+    // This should be before all vector instructions.
+    BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
+            AMDGPU::EXEC)
+        .addImm(MI.getOperand(0).getImm());
+    MI.eraseFromParent();
+    return BB;
+
+  case AMDGPU::SI_INIT_EXEC_FROM_INPUT: {
+    // Extract the thread count from an SGPR input and set EXEC accordingly.
+    // Since BFM can't shift by 64, handle that case with CMP + CMOV.
+    //
+    // S_BFE_U32 count, input, {shift, 7}
+    // S_BFM_B64 exec, count, 0
+    // S_CMP_EQ_U32 count, 64
+    // S_CMOV_B64 exec, -1
+    MachineInstr *FirstMI = &*BB->begin();
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    unsigned InputReg = MI.getOperand(0).getReg();
+    unsigned CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    bool Found = false;
+
+    // Move the COPY of the input reg to the beginning, so that we can use it.
+    for (auto I = BB->begin(); I != &MI; I++) {
+      if (I->getOpcode() != TargetOpcode::COPY ||
+          I->getOperand(0).getReg() != InputReg)
+        continue;
+
+      if (I == FirstMI) {
+        FirstMI = &*++BB->begin();
+      } else {
+        I->removeFromParent();
+        BB->insert(FirstMI, &*I);
+      }
+      Found = true;
+      break;
+    }
+    assert(Found);
+
+    // This should be before all vector instructions.
+    BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFE_U32), CountReg)
+        .addReg(InputReg)
+        .addImm((MI.getOperand(1).getImm() & 0x7f) | 0x70000);
+    BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFM_B64),
+            AMDGPU::EXEC)
+        .addReg(CountReg)
+        .addImm(0);
+    BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMP_EQ_U32))
+        .addReg(CountReg, RegState::Kill)
+        .addImm(64);
+    BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMOV_B64),
+            AMDGPU::EXEC)
+        .addImm(-1);
+    MI.eraseFromParent();
+    return BB;
+  }
+
   case AMDGPU::GET_GROUPSTATICSIZE: {
     DebugLoc DL = MI.getDebugLoc();
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
@@ -3223,6 +3281,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     return DAG.getNode(NodeOp, DL, MVT::Other, Chain,
                        Op.getOperand(2), Glue);
   }
+  case Intrinsic::amdgcn_init_exec: {
+    return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain,
+                       Op.getOperand(2));
+  }
+  case Intrinsic::amdgcn_init_exec_from_input: {
+    return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
+                       Op.getOperand(2), Op.getOperand(3));
+  }
   case AMDGPUIntrinsic::SI_tbuffer_store: {
     SDValue Ops[] = {
       Chain,
@@ -3455,15 +3521,15 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
     }
   }
 
-  const SDNodeFlags *Flags = Op->getFlags();
+  const SDNodeFlags Flags = Op->getFlags();
 
-  if (Unsafe || Flags->hasAllowReciprocal()) {
+  if (Unsafe || Flags.hasAllowReciprocal()) {
     // Turn into multiply by the reciprocal.
     // x / y -> x * (1.0 / y)
-    SDNodeFlags Flags;
-    Flags.setUnsafeAlgebra(true);
+    SDNodeFlags NewFlags;
+    NewFlags.setUnsafeAlgebra(true);
     SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
-    return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags);
+    return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags);
   }
 
   return SDValue();
@@ -4542,10 +4608,9 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
     return ISD::FMAD;
 
   const TargetOptions &Options = DAG.getTarget().Options;
-  if ((Options.AllowFPOpFusion == FPOpFusion::Fast ||
-       Options.UnsafeFPMath ||
-       (cast<BinaryWithFlagsSDNode>(N0)->Flags.hasUnsafeAlgebra() &&
-        cast<BinaryWithFlagsSDNode>(N1)->Flags.hasUnsafeAlgebra())) &&
+  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+       (N0->getFlags().hasUnsafeAlgebra() &&
+        N1->getFlags().hasUnsafeAlgebra())) &&
       isFMAFasterThanFMulAndFAdd(VT)) {
     return ISD::FMA;
   }
@@ -4706,12 +4771,12 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
 
   APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
 
-  APInt KnownZero, KnownOne;
+  KnownBits Known;
   TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                         !DCI.isBeforeLegalizeOps());
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) ||
-      TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
+      TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
     DCI.CommitTargetLoweringOpt(TLO);
   }
 
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 3f6ddec7047..7ccb54f54e3 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -286,6 +286,19 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
   let isReMaterializable = 1;
 }
 
+def SI_INIT_EXEC : SPseudoInstSI <
+  (outs), (ins i64imm:$src), []> {
+  let Defs = [EXEC];
+  let usesCustomInserter = 1;
+  let isAsCheapAsAMove = 1;
+}
+
+def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
+  (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
+  let Defs = [EXEC];
+  let usesCustomInserter = 1;
+}
+
 // Return for returning shaders to a shader variant epilog.
 def SI_RETURN_TO_EPILOG : SPseudoInstSI <
   (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
@@ -399,6 +412,16 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
 } // End SubtargetPredicate = isGCN
 
 let Predicates = [isGCN] in {
+def : Pat <
+  (AMDGPUinit_exec i64:$src),
+  (SI_INIT_EXEC (as_i64imm $src))
+>;
+
+def : Pat <
+  (AMDGPUinit_exec_from_input i32:$input, i32:$shift),
+  (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))
+>;
+
 def : Pat<
   (AMDGPUtrap timm:$trapid),
   (S_TRAP $trapid)
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5a3242bed1d..d565c84bfed 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -503,6 +503,7 @@ unsigned getInitialPSInputAddr(const Function &F) {
 bool isShader(CallingConv::ID cc) {
   switch(cc) {
     case CallingConv::AMDGPU_VS:
+    case CallingConv::AMDGPU_HS:
     case CallingConv::AMDGPU_GS:
     case CallingConv::AMDGPU_PS:
     case CallingConv::AMDGPU_CS:
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 39f7988200e..4676226acd9 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -23,9 +23,12 @@ namespace llvm {
 
 class ARMAsmPrinter;
 class ARMBaseTargetMachine;
+class ARMRegisterBankInfo;
+class ARMSubtarget;
 struct BasicBlockInfo;
 class Function;
 class FunctionPass;
+class InstructionSelector;
 class MachineBasicBlock;
 class MachineFunction;
 class MachineInstr;
@@ -43,6 +46,9 @@ FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createARMOptimizeBarriersPass();
 FunctionPass *createThumb2SizeReductionPass(
     std::function<bool(const Function &)> Ftor = nullptr);
+InstructionSelector *
+createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
+                             const ARMRegisterBankInfo &RBI);
 
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 13fb30767c9..a8188411bf5 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -245,12 +245,21 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
       // that's what we should load.
       Size = 4;
       assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
-      MRI.setType(ValVReg, LLT::scalar(32));
-    }
 
+      auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+      buildLoad(LoadVReg, Addr, Size, /* Alignment */ 0, MPO);
+      MIRBuilder.buildTrunc(ValVReg, LoadVReg);
+    } else {
+      // If the value is not extended, a simple load will suffice.
+      buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO);
+    }
+  }
+
+  void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment,
+                 MachinePointerInfo &MPO) {
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
-        MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
-    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+        MPO, MachineMemOperand::MOLoad, Size, Alignment);
+    MIRBuilder.buildLoad(Val, Addr, *MMO);
   }
 
   void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index e9bc7db66fa..56cac855620 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -3025,20 +3025,18 @@ bool ARMFastISel::fastLowerArguments() {
 
   // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
   // which are passed in r0 - r3.
-  unsigned Idx = 1;
-  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
-       I != E; ++I, ++Idx) {
-    if (Idx > 4)
+  for (const Argument &Arg : F->args()) {
+    if (Arg.getArgNo() >= 4)
       return false;
 
-    if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
+    if (Arg.hasAttribute(Attribute::InReg) ||
+        Arg.hasAttribute(Attribute::StructRet) ||
+        Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftError) ||
+        Arg.hasAttribute(Attribute::ByVal))
       return false;
 
-    Type *ArgTy = I->getType();
+    Type *ArgTy = Arg.getType();
     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
       return false;
 
@@ -3059,10 +3057,10 @@ bool ARMFastISel::fastLowerArguments() {
   };
 
   const TargetRegisterClass *RC = &ARM::rGPRRegClass;
-  Idx = 0;
   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
-       I != E; ++I, ++Idx) {
-    unsigned SrcReg = GPRArgRegs[Idx];
+       I != E; ++I) {
+    unsigned ArgNo = I->getArgNo();
+    unsigned SrcReg = GPRArgRegs[ArgNo];
     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
     // Without this, EmitLiveInCopies may eliminate the livein if its only
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 382f881f774..9f7e60a848d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -91,6 +91,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -11758,9 +11759,9 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
 
   // Lastly, can we determine that the bits defined by OrCI
   // are zero in Y?
-  APInt KnownZero, KnownOne;
-  DAG.computeKnownBits(Y, KnownZero, KnownOne);
-  if ((OrCI & KnownZero) != OrCI)
+  KnownBits Known;
+  DAG.computeKnownBits(Y, Known);
+  if ((OrCI & Known.Zero) != OrCI)
     return SDValue();
 
   // OK, we can do the combine.
@@ -11898,16 +11899,16 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   }
 
   if (Res.getNode()) {
-    APInt KnownZero, KnownOne;
-    DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
+    KnownBits Known;
+    DAG.computeKnownBits(SDValue(N,0), Known);
     // Capture demanded bits information that would be otherwise lost.
-    if (KnownZero == 0xfffffffe)
+    if (Known.Zero == 0xfffffffe)
       Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                         DAG.getValueType(MVT::i1));
-    else if (KnownZero == 0xffffff00)
+    else if (Known.Zero == 0xffffff00)
       Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                         DAG.getValueType(MVT::i8));
-    else if (KnownZero == 0xffff0000)
+    else if (Known.Zero == 0xffff0000)
       Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                         DAG.getValueType(MVT::i16));
   }
@@ -12596,13 +12597,12 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
 }
 
 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                      APInt &KnownZero,
-                                                      APInt &KnownOne,
+                                                      KnownBits &Known,
                                                       const APInt &DemandedElts,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
-  unsigned BitWidth = KnownOne.getBitWidth();
-  KnownZero = KnownOne = APInt(BitWidth, 0);
+  unsigned BitWidth = Known.getBitWidth();
+  Known.Zero.clearAllBits(); Known.One.clearAllBits();
   switch (Op.getOpcode()) {
   default: break;
   case ARMISD::ADDC:
@@ -12612,17 +12612,17 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     // These nodes' second result is a boolean
     if (Op.getResNo() == 0)
       break;
-    KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
     break;
   case ARMISD::CMOV: {
     // Bits are known zero/one if known on the LHS and RHS.
-    DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-    if (KnownZero == 0 && KnownOne == 0) return;
+    DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
+    if (Known.Zero == 0 && Known.One == 0) return;
 
-    APInt KnownZeroRHS, KnownOneRHS;
-    DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
-    KnownZero &= KnownZeroRHS;
-    KnownOne  &= KnownOneRHS;
+    KnownBits KnownRHS;
+    DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
+    Known.Zero &= KnownRHS.Zero;
+    Known.One  &= KnownRHS.One;
     return;
   }
   case ISD::INTRINSIC_W_CHAIN: {
@@ -12634,7 +12634,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     case Intrinsic::arm_ldrex: {
       EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
       unsigned MemBits = VT.getScalarSizeInBits();
-      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+      Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
       return;
     }
     }
@@ -12642,14 +12642,14 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   case ARMISD::BFI: {
     // Conservatively, we can recurse down the first operand
     // and just mask out all affected bits.
-    DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
 
     // The operand to BFI is already a mask suitable for removing the bits it
     // sets.
     ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
     const APInt &Mask = CI->getAPIntValue();
-    KnownZero &= Mask;
-    KnownOne &= Mask;
+    Known.Zero &= Mask;
+    Known.One &= Mask;
     return;
   }
   }
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 8b54ce430ed..76e4b60e01f 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -350,8 +350,7 @@ class InstrItineraryData;
                                     SDValue &Offset, ISD::MemIndexedMode &AM,
                                     SelectionDAG &DAG) const override;
 
-    void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
-                                       APInt &KnownOne,
+    void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                        const APInt &DemandedElts,
                                        const SelectionDAG &DAG,
                                        unsigned Depth) const override;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9d8ee5c3f9d..28eb5fc3086 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -317,8 +317,10 @@ def UseNegativeImmediates :
                                "NegativeImmediates">;
 
 // FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt          : Predicate<"Subtarget->useMovt(*MF)">;
-def DontUseMovt      : Predicate<"!Subtarget->useMovt(*MF)">;
+let RecomputePerFunction = 1 in {
+  def UseMovt          : Predicate<"Subtarget->useMovt(*MF)">;
+  def DontUseMovt      : Predicate<"!Subtarget->useMovt(*MF)">;
+}
 def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;
 def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
 
@@ -345,8 +347,10 @@ def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
 def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
                               "Subtarget->useNEONForSinglePrecisionFP()">;
 
-def IsLE             : Predicate<"MF->getDataLayout().isLittleEndian()">;
-def IsBE             : Predicate<"MF->getDataLayout().isBigEndian()">;
+let RecomputePerFunction = 1 in {
+  def IsLE             : Predicate<"MF->getDataLayout().isLittleEndian()">;
+  def IsBE             : Predicate<"MF->getDataLayout().isBigEndian()">;
+}
 
 def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
 
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 1c13d51a468..2ac3fda9f44 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -11,10 +11,10 @@
 /// \todo This should be generated by TableGen.
 //===----------------------------------------------------------------------===//
 
-#include "ARMInstructionSelector.h"
 #include "ARMRegisterBankInfo.h"
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 
@@ -26,10 +26,68 @@ using namespace llvm;
 #error "You shouldn't build this"
 #endif
 
-ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI,
+namespace {
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+class ARMInstructionSelector : public InstructionSelector {
+public:
+  ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI,
+                         const ARMRegisterBankInfo &RBI);
+
+  bool select(MachineInstr &I) const override;
+
+private:
+  bool selectImpl(MachineInstr &I) const;
+
+  const ARMBaseInstrInfo &TII;
+  const ARMBaseRegisterInfo &TRI;
+  const ARMBaseTargetMachine &TM;
+  const ARMRegisterBankInfo &RBI;
+  const ARMSubtarget &STI;
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+// We declare the temporaries used by selectImpl() in the class to minimize the
+// cost of constructing placeholder values.
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+} // end anonymous namespace
+
+namespace llvm {
+InstructionSelector *
+createARMInstructionSelector(const ARMBaseTargetMachine &TM,
+                             const ARMSubtarget &STI,
+                             const ARMRegisterBankInfo &RBI) {
+  return new ARMInstructionSelector(TM, STI, RBI);
+}
+}
+
+unsigned zero_reg = 0;
+
+#define GET_GLOBALISEL_IMPL
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM,
+                                               const ARMSubtarget &STI,
                                                const ARMRegisterBankInfo &RBI)
     : InstructionSelector(), TII(*STI.getInstrInfo()),
-      TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+      TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "ARMGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
 
 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
@@ -232,6 +290,9 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
     return true;
   }
 
+  if (selectImpl(I))
+    return true;
+
   MachineInstrBuilder MIB{MF, I};
   bool isSExt = false;
 
@@ -332,16 +393,6 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
     }
     MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
     break;
-  case G_SDIV:
-    assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation");
-    I.setDesc(TII.get(ARM::SDIV));
-    MIB.add(predOps(ARMCC::AL));
-    break;
-  case G_UDIV:
-    assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation");
-    I.setDesc(TII.get(ARM::UDIV));
-    MIB.add(predOps(ARMCC::AL));
-    break;
   case G_FADD:
     if (!selectFAdd(MIB, TII, MRI))
       return false;
diff --git a/lib/Target/ARM/ARMInstructionSelector.h b/lib/Target/ARM/ARMInstructionSelector.h
deleted file mode 100644
index 530141d92c2..00000000000
--- a/lib/Target/ARM/ARMInstructionSelector.h
+++ /dev/null
@@ -1,42 +0,0 @@
-//===- ARMInstructionSelector -----------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file declares the targeting of the InstructionSelector class for ARM.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
-#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
-
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-
-namespace llvm {
-
-class ARMBaseInstrInfo;
-class ARMBaseRegisterInfo;
-class ARMRegisterBankInfo;
-class ARMSubtarget;
-
-class ARMInstructionSelector : public InstructionSelector {
-public:
-  ARMInstructionSelector(const ARMSubtarget &STI,
-                         const ARMRegisterBankInfo &RBI);
-
-  bool select(MachineInstr &I) const override;
-
-private:
-  const ARMBaseInstrInfo &TII;
-  const ARMBaseRegisterInfo &TRI;
-  const ARMRegisterBankInfo &RBI;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b8dadb331ec..d09f3ecbaa2 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -12,7 +12,6 @@
 
 #include "ARM.h"
 #include "ARMCallLowering.h"
-#include "ARMInstructionSelector.h"
 #include "ARMLegalizerInfo.h"
 #include "ARMRegisterBankInfo.h"
 #include "ARMSubtarget.h"
@@ -339,7 +338,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
     // FIXME: At this point, we can't rely on Subtarget having RBI.
     // It's awkward to mix passing RBI and the Subtarget; should we pass
     // TII/TRI as well?
-    GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI));
+    GISel->InstSelector.reset(createARMInstructionSelector(*this, *I, *RBI));
 
     GISel->RegBankInfo.reset(RBI);
 #endif
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 1062c794320..3cde4396756 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,6 +1,9 @@
 set(LLVM_TARGET_DEFINITIONS ARM.td)
 
-tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
+if(LLVM_BUILD_GLOBAL_ISEL)
+  tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
+  tablegen(LLVM ARMGenGlobalISel.inc -gen-global-isel)
+endif()
 tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
 tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp
index ab42a7aa990..25232d2e47e 100644
--- a/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/lib/Target/AVR/AVRFrameLowering.cpp
@@ -57,6 +57,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
   DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
   const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
   const AVRInstrInfo &TII = *STI.getInstrInfo();
+  bool HasFP = hasFP(MF);
 
   // Interrupt handlers re-enable interrupts in function entry.
   if (CallConv == CallingConv::AVR_INTR) {
@@ -65,6 +66,13 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
+  // Save the frame pointer if we have one.
+  if (HasFP) {
+    BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr))
+        .addReg(AVR::R29R28, RegState::Kill)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
   // Emit special prologue code to save R1, R0 and SREG in interrupt/signal
   // handlers before saving any other registers.
   if (CallConv == CallingConv::AVR_INTR ||
@@ -72,6 +80,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
     BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr))
         .addReg(AVR::R1R0, RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
+
     BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0)
         .addImm(0x3f)
         .setMIFlag(MachineInstr::FrameSetup);
@@ -86,7 +95,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Early exit if the frame pointer is not needed in this function.
-  if (!hasFP(MF)) {
+  if (!HasFP) {
     return;
   }
 
@@ -165,6 +174,9 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
     BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
   }
 
+  if (hasFP(MF))
+    BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R29R28);
+
   // Early exit if there is no need to restore the frame pointer.
   if (!FrameSize) {
     return;
@@ -216,8 +228,9 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
 bool AVRFrameLowering::hasFP(const MachineFunction &MF) const {
   const AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>();
 
-  return (FuncInfo->getHasSpills() || FuncInfo->getHasAllocas() ||
-          FuncInfo->getHasStackArgs());
+  // TODO: We do not always need a frame pointer.
+  // This can be optimised.
+  return true;
 }
 
 bool AVRFrameLowering::spillCalleeSavedRegisters(
@@ -407,12 +420,9 @@ void AVRFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                             RegScavenger *RS) const {
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
 
-  // Spill register Y when it is used as the frame pointer.
-  if (hasFP(MF)) {
-    SavedRegs.set(AVR::R29R28);
-    SavedRegs.set(AVR::R29);
-    SavedRegs.set(AVR::R28);
-  }
+  // If we have a frame pointer, the Y register needs to be saved as well.
+  // We don't do that here however - the prologue and epilogue generation
+  // code will handle it specially.
 }
 /// The frame analyzer pass.
 ///
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 0b95d381939..f0ab6acedad 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -79,6 +79,11 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
   setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
   setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
 
+  setOperationAction(ISD::ROTL, MVT::i8, Custom);
+  setOperationAction(ISD::ROTL, MVT::i16, Custom);
+  setOperationAction(ISD::ROTR, MVT::i8, Custom);
+  setOperationAction(ISD::ROTR, MVT::i16, Custom);
+
   setOperationAction(ISD::BR_CC, MVT::i8, Custom);
   setOperationAction(ISD::BR_CC, MVT::i16, Custom);
   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
@@ -273,6 +278,12 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
     case ISD::SRL:
       return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
                          N->getOperand(1));
+    case ISD::ROTL:
+      return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0),
+                         N->getOperand(1));
+    case ISD::ROTR:
+      return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0),
+                         N->getOperand(1));
     case ISD::SRA:
       return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
                          N->getOperand(1));
@@ -1440,6 +1451,22 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
     Opc = AVR::LSRWRd;
     RC = &AVR::DREGSRegClass;
     break;
+  case AVR::Rol8:
+    Opc = AVR::ROLRd;
+    RC = &AVR::GPR8RegClass;
+    break;
+  case AVR::Rol16:
+    Opc = AVR::ROLWRd;
+    RC = &AVR::DREGSRegClass;
+    break;
+  case AVR::Ror8:
+    Opc = AVR::RORRd;
+    RC = &AVR::GPR8RegClass;
+    break;
+  case AVR::Ror16:
+    Opc = AVR::RORWRd;
+    RC = &AVR::DREGSRegClass;
+    break;
   }
 
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -1552,6 +1579,10 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case AVR::Lsl16:
   case AVR::Lsr8:
   case AVR::Lsr16:
+  case AVR::Rol8:
+  case AVR::Rol16:
+  case AVR::Ror8:
+  case AVR::Ror16:
   case AVR::Asr8:
   case AVR::Asr16:
     return insertShift(MI, MBB);
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index a8cdc4e7ae2..b44c62a21ac 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -43,6 +43,8 @@ enum NodeType {
   ROL,     ///< Bit rotate left.
   LSLLOOP, ///< A loop of single logical shift left instructions.
   LSRLOOP, ///< A loop of single logical shift right instructions.
+  ROLLOOP, ///< A loop of single left bit rotate instructions.
+  RORLOOP, ///< A loop of single right bit rotate instructions.
   ASRLOOP, ///< A loop of single arithmetic shift right instructions.
   /// AVR conditional branches. Operand 0 is the chain operand, operand 1
   /// is the block to branch if condition is true, operand 2 is the
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index 693d80a1c06..1b6547ef779 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -64,6 +64,8 @@ def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>;
 // Pseudo shift nodes for non-constant shift amounts.
 def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>;
 def AVRlsrLoop : SDNode<"AVRISD::LSRLOOP", SDTIntShiftOp>;
+def AVRrolLoop : SDNode<"AVRISD::ROLLOOP", SDTIntShiftOp>;
+def AVRrorLoop : SDNode<"AVRISD::RORLOOP", SDTIntShiftOp>;
 def AVRasrLoop : SDNode<"AVRISD::ASRLOOP", SDTIntShiftOp>;
 
 //===----------------------------------------------------------------------===//
@@ -183,33 +185,33 @@ def call_target : Operand<iPTR>
 // A 16-bit address (which can lead to an R_AVR_16 relocation).
 def imm16 : Operand<i16>
 {
-    let EncoderMethod = "encodeImm<AVR::fixup_16>";
+    let EncoderMethod = "encodeImm<AVR::fixup_16, 2>";
 }
 
 /// A 6-bit immediate used in the ADIW/SBIW instructions.
 def imm_arith6 : Operand<i16>
 {
-    let EncoderMethod = "encodeImm<AVR::fixup_6_adiw>";
+    let EncoderMethod = "encodeImm<AVR::fixup_6_adiw, 0>";
 }
 
 /// An 8-bit immediate inside an instruction with the same format
 /// as the `LDI` instruction (the `FRdK` format).
 def imm_ldi8 : Operand<i8>
 {
-    let EncoderMethod = "encodeImm<AVR::fixup_ldi>";
+    let EncoderMethod = "encodeImm<AVR::fixup_ldi, 0>";
 }
 
 /// A 5-bit port number used in SBIC and friends (the `FIOBIT` format).
 def imm_port5 : Operand<i8>
 {
-    let EncoderMethod = "encodeImm<AVR::fixup_port5>";
+    let EncoderMethod = "encodeImm<AVR::fixup_port5, 0>";
 }
 
 /// A 6-bit port number used in the `IN` instruction and friends (the
 /// `FIORdA` format.
 def imm_port6 : Operand<i8>
 {
-    let EncoderMethod = "encodeImm<AVR::fixup_port6>";
+    let EncoderMethod = "encodeImm<AVR::fixup_port6, 0>";
 }
 
 // Addressing mode pattern reg+imm6
@@ -1932,7 +1934,6 @@ def Lsr8 : ShiftPseudo<
   [(set i8:$dst, (AVRlsrLoop i8:$src, i8:$cnt))]
 >;
 
-
 def Lsr16 : ShiftPseudo<
   (outs DREGS:$dst),
    (ins DREGS:$src, GPR8:$cnt),
@@ -1940,6 +1941,34 @@ def Lsr16 : ShiftPseudo<
    [(set i16:$dst, (AVRlsrLoop i16:$src, i8:$cnt))]
 >;
 
+def Rol8 : ShiftPseudo<
+  (outs GPR8:$dst),
+  (ins GPR8:$src, GPR8:$cnt),
+  "# Rol8 PSEUDO",
+  [(set i8:$dst, (AVRrolLoop i8:$src, i8:$cnt))]
+>;
+
+def Rol16 : ShiftPseudo<
+  (outs DREGS:$dst),
+  (ins DREGS:$src, GPR8:$cnt),
+  "# Rol16 PSEUDO",
+  [(set i16:$dst, (AVRrolLoop i16:$src, i8:$cnt))]
+>;
+
+def Ror8 : ShiftPseudo<
+  (outs GPR8:$dst),
+  (ins GPR8:$src, GPR8:$cnt),
+  "# Ror8 PSEUDO",
+  [(set i8:$dst, (AVRrorLoop i8:$src, i8:$cnt))]
+>;
+
+def Ror16 : ShiftPseudo<
+  (outs DREGS:$dst),
+  (ins DREGS:$src, GPR8:$cnt),
+  "# Ror16 PSEUDO",
+  [(set i16:$dst, (AVRrorLoop i16:$src, i8:$cnt))]
+>;
+
 def Asr8 : ShiftPseudo<
   (outs GPR8:$dst),
   (ins GPR8:$src, GPR8:$cnt),
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index c3d43ebb407..4dbbce8c205 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -177,7 +177,7 @@ unsigned AVRMCCodeEmitter::encodeComplement(const MCInst &MI, unsigned OpNo,
   return (~0) - Imm;
 }
 
-template <AVR::Fixups Fixup>
+template <AVR::Fixups Fixup, unsigned Offset>
 unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo,
                                      SmallVectorImpl<MCFixup> &Fixups,
                                      const MCSubtargetInfo &STI) const {
@@ -193,7 +193,7 @@ unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo,
     }
 
     MCFixupKind FixupKind = static_cast<MCFixupKind>(Fixup);
-    Fixups.push_back(MCFixup::create(0, MO.getExpr(), FixupKind, MI.getLoc()));
+    Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), FixupKind, MI.getLoc()));
 
     return 0;
   }
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 4cee8d904c9..883abf8db78 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -69,7 +69,8 @@ private:
                             const MCSubtargetInfo &STI) const;
 
   /// Encodes an immediate value with a given fixup.
-  template <AVR::Fixups Fixup>
+  /// \tparam Offset The offset into the instruction for the fixup.
+  template <AVR::Fixups Fixup, unsigned Offset>
   unsigned encodeImm(const MCInst &MI, unsigned OpNo,
                      SmallVectorImpl<MCFixup> &Fixups,
                      const MCSubtargetInfo &STI) const;
diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 9beefcdcc1d..b98621ca474 100644
--- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -17,6 +17,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -88,9 +90,9 @@ static DecodeStatus decodeMemoryOpValue(MCInst &Inst, unsigned Insn,
 }
 
 #include "BPFGenDisassemblerTables.inc"
-
 static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address,
-                                      uint64_t &Size, uint64_t &Insn) {
+                                      uint64_t &Size, uint64_t &Insn,
+                                      bool IsLittleEndian) {
   uint64_t Lo, Hi;
 
   if (Bytes.size() < 8) {
@@ -99,8 +101,14 @@ static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address,
   }
 
   Size = 8;
-  Hi = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 0) | (Bytes[3] << 8);
-  Lo = (Bytes[4] << 0) | (Bytes[5] << 8) | (Bytes[6] << 16) | (Bytes[7] << 24);
+  if (IsLittleEndian) {
+    Hi = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 0) | (Bytes[3] << 8);
+    Lo = (Bytes[4] << 0) | (Bytes[5] << 8) | (Bytes[6] << 16) | (Bytes[7] << 24);
+  } else {
+    Hi = (Bytes[0] << 24) | ((Bytes[1] & 0x0F) << 20) | ((Bytes[1] & 0xF0) << 12) |
+         (Bytes[2] << 8) | (Bytes[3] << 0);
+    Lo = (Bytes[4] << 24) | (Bytes[5] << 16) | (Bytes[6] << 8) | (Bytes[7] << 0);
+  }
   Insn = Make_64(Hi, Lo);
 
   return MCDisassembler::Success;
@@ -111,10 +119,11 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
                                              uint64_t Address,
                                              raw_ostream &VStream,
                                              raw_ostream &CStream) const {
-  uint64_t Insn;
+  bool IsLittleEndian = getContext().getAsmInfo()->isLittleEndian();
+  uint64_t Insn, Hi;
   DecodeStatus Result;
 
-  Result = readInstruction64(Bytes, Address, Size, Insn);
+  Result = readInstruction64(Bytes, Address, Size, Insn, IsLittleEndian);
   if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
 
   Result = decodeInstruction(DecoderTableBPF64, Instr, Insn,
@@ -128,7 +137,10 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
       return MCDisassembler::Fail;
     }
     Size = 16;
-    uint64_t Hi = (Bytes[12] << 0) | (Bytes[13] << 8) | (Bytes[14] << 16) | (Bytes[15] << 24);
+    if (IsLittleEndian)
+      Hi = (Bytes[12] << 0) | (Bytes[13] << 8) | (Bytes[14] << 16) | (Bytes[15] << 24);
+    else
+      Hi = (Bytes[12] << 24) | (Bytes[13] << 16) | (Bytes[14] << 8) | (Bytes[15] << 0);
     auto& Op = Instr.getOperand(1);
     Op.setImm(Make_64(Hi, Op.getImm()));
     break;
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 4bbc36a86e5..42ff9cc3d18 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -459,94 +459,16 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
   DEBUG(MCB.dump_pretty(dbgs()));
   DEBUG(dbgs() << "--\n");
 
+  MCB.setLoc(IDLoc);
   // Check the bundle for errors.
   const MCRegisterInfo *RI = getContext().getRegisterInfo();
-  HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI);
+  HexagonMCChecker Check(getContext(), MCII, getSTI(), MCB, *RI);
 
   bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(),
                                                         getContext(), MCB,
                                                         &Check);
 
-  while (Check.getNextErrInfo()) {
-    unsigned Reg = Check.getErrRegister();
-    Twine R(RI->getName(Reg));
-
-    uint64_t Err = Check.getError();
-    if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
-      if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
-        return Error(
-            IDLoc,
-            "unconditional branch cannot precede another branch in packet");
-
-      if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
-          HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
-        return Error(IDLoc, "register `" + R +
-                                "' used with `.new' "
-                                "but not validly modified in the same packet");
-
-      if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
-        return Error(IDLoc, "register `" + R + "' modified more than once");
-
-      if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
-        return Error(IDLoc, "cannot write to read-only register `" + R + "'");
-
-      if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
-        return Error(IDLoc, "loop-setup and some branch instructions "
-                            "cannot be in the same packet");
-
-      if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
-        Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
-        return Error(IDLoc,
-                     "packet marked with `:endloop" + N + "' " +
-                         "cannot contain instructions that modify register " +
-                         "`" + R + "'");
-      }
-
-      if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
-        return Error(
-            IDLoc,
-            "instruction cannot appear in packet with other instructions");
-
-      if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
-        return Error(IDLoc, "too many slots used in packet");
-
-      if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
-        uint64_t Erm = Check.getShuffleError();
-
-        if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
-          return Error(IDLoc, "invalid instruction packet");
-        else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
-          return Error(IDLoc, "invalid instruction packet: too many stores");
-        else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
-          return Error(IDLoc, "invalid instruction packet: too many loads");
-        else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
-          return Error(IDLoc, "too many branches in packet");
-        else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
-          return Error(IDLoc, "invalid instruction packet: out of slots");
-        else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
-          return Error(IDLoc, "invalid instruction packet: slot error");
-        else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
-          return Error(IDLoc, "v60 packet violation");
-        else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
-          return Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
-        else
-          return Error(IDLoc, "unknown error in instruction packet");
-      }
-    }
-
-    unsigned Warn = Check.getWarning();
-    if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) {
-      if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn)
-        Warning(IDLoc, "register `" + R + "' used with `.cur' "
-                                          "but not used in the same packet");
-      else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn)
-        Warning(IDLoc, "register `" + R + "' used with `.tmp' "
-                                          "but not used in the same packet");
-    }
-  }
-
   if (CheckOk) {
-    MCB.setLoc(IDLoc);
     if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
       assert(!HexagonMCInstrInfo::isInnerLoop(MCB));
       assert(!HexagonMCInstrInfo::isOuterLoop(MCB));
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index ae15ed0e924..3396ddbe4fa 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -191,7 +191,8 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     return Result;
   if (Size > HEXAGON_MAX_PACKET_SIZE)
     return MCDisassembler::Fail;
-  HexagonMCChecker Checker(*MCII, STI, MI, MI, *getContext().getRegisterInfo());
+  HexagonMCChecker Checker(getContext(), *MCII, STI, MI,
+                           *getContext().getRegisterInfo(), false);
   if (!Checker.check())
     return MCDisassembler::Fail;
   return MCDisassembler::Success;
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 90ccecb6629..af0f8b265bd 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -57,12 +57,10 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
   // tion). To avoid the complications with in-memory arguments, only consi-
   // der the initial sequence of formal parameters that are known to be
   // passed via registers.
-  unsigned AttrIdx = 0;
   unsigned InVirtReg, InPhysReg = 0;
   const Function &F = *MF.getFunction();
   typedef Function::const_arg_iterator arg_iterator;
   for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
-    AttrIdx++;
     const Argument &Arg = *I;
     Type *ATy = Arg.getType();
     unsigned Width = 0;
@@ -74,8 +72,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
     // Module::AnyPointerSize.
     if (Width == 0 || Width > 64)
       break;
-    AttributeList Attrs = F.getAttributes();
-    if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal))
+    if (Arg.hasAttribute(Attribute::ByVal))
       continue;
     InPhysReg = getNextPhysReg(InPhysReg, Width);
     if (!InPhysReg)
@@ -83,9 +80,9 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
     InVirtReg = getVirtRegFor(InPhysReg);
     if (!InVirtReg)
       continue;
-    if (Attrs.hasAttribute(AttrIdx, Attribute::SExt))
+    if (Arg.hasAttribute(Attribute::SExt))
       VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width)));
-    else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt))
+    else if (Arg.hasAttribute(Attribute::ZExt))
       VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width)));
   }
 }
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 2f8fe6e087f..c7b422e7efd 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -38,6 +38,7 @@ class HexagonCFGOptimizer : public MachineFunctionPass {
 
 private:
   void InvertAndChangeJumpTarget(MachineInstr &, MachineBasicBlock *);
+  bool isOnFallThroughPath(MachineBasicBlock *MBB);
 
 public:
   static char ID;
@@ -106,6 +107,14 @@ void HexagonCFGOptimizer::InvertAndChangeJumpTarget(
   MI.getOperand(1).setMBB(NewTarget);
 }
 
+bool HexagonCFGOptimizer::isOnFallThroughPath(MachineBasicBlock *MBB) {
+  if (MBB->canFallThrough())
+    return true;
+  for (MachineBasicBlock *PB : MBB->predecessors())
+    if (PB->isLayoutSuccessor(MBB) && PB->canFallThrough())
+      return true;
+  return false;
+}
 
 bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
   if (skipFunction(*Fn.getFunction()))
@@ -182,7 +191,6 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
         }
 
         if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) {
-
           // Ensure that BB2 has one instruction -- an unconditional jump.
           if ((LayoutSucc->size() == 1) &&
               IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
@@ -211,9 +219,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
                 JumpAroundTarget->moveAfter(LayoutSucc);
                 // only move a block if it doesn't have a fall-thru. otherwise
                 // the CFG will be incorrect.
-                if (!UncondTarget->canFallThrough()) {
+                if (!isOnFallThroughPath(UncondTarget))
                   UncondTarget->moveAfter(JumpAroundTarget);
-                }
               }
 
               //
diff --git a/lib/Target/Hexagon/HexagonDepITypes.h b/lib/Target/Hexagon/HexagonDepITypes.h
index f8ae39a3799..331edaf5831 100644
--- a/lib/Target/Hexagon/HexagonDepITypes.h
+++ b/lib/Target/Hexagon/HexagonDepITypes.h
@@ -21,7 +21,6 @@ enum Type {
   TypeCVI_VA = 16,
   TypeCVI_VA_DV = 17,
   TypeCVI_VINLANESAT = 18,
-  TypeCVI_VM_CUR_LD = 19,
   TypeCVI_VM_LD = 20,
   TypeCVI_VM_NEW_ST = 21,
   TypeCVI_VM_ST = 22,
diff --git a/lib/Target/Hexagon/HexagonDepITypes.td b/lib/Target/Hexagon/HexagonDepITypes.td
index f1d689ce12f..b35f7ba6d2a 100644
--- a/lib/Target/Hexagon/HexagonDepITypes.td
+++ b/lib/Target/Hexagon/HexagonDepITypes.td
@@ -19,7 +19,6 @@ def TypeCVI_HIST : IType<10>;
 def TypeCVI_VA : IType<16>;
 def TypeCVI_VA_DV : IType<17>;
 def TypeCVI_VINLANESAT : IType<18>;
-def TypeCVI_VM_CUR_LD : IType<19>;
 def TypeCVI_VM_LD : IType<20>;
 def TypeCVI_VM_NEW_ST : IType<21>;
 def TypeCVI_VM_ST : IType<22>;
diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td
index 2bfde9acaea..d910d4af219 100644
--- a/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -26685,6 +26685,7 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 }
@@ -26701,6 +26702,7 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26709,7 +26711,7 @@ def V6_vL32b_cur_npred_ai : HInst<
 (outs VectorRegs:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{31-21} = 0b00101000100;
 let isPredicated = 1;
@@ -26719,6 +26721,7 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 }
@@ -26726,7 +26729,7 @@ def V6_vL32b_cur_npred_ai_128B : HInst<
 (outs VectorRegs128B:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{31-21} = 0b00101000100;
 let isPredicated = 1;
@@ -26736,6 +26739,7 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26744,7 +26748,7 @@ def V6_vL32b_cur_npred_pi : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001100;
@@ -26755,6 +26759,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
@@ -26763,7 +26768,7 @@ def V6_vL32b_cur_npred_pi_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001100;
@@ -26774,6 +26779,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26783,7 +26789,7 @@ def V6_vL32b_cur_npred_ppu : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000101;
 let Inst{31-21} = 0b00101011100;
 let isPredicated = 1;
@@ -26793,6 +26799,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
@@ -26801,7 +26808,7 @@ def V6_vL32b_cur_npred_ppu_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000101;
 let Inst{31-21} = 0b00101011100;
 let isPredicated = 1;
@@ -26811,6 +26818,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26829,6 +26837,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
@@ -26846,6 +26855,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26855,7 +26865,7 @@ def V6_vL32b_cur_ppu : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, ModRegs:$Mu2),
 "$Vd32.cur = vmem($Rx32++$Mu2)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
 let Inst{12-5} = 0b00000001;
 let Inst{31-21} = 0b00101011000;
 let hasNewValue = 1;
@@ -26863,6 +26873,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
@@ -26871,7 +26882,7 @@ def V6_vL32b_cur_ppu_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, ModRegs:$Mu2),
 "$Vd32.cur = vmem($Rx32++$Mu2)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
 let Inst{12-5} = 0b00000001;
 let Inst{31-21} = 0b00101011000;
 let hasNewValue = 1;
@@ -26879,6 +26890,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26888,7 +26900,7 @@ def V6_vL32b_cur_pred_ai : HInst<
 (outs VectorRegs:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{31-21} = 0b00101000100;
 let isPredicated = 1;
@@ -26897,6 +26909,7 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 }
@@ -26904,7 +26917,7 @@ def V6_vL32b_cur_pred_ai_128B : HInst<
 (outs VectorRegs128B:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{31-21} = 0b00101000100;
 let isPredicated = 1;
@@ -26913,6 +26926,7 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26921,7 +26935,7 @@ def V6_vL32b_cur_pred_pi : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCOPROC_VMEM>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001100;
@@ -26931,6 +26945,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
@@ -26939,7 +26954,7 @@ def V6_vL32b_cur_pred_pi_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
-CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCOPROC_VMEM>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001100;
@@ -26949,6 +26964,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -26958,7 +26974,7 @@ def V6_vL32b_cur_pred_ppu : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000100;
 let Inst{31-21} = 0b00101011100;
 let isPredicated = 1;
@@ -26967,6 +26983,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
@@ -26975,7 +26992,7 @@ def V6_vL32b_cur_pred_ppu_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000100;
 let Inst{31-21} = 0b00101011100;
 let isPredicated = 1;
@@ -26984,6 +27001,7 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
+let CVINew = 1;
 let mayLoad = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -27150,8 +27168,9 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 }
 def V6_vL32b_nt_cur_ai_128B : HInst<
@@ -27167,8 +27186,9 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 }
@@ -27176,7 +27196,7 @@ def V6_vL32b_nt_cur_npred_ai : HInst<
 (outs VectorRegs:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{31-21} = 0b00101000110;
 let isPredicated = 1;
@@ -27186,15 +27206,16 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 }
 def V6_vL32b_nt_cur_npred_ai_128B : HInst<
 (outs VectorRegs128B:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{31-21} = 0b00101000110;
 let isPredicated = 1;
@@ -27204,8 +27225,9 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 }
@@ -27213,7 +27235,7 @@ def V6_vL32b_nt_cur_npred_pi : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001110;
@@ -27224,8 +27246,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
 }
@@ -27233,7 +27256,7 @@ def V6_vL32b_nt_cur_npred_pi_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b101;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001110;
@@ -27244,8 +27267,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 let Constraints = "$Rx32 = $Rx32in";
@@ -27254,7 +27278,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000101;
 let Inst{31-21} = 0b00101011110;
 let isPredicated = 1;
@@ -27264,8 +27288,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
 }
@@ -27273,7 +27298,7 @@ def V6_vL32b_nt_cur_npred_ppu_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000101;
 let Inst{31-21} = 0b00101011110;
 let isPredicated = 1;
@@ -27283,8 +27308,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 let Constraints = "$Rx32 = $Rx32in";
@@ -27302,8 +27328,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
 }
@@ -27320,8 +27347,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 let Constraints = "$Rx32 = $Rx32in";
@@ -27330,7 +27358,7 @@ def V6_vL32b_nt_cur_ppu : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, ModRegs:$Mu2),
 "$Vd32.cur = vmem($Rx32++$Mu2):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
 let Inst{12-5} = 0b00000001;
 let Inst{31-21} = 0b00101011010;
 let hasNewValue = 1;
@@ -27338,8 +27366,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
 }
@@ -27347,7 +27376,7 @@ def V6_vL32b_nt_cur_ppu_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, ModRegs:$Mu2),
 "$Vd32.cur = vmem($Rx32++$Mu2):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
 let Inst{12-5} = 0b00000001;
 let Inst{31-21} = 0b00101011010;
 let hasNewValue = 1;
@@ -27355,8 +27384,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 let Constraints = "$Rx32 = $Rx32in";
@@ -27365,7 +27395,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst<
 (outs VectorRegs:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{31-21} = 0b00101000110;
 let isPredicated = 1;
@@ -27374,15 +27404,16 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 }
 def V6_vL32b_nt_cur_pred_ai_128B : HInst<
 (outs VectorRegs128B:$Vd32),
 (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{31-21} = 0b00101000110;
 let isPredicated = 1;
@@ -27391,8 +27422,9 @@ let opNewValue = 0;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 }
@@ -27400,7 +27432,7 @@ def V6_vL32b_nt_cur_pred_pi : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001110;
@@ -27410,8 +27442,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
 }
@@ -27419,7 +27452,7 @@ def V6_vL32b_nt_cur_pred_pi_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
-CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
 let Inst{7-5} = 0b100;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b00101001110;
@@ -27429,8 +27462,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 let Constraints = "$Rx32 = $Rx32in";
@@ -27439,7 +27473,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst<
 (outs VectorRegs:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000100;
 let Inst{31-21} = 0b00101011110;
 let isPredicated = 1;
@@ -27448,8 +27482,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let Constraints = "$Rx32 = $Rx32in";
 }
@@ -27457,7 +27492,7 @@ def V6_vL32b_nt_cur_pred_ppu_128B : HInst<
 (outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
 (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
 "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
-CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+CVI_VM_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
 let Inst{10-5} = 0b000100;
 let Inst{31-21} = 0b00101011110;
 let isPredicated = 1;
@@ -27466,8 +27501,9 @@ let opNewValue = 0;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isCVLoad = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let mayLoad = 1;
+let isNonTemporal = 1;
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
 let Constraints = "$Rx32 = $Rx32in";
@@ -28936,8 +28972,9 @@ let Inst{31-21} = 0b00101000001;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -28954,8 +28991,9 @@ let Inst{31-21} = 0b00101000001;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai_128B";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -28974,8 +29012,9 @@ let isPredicatedFalse = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 3;
@@ -28992,8 +29031,9 @@ let isPredicatedFalse = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29012,8 +29052,9 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29032,8 +29073,9 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29052,8 +29094,9 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29071,8 +29114,9 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29090,8 +29134,9 @@ let Inst{31-21} = 0b00101001001;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29109,8 +29154,9 @@ let Inst{31-21} = 0b00101001001;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi_128B";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29128,8 +29174,9 @@ let Inst{31-21} = 0b00101011001;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29146,8 +29193,9 @@ let Inst{31-21} = 0b00101011001;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu_128B";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29166,8 +29214,9 @@ let isPredicated = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 3;
@@ -29183,8 +29232,9 @@ let isPredicated = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29202,8 +29252,9 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29221,8 +29272,9 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29240,8 +29292,9 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29258,8 +29311,9 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29498,9 +29552,10 @@ let Inst{31-21} = 0b00101000011;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29517,9 +29572,10 @@ let Inst{31-21} = 0b00101000011;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai_128B";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29538,9 +29594,10 @@ let isPredicatedFalse = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 3;
@@ -29557,9 +29614,10 @@ let isPredicatedFalse = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29578,9 +29636,10 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29599,9 +29658,10 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29620,9 +29680,10 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29640,9 +29701,10 @@ let isPredicatedFalse = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29660,9 +29722,10 @@ let Inst{31-21} = 0b00101001011;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29680,9 +29743,10 @@ let Inst{31-21} = 0b00101001011;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi_128B";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29700,9 +29764,10 @@ let Inst{31-21} = 0b00101011011;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29719,9 +29784,10 @@ let Inst{31-21} = 0b00101011011;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu_128B";
 let isPredicable = 1;
 let DecoderNamespace = "EXT_mmvec";
@@ -29740,9 +29806,10 @@ let isPredicated = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 3;
@@ -29758,9 +29825,10 @@ let isPredicated = 1;
 let addrMode = BaseImmOffset;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ai_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29778,9 +29846,10 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29798,9 +29867,10 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_pi_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
@@ -29818,9 +29888,10 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector64Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu";
 let DecoderNamespace = "EXT_mmvec";
 let opNewValue = 4;
@@ -29837,9 +29908,10 @@ let isPredicated = 1;
 let addrMode = PostInc;
 let accessSize = Vector128Access;
 let isNVStore = 1;
-let mayStore = 1;
-let isNonTemporal = 1;
+let CVINew = 1;
 let isNewValue = 1;
+let isNonTemporal = 1;
+let mayStore = 1;
 let BaseOpcode = "V6_vS32b_ppu_128B";
 let DecoderNamespace = "EXT_mmvec";
 let isCodeGenOnly = 1;
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 39c2a6e4f5a..7d1da5c3ba2 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -165,6 +165,9 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
   bit cofMax1 = 0;
   let TSFlags{60} = cofMax1;
 
+  bit CVINew = 0;
+  let TSFlags{61} = CVINew;
+
   // Fields used for relation models.
   bit isNonTemporal = 0;
   string isNT = ""; // set to "true" for non-temporal vector stores.
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index c8a7faea5ed..1c46ae77b6c 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -131,12 +131,6 @@ class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr,
    : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
      OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
 
-class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr,
-                             list<dag> pattern = [], string cstr = "",
-                             InstrItinClass itin = CVI_VM_CUR_LD>
-   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>,
-     OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
-
 class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr,
                              list<dag> pattern = [], string cstr = "",
                              InstrItinClass itin = CVI_VM_VP_LDU>
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 1829c5da02a..5a5799dbe00 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1420,7 +1420,7 @@ bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
 
 void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
   for (auto &I : *LoopB)
-    if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT))
+    if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
       I.replaceAllUsesWith(SV);
 
   for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
@@ -2044,7 +2044,7 @@ CleanupAndExit:
                                SCEV::FlagNUW);
   Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
   if (Instruction *In = dyn_cast<Instruction>(NumBytes))
-    if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
+    if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
       NumBytes = Simp;
 
   CallInst *NewCall;
@@ -2156,7 +2156,7 @@ CleanupAndExit:
       Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
                                                MemmoveB->getTerminator());
       if (Instruction *In = dyn_cast<Instruction>(NumWords))
-        if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
+        if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
           NumWords = Simp;
 
       Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index dc10028c042..810abf38863 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -32,14 +32,10 @@
 using namespace llvm;
 
 namespace llvm {
-//===----------------------------------------------------------------------===//
-// ConvergingVLIWScheduler - Implementation of the standard
-// MachineSchedStrategy.
-//===----------------------------------------------------------------------===//
 
 class VLIWResourceModel {
   /// ResourcesModel - Represents VLIW state.
-  /// Not limited to VLIW targets per say, but assumes
+  /// Not limited to VLIW targets per se, but assumes
   /// definition of DFA by a target.
   DFAPacketizer *ResourcesModel;
 
@@ -110,6 +106,11 @@ public:
   void schedule() override;
 };
 
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
 /// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
 /// to balance the schedule.
 class ConvergingVLIWScheduler : public MachineSchedStrategy {
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index 93ab2f73120..2519b7c4006 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -157,7 +157,7 @@ let Namespace = "Hexagon" in {
   // and isub_lo can be composed, which leads to all kinds of issues
   // with lane masks.
   def C8:         Rc<8,  "c8",         [], [USR]>, DwarfRegNum<[75]>;
-  def PC:         Rc<9,  "pc">,                    DwarfRegNum<[76]>;
+  def PC:         Rc<9,  "pc",         ["c9"]>,    DwarfRegNum<[76]>;
   def UGP:        Rc<10, "ugp",        ["c10"]>,   DwarfRegNum<[77]>;
   def GP:         Rc<11, "gp",         ["c11"]>,   DwarfRegNum<[78]>;
   def CS0:        Rc<12, "cs0",        ["c12"]>,   DwarfRegNum<[79]>;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 337af294eb8..c3b6eb19828 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -58,6 +58,7 @@ class HexagonAsmBackend : public MCAsmBackend {
     RF.getContents() = Code;
     RF.getFixups() = Fixups;
   }
+
 public:
   HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI,
       StringRef CPU) :
@@ -711,22 +712,24 @@ public:
               break;
             }
             case MCFragment::FT_Relaxable: {
+              MCContext &Context = Asm.getContext();
               auto &RF = cast<MCRelaxableFragment>(*K);
               auto &Inst = const_cast<MCInst &>(RF.getInst());
               while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < 4) {
-                MCInst *Nop = new (Asm.getContext()) MCInst;
+                MCInst *Nop = new (Context) MCInst;
                 Nop->setOpcode(Hexagon::A2_nop);
                 Inst.addOperand(MCOperand::createInst(Nop));
                 Size -= 4;
                 if (!HexagonMCChecker(
-                           *MCII, RF.getSubtargetInfo(), Inst, Inst,
-                           *Asm.getContext().getRegisterInfo()).check()) {
+                         Context, *MCII, RF.getSubtargetInfo(), Inst,
+                         *Context.getRegisterInfo(), false)
+                         .check()) {
                   Inst.erase(Inst.end() - 1);
                   Size = 0;
                 }
               }
-              bool Error = HexagonMCShuffle(true, *MCII, RF.getSubtargetInfo(),
-                                            Inst);
+              bool Error = HexagonMCShuffle(Context, true, *MCII,
+                                            RF.getSubtargetInfo(), Inst);
               //assert(!Error);
               (void)Error;
               ReplaceInstruction(Asm.getEmitter(), RF, Inst);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 9c80312b790..53d8b04c50a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -163,7 +163,10 @@ namespace HexagonII {
     PrefersSlot3Mask = 0x1,
 
     CofMax1Pos = 60,
-    CofMax1Mask = 0x1
+    CofMax1Mask = 0x1,
+
+    CVINewPos = 61,
+    CVINewMask = 0x1
   };
 
   // *** The code above must match HexagonInstrFormat*.td *** //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 62b21c419f3..33d73f1819c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -16,23 +16,27 @@
 
 #include "HexagonBaseInfo.h"
 
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
-static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false),
-  cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity"));
+static cl::opt<bool>
+    RelaxNVChecks("relax-nv-checks", cl::init(false), cl::ZeroOrMore,
+                  cl::Hidden, cl::desc("Relax checks of new-value validity"));
 
 const HexagonMCChecker::PredSense
-  HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
+    HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
 
 void HexagonMCChecker::init() {
   // Initialize read-only registers set.
   ReadOnly.insert(Hexagon::PC);
+  ReadOnly.insert(Hexagon::C9_8);
 
   // Figure out the loop-registers definitions.
   if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
@@ -46,13 +50,12 @@ void HexagonMCChecker::init() {
 
   if (HexagonMCInstrInfo::isBundle(MCB))
     // Unfurl a bundle.
-    for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+    for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
       MCInst const &Inst = *I.getInst();
       if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) {
         init(*Inst.getOperand(0).getInst());
         init(*Inst.getOperand(1).getInst());
-      }
-      else
+      } else
         init(Inst);
     }
   else
@@ -69,20 +72,18 @@ void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg,
     // Note use of new predicate register.
     if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
       NewPreds.insert(PredReg);
-  }
-  else
+  } else
     // Note register use.  Super-registers are not tracked directly,
     // but their components.
-    for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
-        SRI.isValid();
-        ++SRI)
+    for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+         SRI.isValid(); ++SRI)
       if (!MCSubRegIterator(*SRI, &RI).isValid())
         // Skip super-registers used indirectly.
         Uses.insert(*SRI);
 }
 
-void HexagonMCChecker::init(MCInst const& MCI) {
-  const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
+void HexagonMCChecker::init(MCInst const &MCI) {
+  const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
   unsigned PredReg = Hexagon::NoRegister;
   bool isTrue = false;
 
@@ -109,10 +110,10 @@ void HexagonMCChecker::init(MCInst const& MCI) {
 
       if (Hexagon::USR_OVF == R)
         // Many insns change the USR implicitly, but only one or another flag.
-        // The instruction table models the USR.OVF flag, which can be implicitly
-        // modified more than once, but cannot be modified in the same packet
-        // with an instruction that modifies is explicitly. Deal with such situ-
-        // ations individually.
+        // The instruction table models the USR.OVF flag, which can be
+        // implicitly modified more than once, but cannot be modified in the
+        // same packet with an instruction that modifies is explicitly. Deal
+        // with such situ- ations individually.
         SoftDefs.insert(R);
       else if (isPredicateRegister(R) &&
                HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
@@ -124,8 +125,7 @@ void HexagonMCChecker::init(MCInst const& MCI) {
 
   // Figure out explicit register definitions.
   for (unsigned i = 0; i < MCID.getNumDefs(); ++i) {
-    unsigned R = MCI.getOperand(i).getReg(),
-             S = Hexagon::NoRegister;
+    unsigned R = MCI.getOperand(i).getReg(), S = Hexagon::NoRegister;
     // USR has subregisters (while C8 does not for technical reasons), so
     // reset R to USR, since we know how to handle multiple defs of USR,
     // taking into account its subregisters.
@@ -134,9 +134,8 @@ void HexagonMCChecker::init(MCInst const& MCI) {
 
     // Note register definitions, direct ones as well as indirect side-effects.
     // Super-registers are not tracked directly, but their components.
-    for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
-        SRI.isValid();
-        ++SRI) {
+    for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+         SRI.isValid(); ++SRI) {
       if (MCSubRegIterator(*SRI, &RI).isValid())
         // Skip super-registers defined indirectly.
         continue;
@@ -156,22 +155,25 @@ void HexagonMCChecker::init(MCInst const& MCI) {
         // Only an explicit definition of P3:0 is noted as such; if a
         // side-effect, then note as a soft definition.
         SoftDefs.insert(*SRI);
-      else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI))
+      else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) &&
+               isPredicateRegister(*SRI))
         // Some insns produce predicates too late to be used in the same packet.
         LatePreds.insert(*SRI);
-      else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD)
+      else if (i == 0 && HexagonMCInstrInfo::isCVINew(MCII, MCI) &&
+               MCID.mayLoad())
         // Current loads should be used in the same packet.
         // TODO: relies on the impossibility of a current and a temporary loads
         // in the same packet.
         CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue));
-      else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD)
+      else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) ==
+                             HexagonII::TypeCVI_VM_TMP_LD)
         // Temporary loads should be used in the same packet, but don't commit
         // results, so it should be disregarded if another insn changes the same
         // register.
         // TODO: relies on the impossibility of a current and a temporary loads
         // in the same packet.
         TmpDefs.insert(*SRI);
-      else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) )
+      else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI))
         // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and
         // destination registers with this instruction. same for vdeal(Vx,Vy,Rx)
         Uses.insert(*SRI);
@@ -187,25 +189,26 @@ void HexagonMCChecker::init(MCInst const& MCI) {
     if (HexagonMCInstrInfo::isCompound(MCII, MCI))
       compoundRegisterMap(R); // Compound insns have a limited register range.
 
-    for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
-        SRI.isValid();
-        ++SRI)
+    for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+         SRI.isValid(); ++SRI)
       if (!MCSubRegIterator(*SRI, &RI).isValid())
         // No super-registers defined indirectly.
-        NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
-                                              HexagonMCInstrInfo::isFloat(MCII, MCI)));
+        NewDefs[*SRI].push_back(NewSense::Def(
+            PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+            HexagonMCInstrInfo::isFloat(MCII, MCI)));
 
     // For fairly unique 2-dot-new producers, example:
     // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers.
     if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) {
       unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg();
 
-      for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid());
-          SRI.isValid();
-          ++SRI)
+      for (MCRegAliasIterator SRI(R2, &RI,
+                                  !MCSubRegIterator(R2, &RI).isValid());
+           SRI.isValid(); ++SRI)
         if (!MCSubRegIterator(*SRI, &RI).isValid())
-          NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
-                                                HexagonMCInstrInfo::isFloat(MCII, MCI)));
+          NewDefs[*SRI].push_back(NewSense::Def(
+              PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+              HexagonMCInstrInfo::isFloat(MCII, MCI)));
     }
   }
 
@@ -227,18 +230,19 @@ void HexagonMCChecker::init(MCInst const& MCI) {
       // Super-registers cannot use new values.
       if (MCID.isBranch())
         NewUses[N] = NewSense::Jmp(
-          llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ);
+            llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ);
       else
         NewUses[N] = NewSense::Use(
-          PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
+            PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
     }
   }
 }
 
-HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx,
-                                   MCRegisterInfo const &ri)
-    : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI),
-      bLoadErrInfo(false) {
+HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII,
+                                   MCSubtargetInfo const &STI, MCInst &mcb,
+                                   MCRegisterInfo const &ri, bool ReportErrors)
+    : Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI),
+      ReportErrors(ReportErrors) {
   init();
 }
 
@@ -247,24 +251,23 @@ bool HexagonMCChecker::check(bool FullCheck) {
   bool chkP = checkPredicates();
   bool chkNV = checkNewValues();
   bool chkR = checkRegisters();
+  bool chkRRO = checkRegistersReadOnly();
   bool chkS = checkSolo();
   bool chkSh = true;
   if (FullCheck)
-   chkSh = checkShuffle();
+    chkSh = checkShuffle();
   bool chkSl = true;
   if (FullCheck)
-   chkSl = checkSlots();
-  bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl;
+    chkSl = checkSlots();
+  bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl;
 
   return chk;
 }
 
-bool HexagonMCChecker::checkSlots()
-
-{
+bool HexagonMCChecker::checkSlots() {
   unsigned slotsUsed = 0;
-  for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) {
-    MCInst const& MCI = *HMI.getInst();
+  for (auto HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+    MCInst const &MCI = *HMI.getInst();
     if (HexagonMCInstrInfo::isImmext(MCI))
       continue;
     if (HexagonMCInstrInfo::isDuplex(MCII, MCI))
@@ -274,9 +277,7 @@ bool HexagonMCChecker::checkSlots()
   }
 
   if (slotsUsed > HEXAGON_PACKET_SIZE) {
-    HexagonMCErrInfo errInfo;
-    errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS);
-    addErrInfo(errInfo);
+    reportError("invalid instruction packet: out of slots");
     return false;
   }
   return true;
@@ -284,11 +285,9 @@ bool HexagonMCChecker::checkSlots()
 
 // Check legal use of branches.
 bool HexagonMCChecker::checkBranches() {
-  HexagonMCErrInfo errInfo;
   if (HexagonMCInstrInfo::isBundle(MCB)) {
     bool hasConditional = false;
-    unsigned Branches = 0,
-             Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
+    unsigned Branches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
              Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE;
 
     for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset;
@@ -314,16 +313,18 @@ bool HexagonMCChecker::checkBranches() {
       if (HexagonMCInstrInfo::isInnerLoop(MCB) ||
           HexagonMCInstrInfo::isOuterLoop(MCB)) {
         // Error out if there's any branch in a loop-end packet.
-        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC);
-        addErrInfo(errInfo);
+        Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
+        reportError("packet marked with `:endloop" + N + "' " +
+                    "cannot contain instructions that modify register " + "`" +
+                    llvm::Twine(RI.getName(Hexagon::PC)) + "'");
         return false;
       }
     if (Branches > 1)
       if (!hasConditional || Conditional > Unconditional) {
         // Error out if more than one unconditional branch or
         // the conditional branch appears after the unconditional one.
-        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES);
-        addErrInfo(errInfo);
+        reportError(
+            "unconditional branch cannot precede another branch in packet");
         return false;
       }
   }
@@ -333,31 +334,28 @@ bool HexagonMCChecker::checkBranches() {
 
 // Check legal use of predicate registers.
 bool HexagonMCChecker::checkPredicates() {
-  HexagonMCErrInfo errInfo;
   // Check for proper use of new predicate registers.
-  for (const auto& I : NewPreds) {
+  for (const auto &I : NewPreds) {
     unsigned P = I;
 
     if (!Defs.count(P) || LatePreds.count(P)) {
       // Error out if the new predicate register is not defined,
       // or defined "late"
       // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }").
-      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P);
-      addErrInfo(errInfo);
+      reportErrorNewValue(P);
       return false;
     }
   }
 
   // Check for proper use of auto-anded of predicate registers.
-  for (const auto& I : LatePreds) {
+  for (const auto &I : LatePreds) {
     unsigned P = I;
 
     if (LatePreds.count(P) > 1 || Defs.count(P)) {
       // Error out if predicate register defined "late" multiple times or
       // defined late and regularly defined
       // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }".
-      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P);
-      addErrInfo(errInfo);
+      reportErrorRegisters(P);
       return false;
     }
   }
@@ -367,15 +365,12 @@ bool HexagonMCChecker::checkPredicates() {
 
 // Check legal use of new values.
 bool HexagonMCChecker::checkNewValues() {
-  HexagonMCErrInfo errInfo;
-  memset(&errInfo, 0, sizeof(errInfo));
-  for (auto& I : NewUses) {
+  for (auto &I : NewUses) {
     unsigned R = I.first;
     NewSense &US = I.second;
 
     if (!hasValidNewValueDef(US, NewDefs[R])) {
-      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R);
-      addErrInfo(errInfo);
+      reportErrorNewValue(R);
       return false;
     }
   }
@@ -383,25 +378,36 @@ bool HexagonMCChecker::checkNewValues() {
   return true;
 }
 
+bool HexagonMCChecker::checkRegistersReadOnly() {
+  for (auto I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+    MCInst const &Inst = *I.getInst();
+    unsigned Defs = HexagonMCInstrInfo::getDesc(MCII, Inst).getNumDefs();
+    for (unsigned j = 0; j < Defs; ++j) {
+      MCOperand const &Operand = Inst.getOperand(j);
+      assert(Operand.isReg() && "Def is not a register");
+      unsigned Register = Operand.getReg();
+      if (ReadOnly.find(Register) != ReadOnly.end()) {
+        reportError(Inst.getLoc(), "Cannot write to read-only register `" +
+                                       llvm::Twine(RI.getName(Register)) + "'");
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 // Check for legal register uses and definitions.
 bool HexagonMCChecker::checkRegisters() {
-  HexagonMCErrInfo errInfo;
   // Check for proper register definitions.
-  for (const auto& I : Defs) {
+  for (const auto &I : Defs) {
     unsigned R = I.first;
 
-    if (ReadOnly.count(R)) {
-      // Error out for definitions of read-only registers.
-      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R);
-      addErrInfo(errInfo);
-      return false;
-    }
     if (isLoopRegister(R) && Defs.count(R) > 1 &&
         (HexagonMCInstrInfo::isInnerLoop(MCB) ||
          HexagonMCInstrInfo::isOuterLoop(MCB))) {
       // Error out for definitions of loop registers at the end of a loop.
-      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R);
-      addErrInfo(errInfo);
+      reportError("loop-setup and some branch instructions "
+                  "cannot be in the same packet");
       return false;
     }
     if (SoftDefs.count(R)) {
@@ -409,8 +415,7 @@ bool HexagonMCChecker::checkRegisters() {
       // (e.g., "{ usr = r0; r0 = sfadd(...) }").
       unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:.
       unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
-      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
-      addErrInfo(errInfo);
+      reportErrorRegisters(BadR);
       return false;
     }
     if (!isPredicateRegister(R) && Defs[R].size() > 1) {
@@ -423,20 +428,18 @@ bool HexagonMCChecker::checkRegisters() {
         // changes, conditional or not.
         unsigned UsrR = Hexagon::USR;
         unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
-        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
-        addErrInfo(errInfo);
+        reportErrorRegisters(BadR);
         return false;
       }
       // Check for multiple conditional register definitions.
-      for (const auto& J : PM) {
+      for (const auto &J : PM) {
         PredSense P = J;
 
         // Check for multiple uses of the same condition.
         if (PM.count(P) > 1) {
           // Error out on conditional changes based on the same predicate
           // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }").
-          errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
-          addErrInfo(errInfo);
+          reportErrorRegisters(R);
           return false;
         }
         // Check for the use of the complementary condition.
@@ -444,9 +447,9 @@ bool HexagonMCChecker::checkRegisters() {
         if (PM.count(P) && PM.size() > 2) {
           // Error out on conditional changes based on the same predicate
           // multiple times
-          // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }").
-          errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
-          addErrInfo(errInfo);
+          // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =...
+          // }").
+          reportErrorRegisters(R);
           return false;
         }
       }
@@ -454,34 +457,37 @@ bool HexagonMCChecker::checkRegisters() {
   }
 
   // Check for use of current definitions.
-  for (const auto& I : CurDefs) {
+  for (const auto &I : CurDefs) {
     unsigned R = I;
 
     if (!Uses.count(R)) {
       // Warn on an unused current definition.
-      errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R);
-      addErrInfo(errInfo);
+      reportWarning("register `" + llvm::Twine(RI.getName(R)) +
+                    "' used with `.cur' "
+                    "but not used in the same packet");
       return true;
     }
   }
 
   // Check for use of temporary definitions.
-  for (const auto& I : TmpDefs) {
+  for (const auto &I : TmpDefs) {
     unsigned R = I;
 
     if (!Uses.count(R)) {
       // special case for vhist
       bool vHistFound = false;
-      for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
-        if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) {
-          vHistFound = true;  // vhist() implicitly uses ALL REGxx.tmp
+      for (auto const &HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+        if (llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) ==
+            HexagonII::TypeCVI_HIST) {
+          vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp
           break;
         }
       }
       // Warn on an unused temporary definition.
       if (vHistFound == false) {
-        errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R);
-        addErrInfo(errInfo);
+        reportWarning("register `" + llvm::Twine(RI.getName(R)) +
+                      "' used with `.tmp' "
+                      "but not used in the same packet");
         return true;
       }
     }
@@ -492,45 +498,26 @@ bool HexagonMCChecker::checkRegisters() {
 
 // Check for legal use of solo insns.
 bool HexagonMCChecker::checkSolo() {
-  HexagonMCErrInfo errInfo;
-  if (HexagonMCInstrInfo::isBundle(MCB) &&
-      HexagonMCInstrInfo::bundleSize(MCB) > 1) {
-    for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+  if (HexagonMCInstrInfo::bundleSize(MCB) > 1)
+    for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
       if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) {
-        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO);
-        addErrInfo(errInfo);
+        SMLoc Loc = I.getInst()->getLoc();
+        reportError(Loc, "Instruction is marked `isSolo' and "
+                         "cannot have other instructions in "
+                         "the same packet");
         return false;
       }
     }
-  }
 
   return true;
 }
 
 bool HexagonMCChecker::checkShuffle() {
-  HexagonMCErrInfo errInfo;
-  // Branch info is lost when duplexing. The unduplexed insns must be
-  // checked and only branch errors matter for this case.
-  HexagonMCShuffler MCS(true, MCII, STI, MCB);
-  if (!MCS.check()) {
-    if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
-      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
-      errInfo.setShuffleError(MCS.getError());
-      addErrInfo(errInfo);
-      return false;
-    }
-  }
-  HexagonMCShuffler MCSDX(true, MCII, STI, MCBDX);
-  if (!MCSDX.check()) {
-    errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
-    errInfo.setShuffleError(MCSDX.getError());
-    addErrInfo(errInfo);
-    return false;
-  }
-  return true;
+  HexagonMCShuffler MCSDX(Context, ReportErrors, MCII, STI, MCB);
+  return MCSDX.check();
 }
 
-void HexagonMCChecker::compoundRegisterMap(unsigned& Register) {
+void HexagonMCChecker::compoundRegisterMap(unsigned &Register) {
   switch (Register) {
   default:
     break;
@@ -562,7 +549,7 @@ void HexagonMCChecker::compoundRegisterMap(unsigned& Register) {
 }
 
 bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use,
-      const NewSenseList &Defs) const {
+                                           const NewSenseList &Defs) const {
   bool Strict = !RelaxNVChecks;
 
   for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
@@ -590,3 +577,30 @@ bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use,
   return false;
 }
 
+void HexagonMCChecker::reportErrorRegisters(unsigned Register) {
+  reportError("register `" + llvm::Twine(RI.getName(Register)) +
+              "' modified more than once");
+}
+
+void HexagonMCChecker::reportErrorNewValue(unsigned Register) {
+  reportError("register `" + llvm::Twine(RI.getName(Register)) +
+              "' used with `.new' "
+              "but not validly modified in the same packet");
+}
+
+void HexagonMCChecker::reportError(llvm::Twine const &Msg) {
+  reportError(MCB.getLoc(), Msg);
+}
+
+void HexagonMCChecker::reportError(SMLoc Loc, llvm::Twine const &Msg) {
+  if (ReportErrors)
+    Context.reportError(Loc, Msg);
+}
+
+void HexagonMCChecker::reportWarning(llvm::Twine const &Msg) {
+  if (ReportErrors) {
+    auto SM = Context.getSourceManager();
+    if (SM)
+      SM->PrintMessage(MCB.getLoc(), SourceMgr::DK_Warning, Msg);
+  }
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index c3b3d4c14c8..d0238691cdc 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -24,59 +24,14 @@ using namespace llvm;
 namespace llvm {
 class MCOperandInfo;
 
-typedef struct {
-  unsigned Error, Warning, ShuffleError;
-  unsigned Register;
-} ErrInfo_T;
-
-class HexagonMCErrInfo {
-public:
-  enum {
-    CHECK_SUCCESS         = 0,
-    // Errors.
-    CHECK_ERROR_BRANCHES  = 0x00001,
-    CHECK_ERROR_NEWP      = 0x00002,
-    CHECK_ERROR_NEWV      = 0x00004,
-    CHECK_ERROR_REGISTERS = 0x00008,
-    CHECK_ERROR_READONLY  = 0x00010,
-    CHECK_ERROR_LOOP      = 0x00020,
-    CHECK_ERROR_ENDLOOP   = 0x00040,
-    CHECK_ERROR_SOLO      = 0x00080,
-    CHECK_ERROR_SHUFFLE   = 0x00100,
-    CHECK_ERROR_NOSLOTS   = 0x00200,
-    CHECK_ERROR_UNKNOWN   = 0x00400,
-    // Warnings.
-    CHECK_WARN_CURRENT    = 0x10000,
-    CHECK_WARN_TEMPORARY  = 0x20000
-  };
-  ErrInfo_T s;
-
-  void reset() {
-    s.Error = CHECK_SUCCESS;
-    s.Warning = CHECK_SUCCESS;
-    s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS;
-    s.Register = Hexagon::NoRegister;
-  };
-  HexagonMCErrInfo() {
-    reset();
-  };
-
-  void setError(unsigned e, unsigned r = Hexagon::NoRegister)
-    { s.Error = e; s.Register = r; };
-  void setWarning(unsigned w, unsigned r = Hexagon::NoRegister)
-    { s.Warning = w; s.Register = r; };
-  void setShuffleError(unsigned e) { s.ShuffleError = e; };
-};
-
 /// Check for a valid bundle.
 class HexagonMCChecker {
-  /// Insn bundle.
-  MCInst& MCB;
-  MCInst& MCBDX;
-  const MCRegisterInfo& RI;
+  MCContext &Context;
+  MCInst &MCB;
+  const MCRegisterInfo &RI;
   MCInstrInfo const &MCII;
   MCSubtargetInfo const &STI;
-  bool bLoadErrInfo;
+  bool ReportErrors;
 
   /// Set of definitions: register #, if predicated, if predicated true.
   typedef std::pair<unsigned, bool> PredSense;
@@ -99,23 +54,23 @@ class HexagonMCChecker {
     bool IsFloat, IsNVJ, Cond;
     // The special-case "constructors":
     static NewSense Jmp(bool isNVJ) {
-      NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ,
-                      /*Cond=*/ false };
+      NewSense NS = {/*PredReg=*/0, /*IsFloat=*/false, /*IsNVJ=*/isNVJ,
+                     /*Cond=*/false};
       return NS;
     }
     static NewSense Use(unsigned PR, bool True) {
-      NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false,
-                      /*Cond=*/ True };
+      NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/false, /*IsNVJ=*/false,
+                     /*Cond=*/True};
       return NS;
     }
     static NewSense Def(unsigned PR, bool True, bool Float) {
-      NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false,
-                      /*Cond=*/ True };
+      NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/Float, /*IsNVJ=*/false,
+                     /*Cond=*/True};
       return NS;
     }
   };
   /// Set of definitions that produce new register:
-  typedef llvm::SmallVector<NewSense,2> NewSenseList;
+  typedef llvm::SmallVector<NewSense, 2> NewSenseList;
   typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator;
   llvm::DenseMap<unsigned, NewSenseList> NewDefs;
 
@@ -151,23 +106,8 @@ class HexagonMCChecker {
   typedef std::set<unsigned>::iterator ReadOnlyIterator;
   std::set<unsigned> ReadOnly;
 
-  std::queue<ErrInfo_T> ErrInfoQ;
-  HexagonMCErrInfo CrntErrInfo;
-
-  void getErrInfo() {
-    if (bLoadErrInfo == true) {
-      if (ErrInfoQ.empty()) {
-        CrntErrInfo.reset();
-      } else {
-        CrntErrInfo.s = ErrInfoQ.front();
-        ErrInfoQ.pop();
-      }
-    }
-    bLoadErrInfo = false;
-  }
-
   void init();
-  void init(MCInst const&);
+  void init(MCInst const &);
   void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
 
   // Checks performed.
@@ -175,45 +115,37 @@ class HexagonMCChecker {
   bool checkPredicates();
   bool checkNewValues();
   bool checkRegisters();
+  bool checkRegistersReadOnly();
   bool checkSolo();
   bool checkShuffle();
   bool checkSlots();
-  bool checkSize();
 
-  static void compoundRegisterMap(unsigned&);
+  static void compoundRegisterMap(unsigned &);
 
   bool isPredicateRegister(unsigned R) const {
-    return (Hexagon::P0 == R || Hexagon::P1 == R ||
-            Hexagon::P2 == R || Hexagon::P3 == R);
+    return (Hexagon::P0 == R || Hexagon::P1 == R || Hexagon::P2 == R ||
+            Hexagon::P3 == R);
   };
   bool isLoopRegister(unsigned R) const {
-    return (Hexagon::SA0 == R || Hexagon::LC0 == R ||
-            Hexagon::SA1 == R || Hexagon::LC1 == R);
+    return (Hexagon::SA0 == R || Hexagon::LC0 == R || Hexagon::SA1 == R ||
+            Hexagon::LC1 == R);
   };
 
-  bool hasValidNewValueDef(const NewSense &Use,
-                           const NewSenseList &Defs) const;
+  bool hasValidNewValueDef(const NewSense &Use, const NewSenseList &Defs) const;
 
-  public:
-  explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx,
-                            const MCRegisterInfo& ri);
+public:
+  explicit HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII,
+                            MCSubtargetInfo const &STI, MCInst &mcb,
+                            const MCRegisterInfo &ri, bool ReportErrors = true);
 
   bool check(bool FullCheck = true);
-
-  /// add a new error/warning
-  void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
-
-  /// Return the error code for the last operation in the insn bundle.
-  unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; };
-  unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; };
-  unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; };
-  unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; };
-  bool getNextErrInfo() {
-    bLoadErrInfo = true;
-    return (ErrInfoQ.empty()) ? false : (getErrInfo(), true);
-  }
+  void reportErrorRegisters(unsigned Register);
+  void reportErrorNewValue(unsigned Register);
+  void reportError(SMLoc Loc, llvm::Twine const &Msg);
+  void reportError(llvm::Twine const &Msg);
+  void reportWarning(llvm::Twine const &Msg);
 };
 
-}
+} // namespace llvm
 
 #endif // HEXAGONMCCHECKER_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index ffa980ca656..127c97e342d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -406,7 +406,7 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co
   if (MCI.size() < 2)
     return;
 
-  bool StartedValid = llvm::HexagonMCShuffle(false, MCII, STI, MCI);
+  bool StartedValid = llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI);
 
   // Create a vector, needed to keep the order of jump instructions.
   MCInst CheckList(MCI);
@@ -420,8 +420,9 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co
     // Need to update the bundle.
     MCI = CheckList;
 
-    if (StartedValid && !llvm::HexagonMCShuffle(false, MCII, STI, MCI)) {
-      DEBUG(dbgs() << "Found ERROR\n");
+    if (StartedValid &&
+        !llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI)) {
+       DEBUG(dbgs() << "Found ERROR\n");
       MCI = OriginalBundle;
     }
   }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 553ffba508a..30a811a3640 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -66,7 +66,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
   // instructions when possible.
   if (!HexagonDisableCompound)
     HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB);
-  HexagonMCShuffle(false, MCII, STI, MCB);
+  HexagonMCShuffle(Context, false, MCII, STI, MCB);
   // Examine the packet and convert pairs of instructions to duplex
   // instructions when possible.
   MCInst InstBundlePreDuplex = MCInst(MCB);
@@ -74,7 +74,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
     SmallVector<DuplexCandidate, 8> possibleDuplexes;
     possibleDuplexes =
         HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB);
-    HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
+    HexagonMCShuffle(Context, MCII, STI, MCB, possibleDuplexes);
   }
   // Examines packet and pad the packet, if needed, when an
   // end-loop is in the bundle.
@@ -87,7 +87,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
   CheckOk = Check ? Check->check(true) : true;
   if (!CheckOk)
     return false;
-  HexagonMCShuffle(true, MCII, STI, MCB);
+  HexagonMCShuffle(Context, true, MCII, STI, MCB);
   return true;
 }
 
@@ -505,6 +505,11 @@ bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
   return (getType(MCII, MCI) == HexagonII::TypeCJ);
 }
 
+bool HexagonMCInstrInfo::isCVINew(MCInstrInfo const &MCII, MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::CVINewPos) & HexagonII::CVINewMask);
+}
+
 bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
   return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
           (Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 2e989adb5cc..4d2df4d0eb6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -180,6 +180,7 @@ bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return whether the instruction needs to be constant extended.
 bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCVINew(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Is this double register suitable for use in a duplex subinst
 bool isDblRegForSubInst(unsigned Reg);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index 529a5fd5ed8..aece3679048 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -45,6 +45,7 @@ void HexagonMCShuffler::init(MCInst &MCB) {
     }
   }
 
+  Loc = MCB.getLoc();
   BundleFlags = MCB.getOperand(0).getImm();
 }
 
@@ -68,12 +69,14 @@ void HexagonMCShuffler::init(MCInst &MCB, MCInst const &AddMI,
       append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, AddMI));
   }
 
+  Loc = MCB.getLoc();
   BundleFlags = MCB.getOperand(0).getImm();
 }
 
 void HexagonMCShuffler::copyTo(MCInst &MCB) {
   MCB.clear();
   MCB.addOperand(MCOperand::createImm(BundleFlags));
+  MCB.setLoc(Loc);
   // Copy the results into the bundle.
   for (HexagonShuffler::iterator I = begin(); I != end(); ++I) {
 
@@ -89,15 +92,16 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) {
   if (shuffle()) {
     // Copy the results into the bundle.
     copyTo(MCB);
-  } else
-    DEBUG(MCB.dump());
-
-  return (!getError());
+    return true;
+  }
+  DEBUG(MCB.dump());
+  return false;
 }
 
-bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII,
-                            MCSubtargetInfo const &STI, MCInst &MCB) {
-  HexagonMCShuffler MCS(true, MCII, STI, MCB);
+bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal,
+                            MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+                            MCInst &MCB) {
+  HexagonMCShuffler MCS(Context, Fatal, MCII, STI, MCB);
 
   if (DisableShuffle)
     // Ignore if user chose so.
@@ -117,52 +121,16 @@ bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII,
     return false;
   }
 
-  // Reorder the bundle and copy the result.
-  if (!MCS.reshuffleTo(MCB)) {
-    // Unless there is any error, which should not happen at this point.
-    unsigned shuffleError = MCS.getError();
-
-    if (!Fatal && (shuffleError !=  HexagonShuffler::SHUFFLE_SUCCESS))
-      return false;
-    if (shuffleError !=  HexagonShuffler::SHUFFLE_SUCCESS) {
-      errs() << "\nFailing packet:\n";
-      for (const auto& I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
-        MCInst *MI = const_cast<MCInst *>(I.getInst());
-        errs() << HexagonMCInstrInfo::getName(MCII, *MI) << ' ' << HexagonMCInstrInfo::getDesc(MCII, *MI).getOpcode() << '\n';
-      }
-      errs() << '\n';
-    }
-
-    switch (shuffleError) {
-    default:
-      llvm_unreachable("unknown error");
-    case HexagonShuffler::SHUFFLE_ERROR_INVALID:
-      llvm_unreachable("invalid packet");
-    case HexagonShuffler::SHUFFLE_ERROR_STORES:
-      llvm_unreachable("too many stores");
-    case HexagonShuffler::SHUFFLE_ERROR_LOADS:
-      llvm_unreachable("too many loads");
-    case HexagonShuffler::SHUFFLE_ERROR_BRANCHES:
-      llvm_unreachable("too many branches");
-    case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS:
-      llvm_unreachable("no suitable slot");
-    case HexagonShuffler::SHUFFLE_ERROR_SLOTS:
-      llvm_unreachable("over-subscribed slots");
-    case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case.
-      return true;
-    }
-  }
-
-  return true;
+  return MCS.reshuffleTo(MCB);
 }
 
-unsigned
-llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
-                       MCContext &Context, MCInst &MCB,
+bool
+llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
+                       MCSubtargetInfo const &STI, MCInst &MCB,
                        SmallVector<DuplexCandidate, 8> possibleDuplexes) {
 
   if (DisableShuffle)
-    return HexagonShuffler::SHUFFLE_SUCCESS;
+    return false;
 
   if (!HexagonMCInstrInfo::bundleSize(MCB)) {
     // There once was a bundle:
@@ -172,46 +140,44 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
     // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
     // became empty.
     DEBUG(dbgs() << "Skipping empty bundle");
-    return HexagonShuffler::SHUFFLE_SUCCESS;
+    return false;
   } else if (!HexagonMCInstrInfo::isBundle(MCB)) {
     DEBUG(dbgs() << "Skipping stand-alone insn");
-    return HexagonShuffler::SHUFFLE_SUCCESS;
+    return false;
   }
 
   bool doneShuffling = false;
-  unsigned shuffleError;
   while (possibleDuplexes.size() > 0 && (!doneShuffling)) {
     // case of Duplex Found
     DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val();
     MCInst Attempt(MCB);
     HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry);
-    HexagonMCShuffler MCS(true, MCII, STI, Attempt); // copy packet to the shuffler
+    HexagonMCShuffler MCS(Context, false, MCII, STI, Attempt); // copy packet to the shuffler
     if (MCS.size() == 1) {                     // case of one duplex
       // copy the created duplex in the shuffler to the bundle
       MCS.copyTo(MCB);
-      return HexagonShuffler::SHUFFLE_SUCCESS;
+      return false;
     }
     // try shuffle with this duplex
     doneShuffling = MCS.reshuffleTo(MCB);
-    shuffleError = MCS.getError();
 
     if (doneShuffling)
       break;
   }
 
   if (doneShuffling == false) {
-    HexagonMCShuffler MCS(true, MCII, STI, MCB);
+    HexagonMCShuffler MCS(Context, false, MCII, STI, MCB);
     doneShuffling = MCS.reshuffleTo(MCB); // shuffle
-    shuffleError = MCS.getError();
   }
   if (!doneShuffling)
-    return shuffleError;
+    return true;
 
-  return HexagonShuffler::SHUFFLE_SUCCESS;
+  return false;
 }
 
-bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
-                            MCInst &MCB, MCInst const &AddMI, int fixupCount) {
+bool llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
+                            MCSubtargetInfo const &STI, MCInst &MCB,
+                            MCInst const &AddMI, int fixupCount) {
   if (!HexagonMCInstrInfo::isBundle(MCB))
     return false;
 
@@ -246,16 +212,6 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
   if (bhasDuplex && bundleSize >= maxBundleSize)
     return false;
 
-  HexagonMCShuffler MCS(MCII, STI, MCB, AddMI, false);
-  if (!MCS.reshuffleTo(MCB)) {
-    unsigned shuffleError = MCS.getError();
-    switch (shuffleError) {
-    default:
-      return false;
-    case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case
-      return true;
-    }
-  }
-
-  return true;
+  HexagonMCShuffler MCS(Context, false, MCII, STI, MCB, AddMI, false);
+  return MCS.reshuffleTo(MCB);
 }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index 14bbfda4c91..dbe85b434dc 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -18,24 +18,19 @@
 #include "MCTargetDesc/HexagonShuffler.h"
 
 namespace llvm {
-
 class MCInst;
-
 // Insn bundle shuffler.
 class HexagonMCShuffler : public HexagonShuffler {
-  bool immext_present;
-  bool duplex_present;
-
 public:
-  HexagonMCShuffler(bool Fatal, MCInstrInfo const &MCII,
+  HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII,
                     MCSubtargetInfo const &STI, MCInst &MCB)
-      : HexagonShuffler(MCII, STI) {
+      : HexagonShuffler(Context, Fatal, MCII, STI) {
     init(MCB);
   };
-  HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
-                    MCInst &MCB, MCInst const &AddMI,
-                    bool InsertAtFront)
-      : HexagonShuffler(MCII, STI) {
+  HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII,
+                    MCSubtargetInfo const &STI, MCInst &MCB,
+                    MCInst const &AddMI, bool InsertAtFront)
+      : HexagonShuffler(Context, Fatal, MCII, STI) {
     init(MCB, AddMI, InsertAtFront);
   };
 
@@ -44,22 +39,20 @@ public:
   // Reorder and copy result to another.
   bool reshuffleTo(MCInst &MCB);
 
-  bool immextPresent() const { return immext_present; };
-  bool duplexPresent() const { return duplex_present; };
-
 private:
   void init(MCInst &MCB);
   void init(MCInst &MCB, MCInst const &AddMI, bool InsertAtFront);
 };
 
 // Invocation of the shuffler.
-bool HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII,
+bool HexagonMCShuffle(MCContext &Context, bool Fatal, MCInstrInfo const &MCII,
                       MCSubtargetInfo const &STI, MCInst &);
-bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
-                      MCInst &, MCInst const &, int);
-unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
-                          MCContext &Context, MCInst &,
-                          SmallVector<DuplexCandidate, 8>);
-}
+bool HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
+                      MCSubtargetInfo const &STI, MCInst &, MCInst const &,
+                      int);
+bool HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
+                      MCSubtargetInfo const &STI, MCInst &,
+                      SmallVector<DuplexCandidate, 8>);
+} // namespace llvm
 
 #endif // HEXAGONMCSHUFFLER_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 853f76213d3..eb303464555 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -14,17 +14,18 @@
 
 #define DEBUG_TYPE "hexagon-shuffle"
 
-#include <algorithm>
-#include <utility>
+#include "HexagonShuffler.h"
 #include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "HexagonShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <utility>
 
 using namespace llvm;
 
@@ -38,7 +39,7 @@ class HexagonBid {
   unsigned Bid;
 
 public:
-  HexagonBid() : Bid(0){}
+  HexagonBid() : Bid(0) {}
   HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; }
 
   // Check if the insn priority is overflowed.
@@ -87,7 +88,7 @@ unsigned HexagonResource::setWeight(unsigned s) {
   // Calculate relative weight of the insn for the given slot, weighing it the
   // heavier the more restrictive the insn is and the lowest the slots that the
   // insn may be executed in.
-  if (Key == 0 || Units == 0 || (SlotWeight*s >= 32))
+  if (Key == 0 || Units == 0 || (SlotWeight * s >= 32))
     return Weight = 0;
 
   unsigned Ctpop = countPopulation(Units);
@@ -106,14 +107,12 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
   (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
   (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
   (*TUL)[HexagonII::TypeCVI_VINLANESAT] =
-      (CPU == "hexagonv60" || CPU == "hexagonv61" || CPU == "hexagonv61v1") ?
-      UnitsAndLanes(CVI_SHIFT, 1) :
-      UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+      (CPU == "hexagonv60" || CPU == "hexagonv61" || CPU == "hexagonv61v1")
+          ? UnitsAndLanes(CVI_SHIFT, 1)
+          : UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
   (*TUL)[HexagonII::TypeCVI_VM_LD] =
       UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
   (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
-  (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] =
-      UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
   (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1);
   (*TUL)[HexagonII::TypeCVI_VM_ST] =
       UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
@@ -154,18 +153,19 @@ typedef SmallVector<struct CVIUnits, 8> HVXInstsT;
 static unsigned makeAllBits(unsigned startBit, unsigned Lanes)
 
 {
-  for (unsigned i = 1 ; i < Lanes ; ++i)
+  for (unsigned i = 1; i < Lanes; ++i)
     startBit = (startBit << 1) | startBit;
   return startBit;
 }
 
-static bool checkHVXPipes(const HVXInstsT& hvxInsts, unsigned startIdx, unsigned usedUnits)
+static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx,
+                          unsigned usedUnits)
 
 {
   if (startIdx < hvxInsts.size()) {
     if (!hvxInsts[startIdx].Units)
       return checkHVXPipes(hvxInsts, startIdx + 1, usedUnits);
-    for (unsigned b = 0x1 ; b <= 0x8 ; b <<= 1) {
+    for (unsigned b = 0x1; b <= 0x8; b <<= 1) {
       if ((hvxInsts[startIdx].Units & b) == 0)
         continue;
       unsigned allBits = makeAllBits(b, hvxInsts[startIdx].Lanes);
@@ -179,9 +179,10 @@ static bool checkHVXPipes(const HVXInstsT& hvxInsts, unsigned startIdx, unsigned
   return true;
 }
 
-HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
+HexagonShuffler::HexagonShuffler(MCContext &Context, bool ReportErrors,
+                                 MCInstrInfo const &MCII,
                                  MCSubtargetInfo const &STI)
-    : MCII(MCII), STI(STI) {
+    : Context(Context), MCII(MCII), STI(STI), ReportErrors(ReportErrors) {
   reset();
   HexagonCVIResource::SetupTUL(&TUL, STI.getCPU());
 }
@@ -189,7 +190,6 @@ HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
 void HexagonShuffler::reset() {
   Packet.clear();
   BundleFlags = 0;
-  Error = SHUFFLE_SUCCESS;
 }
 
 void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender,
@@ -202,8 +202,8 @@ void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender,
 static struct {
   unsigned first;
   unsigned second;
-} jumpSlots[] = { {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1} };
-#define MAX_JUMP_SLOTS (sizeof(jumpSlots)/sizeof(jumpSlots[0]))
+} jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
+#define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0]))
 
 namespace {
 bool isDuplexAGroup(unsigned Opcode) {
@@ -248,26 +248,23 @@ unsigned countNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) {
     Result += !isDuplexAGroup(subInst0Opcode);
     Result += !isDuplexAGroup(subInst1Opcode);
   } else
-    Result += Type != HexagonII::TypeALU32_2op &&
-              Type != HexagonII::TypeALU32_3op &&
-              Type != HexagonII::TypeALU32_ADDI &&
-              Type != HexagonII::TypeS_2op &&
-              Type != HexagonII::TypeS_3op &&
-              Type != HexagonII::TypeALU64 &&
-              (Type != HexagonII::TypeM ||
-               HexagonMCInstrInfo::isFloat(MCII, ID));
+    Result +=
+        Type != HexagonII::TypeALU32_2op && Type != HexagonII::TypeALU32_3op &&
+        Type != HexagonII::TypeALU32_ADDI && Type != HexagonII::TypeS_2op &&
+        Type != HexagonII::TypeS_3op && Type != HexagonII::TypeALU64 &&
+        (Type != HexagonII::TypeM || HexagonMCInstrInfo::isFloat(MCII, ID));
   return Result;
 }
-}
+} // namespace
 
 /// Check that the packet is legal and enforce relative insn order.
 bool HexagonShuffler::check() {
   // Descriptive slot masks.
   const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
-                 slotThree = 0x8, //slotFirstJump = 0x8,
-                 slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
+                 slotThree = 0x8, // slotFirstJump = 0x8,
+      slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
   // Highest slots for branches and stores used to keep their original order.
-  //unsigned slotJump = slotFirstJump;
+  // unsigned slotJump = slotFirstJump;
   unsigned slotLoadStore = slotFirstLoadStore;
   // Number of branches, solo branches, indirect branches.
   unsigned jumps = 0, jump1 = 0;
@@ -287,6 +284,7 @@ bool HexagonShuffler::check() {
   unsigned onlyNo1 = 0;
   unsigned xtypeFloat = 0;
   unsigned pSlot3Cnt = 0;
+  unsigned nvstores = 0;
   unsigned memops = 0;
   unsigned deallocs = 0;
   iterator slot3ISJ = end();
@@ -327,14 +325,12 @@ bool HexagonShuffler::check() {
       ++onlyNo1;
     case HexagonII::TypeCVI_VM_LD:
     case HexagonII::TypeCVI_VM_TMP_LD:
-    case HexagonII::TypeCVI_VM_CUR_LD:
       ++CVIloads;
     case HexagonII::TypeLD:
       ++loads;
       ++memory;
       if (ISJ->Core.getUnits() == slotSingleLoad ||
-          HexagonMCInstrInfo::getType(MCII, ID) ==
-              HexagonII::TypeCVI_VM_VP_LDU)
+          HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU)
         ++load0;
       if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) {
         ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
@@ -368,18 +364,19 @@ bool HexagonShuffler::check() {
       }
       break;
     case HexagonII::TypeV2LDST:
-      if(HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
+      if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
         ++loads;
         ++memory;
         if (ISJ->Core.getUnits() == slotSingleLoad ||
-            HexagonMCInstrInfo::getType(MCII,ID) ==
+            HexagonMCInstrInfo::getType(MCII, ID) ==
                 HexagonII::TypeCVI_VM_VP_LDU)
           ++load0;
-      }
-      else {
+      } else {
         assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore());
         ++memory;
         ++stores;
+        if (HexagonMCInstrInfo::isNewValue(MCII, ID))
+          ++nvstores;
       }
       break;
     case HexagonII::TypeCR:
@@ -415,21 +412,21 @@ bool HexagonShuffler::check() {
   if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
       (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
       (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
-    Error = SHUFFLE_ERROR_INVALID;
+    reportError(llvm::Twine("invalid instruction packet"));
     return false;
   }
 
   if (jump1 && jumps > 1) {
     // Error if single branch with another branch.
-    Error = SHUFFLE_ERROR_BRANCHES;
+    reportError(llvm::Twine("too many branches in packet"));
     return false;
   }
-  if (memops && stores > 1) {
-    Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT;
+  if ((nvstores || memops) && stores > 1) {
+    reportError(llvm::Twine("slot 0 instruction does not allow slot 1 store"));
     return false;
   }
   if (deallocs && stores) {
-    Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT;
+    reportError(llvm::Twine("slot 0 instruction does not allow slot 1 store"));
     return false;
   }
 
@@ -441,7 +438,6 @@ bool HexagonShuffler::check() {
 
     if (!ISJ->Core.getUnits()) {
       // Error if insn may not be executed in any slot.
-      Error = SHUFFLE_ERROR_UNKNOWN;
       return false;
     }
 
@@ -472,7 +468,8 @@ bool HexagonShuffler::check() {
         else if (stores > 1) {
           if (slotLoadStore < slotLastLoadStore) {
             // Error if no more slots available for stores.
-            Error = SHUFFLE_ERROR_STORES;
+            reportError(
+                llvm::Twine("invalid instruction packet: too many stores"));
             return false;
           }
           // Pin the store to the highest slot available to it.
@@ -483,7 +480,7 @@ bool HexagonShuffler::check() {
       }
       if (store1 && stores > 1) {
         // Error if a single store with another store.
-        Error = SHUFFLE_ERROR_STORES;
+        reportError(llvm::Twine("invalid instruction packet: too many stores"));
         return false;
       }
     }
@@ -494,7 +491,7 @@ bool HexagonShuffler::check() {
 
     if (!ISJ->Core.getUnits()) {
       // Error if insn may not be executed in any slot.
-      Error = SHUFFLE_ERROR_NOSLOTS;
+      reportError(llvm::Twine("invalid instruction packet: out of slots"));
       return false;
     }
   }
@@ -503,12 +500,12 @@ bool HexagonShuffler::check() {
   bool validateSlots = true;
   if (jumps > 1) {
     if (foundBranches.size() > 2) {
-      Error = SHUFFLE_ERROR_BRANCHES;
+      reportError(llvm::Twine("too many branches in packet"));
       return false;
     }
 
     // try all possible choices
-    for (unsigned int i = 0 ; i < MAX_JUMP_SLOTS ; ++i) {
+    for (unsigned int i = 0; i < MAX_JUMP_SLOTS; ++i) {
       // validate first jump with this slot rule
       if (!(jumpSlots[i].first & foundBranches[0]->Core.getUnits()))
         continue;
@@ -535,18 +532,18 @@ bool HexagonShuffler::check() {
       if (!bFail) {
         validateSlots = false; // all good, no need to re-do auction
         break;
-      }
-      else
+      } else
         // restore original values
         Packet = PacketSave;
     }
     if (validateSlots == true) {
-      Error = SHUFFLE_ERROR_NOSLOTS;
+      reportError(llvm::Twine("invalid instruction packet: out of slots"));
       return false;
     }
   }
 
-  if (jumps <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) {
+  if (jumps <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 &&
+      slot3ISJ != end()) {
     validateSlots = true;
     // save off slot mask of instruction marked with A_PREFER_SLOT3
     // and then pin it to slot #3
@@ -582,7 +579,7 @@ bool HexagonShuffler::check() {
 
     for (iterator I = begin(); I != end(); ++I)
       if (!AuctionCore.bid(I->Core.getUnits())) {
-        Error = SHUFFLE_ERROR_SLOTS;
+        reportError(llvm::Twine("invalid instruction packet: slot error"));
         return false;
       }
   }
@@ -605,12 +602,11 @@ bool HexagonShuffler::check() {
     startIdx = usedUnits = 0x0;
     if (checkHVXPipes(hvxInsts, startIdx, usedUnits) == false) {
       // too many pipes used to be valid
-      Error = SHUFFLE_ERROR_SLOTS;
+      reportError(llvm::Twine("invalid instruction packet: slot error"));
       return false;
     }
   }
 
-  Error = SHUFFLE_SUCCESS;
   return true;
 }
 
@@ -618,12 +614,13 @@ bool HexagonShuffler::shuffle() {
   if (size() > HEXAGON_PACKET_SIZE) {
     // Ignore a packet with with more than what a packet can hold
     // or with compound or duplex insns for now.
-    Error = SHUFFLE_ERROR_INVALID;
+    reportError(llvm::Twine("invalid instruction packet"));
     return false;
   }
 
   // Check and prepare packet.
-  if (size() > 1 && check())
+  bool Ok = true;
+  if (size() > 1 && (Ok = check()))
     // Reorder the handles for each slot.
     for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE;
          ++nSlot) {
@@ -659,5 +656,10 @@ bool HexagonShuffler::shuffle() {
           dbgs() << '\n');
   DEBUG(dbgs() << '\n');
 
-  return (!getError());
+  return Ok;
+}
+
+void HexagonShuffler::reportError(llvm::Twine const &Msg) {
+  if (ReportErrors)
+    Context.reportError(Loc, Msg);
 }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 36e8fa19d46..bd31c7be4c0 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -45,8 +45,7 @@ public:
 
   // Check if the resources are in ascending slot order.
   static bool lessUnits(const HexagonResource &A, const HexagonResource &B) {
-    return (countPopulation(A.getUnits()) <
-            countPopulation(B.getUnits()));
+    return (countPopulation(A.getUnits()) < countPopulation(B.getUnits()));
   };
   // Check if the resources are in ascending weight order.
   static bool lessWeight(const HexagonResource &A, const HexagonResource &B) {
@@ -106,7 +105,7 @@ class HexagonInstr {
 public:
   HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T,
                MCInstrInfo const &MCII, MCInst const *id,
-               MCInst const *Extender, unsigned s)
+               MCInst const *Extender, unsigned s, bool x = false)
       : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}
 
   MCInst const &getDesc() const { return *ID; };
@@ -136,33 +135,21 @@ class HexagonShuffler {
   HexagonPacket Packet;
   HexagonPacket PacketSave;
 
-  // Shuffling error code.
-  unsigned Error;
-
   HexagonCVIResource::TypeUnitsAndLanes TUL;
 
 protected:
+  MCContext &Context;
   int64_t BundleFlags;
   MCInstrInfo const &MCII;
   MCSubtargetInfo const &STI;
+  SMLoc Loc;
+  bool ReportErrors;
 
 public:
   typedef HexagonPacket::iterator iterator;
 
-  enum {
-    SHUFFLE_SUCCESS = 0,    ///< Successful operation.
-    SHUFFLE_ERROR_INVALID,  ///< Invalid bundle.
-    SHUFFLE_ERROR_STORES,   ///< No free slots for store insns.
-    SHUFFLE_ERROR_LOADS,    ///< No free slots for load insns.
-    SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns.
-    SHUFFLE_ERROR_NOSLOTS,  ///< No free slots for other insns.
-    SHUFFLE_ERROR_SLOTS,    ///< Over-subscribed slots.
-    SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60).
-    SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict
-    SHUFFLE_ERROR_UNKNOWN   ///< Unknown error.
-  };
-
-  explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI);
+  HexagonShuffler(MCContext &Context, bool ReportErrors,
+                  MCInstrInfo const &MCII, MCSubtargetInfo const &STI);
 
   // Reset to initial state.
   void reset();
@@ -180,9 +167,8 @@ public:
   void append(MCInst const &ID, MCInst const *Extender, unsigned S);
 
   // Return the error code for the last check or shuffling of the bundle.
-  void setError(unsigned Err) { Error = Err; };
-  unsigned getError() const { return (Error); };
+  void reportError(llvm::Twine const &Msg);
 };
-}
+} // namespace llvm
 
 #endif // HEXAGONSHUFFLER_H
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index b0532f933b1..726b7af73b0 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -759,8 +759,13 @@ void Liveness::computeLiveIns() {
             // all related shadows as a single use cluster.
             RegisterRef S(RS.first, P.second);
             NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs);
-            for (NodeAddr<DefNode*> D : Ds)
-              LOX[S.Reg].insert({D.Id, S.Mask});
+            for (NodeAddr<DefNode*> D : Ds) {
+              // Calculate the mask corresponding to the visited def.
+              RegisterAggr TA(PRI);
+              TA.insert(D.Addr->getRegRef(DFG)).intersect(S);
+              LaneBitmask TM = TA.makeRegRef().Mask;
+              LOX[S.Reg].insert({D.Id, TM});
+            }
           }
         }
 
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 12a2571c28d..fe54589f8b0 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -264,12 +264,6 @@ LanaiRegisterInfo::getFrameRegister(const MachineFunction & /*MF*/) const {
 
 unsigned LanaiRegisterInfo::getBaseRegister() const { return Lanai::R14; }
 
-bool LanaiRegisterInfo::canRealignStack(const MachineFunction &MF) const {
-  if (!TargetRegisterInfo::canRealignStack(MF))
-    return false;
-  return true;
-}
-
 unsigned LanaiRegisterInfo::getEHExceptionRegister() const {
   llvm_unreachable("no exception support");
   return 0;
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.h b/lib/Target/Lanai/LanaiRegisterInfo.h
index c6e459076eb..d88a1919385 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -41,8 +41,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
-  bool canRealignStack(const MachineFunction &MF) const override;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 3650cc9fe07..40e337eb97c 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(MipsCodeGen
   MipsSubtarget.cpp
   MipsTargetMachine.cpp
   MipsTargetObjectFile.cpp
+  MicroMipsSizeReduction.cpp
   )
 
 add_subdirectory(InstPrinter)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 3304449efb9..1e2eb7dbec3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -366,6 +366,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_MICROMIPS_TLS_LDM",         0,     16,   0 },
     { "fixup_MICROMIPS_TLS_DTPREL_HI16", 0,     16,   0 },
     { "fixup_MICROMIPS_TLS_DTPREL_LO16", 0,     16,   0 },
+    { "fixup_MICROMIPS_GOTTPREL",        0,     16,   0 },
     { "fixup_MICROMIPS_TLS_TPREL_HI16",  0,     16,   0 },
     { "fixup_MICROMIPS_TLS_TPREL_LO16",  0,     16,   0 },
     { "fixup_Mips_SUB",                  0,     64,   0 },
@@ -437,6 +438,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_MICROMIPS_TLS_LDM",         16,     16,   0 },
     { "fixup_MICROMIPS_TLS_DTPREL_HI16", 16,     16,   0 },
     { "fixup_MICROMIPS_TLS_DTPREL_LO16", 16,     16,   0 },
+    { "fixup_MICROMIPS_GOTTPREL",        16,     16,   0 },
     { "fixup_MICROMIPS_TLS_TPREL_HI16",  16,     16,   0 },
     { "fixup_MICROMIPS_TLS_TPREL_LO16",  16,     16,   0 },
     { "fixup_Mips_SUB",                   0,     64,   0 },
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 324fd3c6fe1..1a1c613cfce 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -374,6 +374,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
     return ELF::R_MICROMIPS_TLS_DTPREL_HI16;
   case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16:
     return ELF::R_MICROMIPS_TLS_DTPREL_LO16;
+  case Mips::fixup_MICROMIPS_GOTTPREL:
+    return ELF::R_MICROMIPS_TLS_GOTTPREL;
   case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
     return ELF::R_MICROMIPS_TLS_TPREL_HI16;
   case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 149296212ec..6148a1b622c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -203,6 +203,9 @@ namespace Mips {
     // resulting in - R_MICROMIPS_TLS_DTPREL_LO16
     fixup_MICROMIPS_TLS_DTPREL_LO16,
 
+    // resulting in - R_MICROMIPS_TLS_GOTTPREL.
+    fixup_MICROMIPS_GOTTPREL,
+
     // resulting in - R_MICROMIPS_TLS_TPREL_HI16
     fixup_MICROMIPS_TLS_TPREL_HI16,
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 5685f0426e9..a35eb2a8e03 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -669,7 +669,8 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
                                    : Mips::fixup_Mips_DTPREL_LO;
       break;
     case MipsMCExpr::MEK_GOTTPREL:
-      FixupKind = Mips::fixup_Mips_GOTTPREL;
+      FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_GOTTPREL
+                                   : Mips::fixup_Mips_GOTTPREL;
       break;
     case MipsMCExpr::MEK_GOT:
       FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_GOT16
diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp
new file mode 100644
index 00000000000..4593fc92ca6
--- /dev/null
+++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -0,0 +1,335 @@
+//=== MicroMipsSizeReduction.cpp - MicroMips size reduction pass --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///\file
+/// This pass is used to reduce the size of instructions where applicable.
+///
+/// TODO: Implement microMIPS64 support.
+/// TODO: Implement support for reducing into lwp/swp instruction.
+//===----------------------------------------------------------------------===//
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "micromips-reduce-size"
+
+STATISTIC(NumReduced, "Number of 32-bit instructions reduced to 16-bit ones");
+
+namespace {
+
+/// Order of operands to transfer
+// TODO: Will be extended when additional optimizations are added
+enum OperandTransfer {
+  OT_NA,          ///< Not applicable
+  OT_OperandsAll, ///< Transfer all operands
+};
+
+/// Reduction type
+// TODO: Will be extended when additional optimizations are added
+enum ReduceType {
+  RT_OneInstr ///< Reduce one instruction into a smaller instruction
+};
+
+// Information about immediate field restrictions
+struct ImmField {
+  ImmField() : ImmFieldOperand(-1), Shift(0), LBound(0), HBound(0) {}
+  ImmField(uint8_t Shift, int16_t LBound, int16_t HBound,
+           int8_t ImmFieldOperand)
+      : ImmFieldOperand(ImmFieldOperand), Shift(Shift), LBound(LBound),
+        HBound(HBound) {}
+  int8_t ImmFieldOperand; // Immediate operand, -1 if it does not exist
+  uint8_t Shift;          // Shift value
+  int16_t LBound;         // Low bound of the immediate operand
+  int16_t HBound;         // High bound of the immediate operand
+};
+
+/// Information about operands
+// TODO: Will be extended when additional optimizations are added
+struct OpInfo {
+  OpInfo(enum OperandTransfer TransferOperands)
+      : TransferOperands(TransferOperands) {}
+  OpInfo() : TransferOperands(OT_NA) {}
+
+  enum OperandTransfer
+      TransferOperands; ///< Operands to transfer to the new instruction
+};
+
+// Information about opcodes
+struct OpCodes {
+  OpCodes(unsigned WideOpc, unsigned NarrowOpc)
+      : WideOpc(WideOpc), NarrowOpc(NarrowOpc) {}
+
+  unsigned WideOpc;   ///< Wide opcode
+  unsigned NarrowOpc; ///< Narrow opcode
+};
+
+/// ReduceTable - A static table with information on mapping from wide
+/// opcodes to narrow
+struct ReduceEntry {
+
+  enum ReduceType eRType; ///< Reduction type
+  bool (*ReduceFunction)(
+      MachineInstr *MI,
+      const ReduceEntry &Entry); ///< Pointer to reduce function
+  struct OpCodes Ops;            ///< All relevant OpCodes
+  struct OpInfo OpInf;           ///< Characteristics of operands
+  struct ImmField Imm;           ///< Characteristics of immediate field
+
+  ReduceEntry(enum ReduceType RType, struct OpCodes Op,
+              bool (*F)(MachineInstr *MI, const ReduceEntry &Entry),
+              struct OpInfo OpInf, struct ImmField Imm)
+      : eRType(RType), ReduceFunction(F), Ops(Op), OpInf(OpInf), Imm(Imm) {}
+
+  unsigned NarrowOpc() const { return Ops.NarrowOpc; }
+  unsigned WideOpc() const { return Ops.WideOpc; }
+  int16_t LBound() const { return Imm.LBound; }
+  int16_t HBound() const { return Imm.HBound; }
+  uint8_t Shift() const { return Imm.Shift; }
+  int8_t ImmField() const { return Imm.ImmFieldOperand; }
+  enum OperandTransfer TransferOperands() const {
+    return OpInf.TransferOperands;
+  }
+  enum ReduceType RType() const { return eRType; }
+
+  // operator used by std::equal_range
+  bool operator<(const unsigned int r) const { return (WideOpc() < r); }
+
+  // operator used by std::equal_range
+  friend bool operator<(const unsigned int r, const struct ReduceEntry &re) {
+    return (r < re.WideOpc());
+  }
+};
+
+class MicroMipsSizeReduce : public MachineFunctionPass {
+public:
+  static char ID;
+  MicroMipsSizeReduce();
+
+  static const MipsInstrInfo *MipsII;
+  const MipsSubtarget *Subtarget;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  llvm::StringRef getPassName() const override {
+    return "microMIPS instruction size reduction pass";
+  }
+
+private:
+  /// Reduces width of instructions in the specified basic block.
+  bool ReduceMBB(MachineBasicBlock &MBB);
+
+  /// Attempts to reduce MI, returns true on success.
+  bool ReduceMI(const MachineBasicBlock::instr_iterator &MII);
+
+  // Attempts to reduce LW/SW instruction into LWSP/SWSP,
+  // returns true on success.
+  static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry);
+
+  // Attempts to reduce arithmetic instructions, returns true on success
+  static bool ReduceArithmeticInstructions(MachineInstr *MI,
+                                           const ReduceEntry &Entry);
+
+  // Changes opcode of an instruction
+  static bool ReplaceInstruction(MachineInstr *MI, const ReduceEntry &Entry);
+
+  // Table with transformation rules for each instruction
+  static llvm::SmallVector<ReduceEntry, 16> ReduceTable;
+};
+
+char MicroMipsSizeReduce::ID = 0;
+const MipsInstrInfo *MicroMipsSizeReduce::MipsII;
+
+// This table must be sorted by WideOpc as a main criterion and
+// ReduceType as a sub-criterion (when wide opcodes are the same)
+llvm::SmallVector<ReduceEntry, 16> MicroMipsSizeReduce::ReduceTable = {
+
+    // ReduceType, OpCodes, ReduceFunction,
+    // OpInfo(TransferOperands),
+    // ImmField(Shift, LBound, HBound, ImmFieldPosition)
+    {RT_OneInstr, OpCodes(Mips::ADDu, Mips::ADDU16_MM),
+     ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
+     ImmField(0, 0, 0, -1)},
+    {RT_OneInstr, OpCodes(Mips::ADDu_MM, Mips::ADDU16_MM),
+     ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
+     ImmField(0, 0, 0, -1)},
+    {RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP,
+     OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+    {RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP,
+     OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+    {RT_OneInstr, OpCodes(Mips::SUBu, Mips::SUBU16_MM),
+     ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
+     ImmField(0, 0, 0, -1)},
+    {RT_OneInstr, OpCodes(Mips::SUBu_MM, Mips::SUBU16_MM),
+     ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
+     ImmField(0, 0, 0, -1)},
+    {RT_OneInstr, OpCodes(Mips::SW, Mips::SWSP_MM), ReduceXWtoXWSP,
+     OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+    {RT_OneInstr, OpCodes(Mips::SW_MM, Mips::SWSP_MM), ReduceXWtoXWSP,
+     OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+};
+}
+
+// Returns true if the machine operand MO is register SP
+static bool IsSP(const MachineOperand &MO) {
+  if (MO.isReg() && ((MO.getReg() == Mips::SP)))
+    return true;
+  return false;
+}
+
+// Returns true if the machine operand MO is register $16, $17, or $2-$7.
+static bool isMMThreeBitGPRegister(const MachineOperand &MO) {
+  if (MO.isReg() && Mips::GPRMM16RegClass.contains(MO.getReg()))
+    return true;
+  return false;
+}
+
+// Returns true if the operand Op is an immediate value
+// and writes the immediate value into variable Imm
+static bool GetImm(MachineInstr *MI, unsigned Op, int64_t &Imm) {
+
+  if (!MI->getOperand(Op).isImm())
+    return false;
+  Imm = MI->getOperand(Op).getImm();
+  return true;
+}
+
+// Returns true if the variable Value has the number of least-significant zero
+// bits equal to Shift and if the shifted value is between the bounds
+static bool InRange(int64_t Value, unsigned short Shift, int LBound,
+                    int HBound) {
+  int64_t Value2 = Value >> Shift;
+  if ((Value2 << Shift) == Value && (Value2 >= LBound) && (Value2 < HBound))
+    return true;
+  return false;
+}
+
+// Returns true if immediate operand is in range
+static bool ImmInRange(MachineInstr *MI, const ReduceEntry &Entry) {
+
+  int64_t offset;
+
+  if (!GetImm(MI, Entry.ImmField(), offset))
+    return false;
+
+  if (!InRange(offset, Entry.Shift(), Entry.LBound(), Entry.HBound()))
+    return false;
+
+  return true;
+}
+
+MicroMipsSizeReduce::MicroMipsSizeReduce() : MachineFunctionPass(ID) {}
+
+bool MicroMipsSizeReduce::ReduceMI(
+    const MachineBasicBlock::instr_iterator &MII) {
+
+  MachineInstr *MI = &*MII;
+  unsigned Opcode = MI->getOpcode();
+
+  // Search the table.
+  llvm::SmallVector<ReduceEntry, 16>::const_iterator Start =
+      std::begin(ReduceTable);
+  llvm::SmallVector<ReduceEntry, 16>::const_iterator End =
+      std::end(ReduceTable);
+
+  std::pair<llvm::SmallVector<ReduceEntry, 16>::const_iterator,
+            llvm::SmallVector<ReduceEntry, 16>::const_iterator>
+      Range = std::equal_range(Start, End, Opcode);
+
+  if (Range.first == Range.second)
+    return false;
+
+  for (llvm::SmallVector<ReduceEntry, 16>::const_iterator Entry = Range.first;
+       Entry != Range.second; ++Entry)
+    if (((*Entry).ReduceFunction)(&(*MII), *Entry))
+      return true;
+
+  return false;
+}
+
+bool MicroMipsSizeReduce::ReduceXWtoXWSP(MachineInstr *MI,
+                                         const ReduceEntry &Entry) {
+
+  if (!ImmInRange(MI, Entry))
+    return false;
+
+  if (!IsSP(MI->getOperand(1)))
+    return false;
+
+  return ReplaceInstruction(MI, Entry);
+}
+
+bool MicroMipsSizeReduce::ReduceArithmeticInstructions(
+    MachineInstr *MI, const ReduceEntry &Entry) {
+
+  if (!isMMThreeBitGPRegister(MI->getOperand(0)) ||
+      !isMMThreeBitGPRegister(MI->getOperand(1)) ||
+      !isMMThreeBitGPRegister(MI->getOperand(2)))
+    return false;
+
+  return ReplaceInstruction(MI, Entry);
+}
+
+bool MicroMipsSizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+                                    E = MBB.instr_end();
+  MachineBasicBlock::instr_iterator NextMII;
+
+  // Iterate through the instructions in the basic block
+  for (; MII != E; MII = NextMII) {
+    NextMII = std::next(MII);
+    MachineInstr *MI = &*MII;
+
+    // Don't reduce bundled instructions or pseudo operations
+    if (MI->isBundle() || MI->isTransient())
+      continue;
+
+    // Try to reduce 32-bit instruction into 16-bit instruction
+    Modified |= ReduceMI(MII);
+  }
+
+  return Modified;
+}
+
+bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI,
+                                             const ReduceEntry &Entry) {
+
+  MI->setDesc(MipsII->get(Entry.NarrowOpc()));
+  DEBUG(dbgs() << "Converted into 16-bit: " << *MI);
+  ++NumReduced;
+  return true;
+}
+
+bool MicroMipsSizeReduce::runOnMachineFunction(MachineFunction &MF) {
+
+  Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+
+  // TODO: Add support for other subtargets:
+  // microMIPS32r6 and microMIPS64r6
+  if (!Subtarget->inMicroMipsMode() || !Subtarget->hasMips32r2())
+    return false;
+
+  MipsII = static_cast<const MipsInstrInfo *>(Subtarget->getInstrInfo());
+
+  bool Modified = false;
+  MachineFunction::iterator I = MF.begin(), E = MF.end();
+
+  for (; I != E; ++I)
+    Modified |= ReduceMBB(*I);
+  return Modified;
+}
+
+/// Returns an instance of the MicroMips size reduction pass.
+FunctionPass *llvm::createMicroMipsSizeReductionPass() {
+  return new MicroMipsSizeReduce();
+}
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index d9faf3325ca..7553f3972f5 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -32,6 +32,7 @@ namespace llvm {
   FunctionPass *createMipsHazardSchedule();
   FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
   FunctionPass *createMipsConstantIslandPass();
+  FunctionPass *createMicroMipsSizeReductionPass();
 } // end namespace llvm;
 
 #endif
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index a5c7bf7699e..21c99da0922 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1263,7 +1263,8 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
     MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
 
     CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy,
-                             CLI.Symbol->getName().data());
+                             CLI.Symbol ? CLI.Symbol->getName().data()
+                                        : nullptr);
 
     // Only handle a single return value.
     if (RVLocs.size() != 1)
@@ -1326,11 +1327,10 @@ bool MipsFastISel::fastLowerArguments() {
   // Only handle simple cases. i.e. All arguments are directly mapped to
   // registers of the appropriate type.
   SmallVector<AllocatedReg, 4> Allocation;
-  unsigned Idx = 1;
   for (const auto &FormalArg : F->args()) {
-    if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+    if (FormalArg.hasAttribute(Attribute::InReg) ||
+        FormalArg.hasAttribute(Attribute::StructRet) ||
+        FormalArg.hasAttribute(Attribute::ByVal)) {
       DEBUG(dbgs() << ".. gave up (inreg, structret, byval)\n");
       return false;
     }
@@ -1342,7 +1342,8 @@ bool MipsFastISel::fastLowerArguments() {
     }
 
     EVT ArgVT = TLI.getValueType(DL, ArgTy);
-    DEBUG(dbgs() << ".. " << (Idx - 1) << ": " << ArgVT.getEVTString() << "\n");
+    DEBUG(dbgs() << ".. " << FormalArg.getArgNo() << ": "
+                 << ArgVT.getEVTString() << "\n");
     if (!ArgVT.isSimple()) {
       DEBUG(dbgs() << ".. .. gave up (not a simple type)\n");
       return false;
@@ -1352,8 +1353,8 @@ bool MipsFastISel::fastLowerArguments() {
     case MVT::i1:
     case MVT::i8:
     case MVT::i16:
-      if (!F->getAttributes().hasAttribute(Idx, Attribute::SExt) &&
-          !F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) {
+      if (!FormalArg.hasAttribute(Attribute::SExt) &&
+          !FormalArg.hasAttribute(Attribute::ZExt)) {
         // It must be any extend, this shouldn't happen for clang-generated IR
         // so just fall back on SelectionDAG.
         DEBUG(dbgs() << ".. .. gave up (i8/i16 arg is not extended)\n");
@@ -1374,7 +1375,7 @@ bool MipsFastISel::fastLowerArguments() {
       break;
 
     case MVT::i32:
-      if (F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) {
+      if (FormalArg.hasAttribute(Attribute::ZExt)) {
         // The O32 ABI does not permit a zero-extended i32.
         DEBUG(dbgs() << ".. .. gave up (i32 arg is zero extended)\n");
         return false;
@@ -1437,23 +1438,20 @@ bool MipsFastISel::fastLowerArguments() {
       DEBUG(dbgs() << ".. .. gave up (unknown type)\n");
       return false;
     }
-
-    ++Idx;
   }
 
-  Idx = 0;
   for (const auto &FormalArg : F->args()) {
-    unsigned SrcReg = Allocation[Idx].Reg;
-    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[Idx].RC);
+    unsigned ArgNo = FormalArg.getArgNo();
+    unsigned SrcReg = Allocation[ArgNo].Reg;
+    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[ArgNo].RC);
     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
     // Without this, EmitLiveInCopies may eliminate the livein if its only
     // use is a bitcast (which isn't turned into an instruction).
-    unsigned ResultReg = createResultReg(Allocation[Idx].RC);
+    unsigned ResultReg = createResultReg(Allocation[ArgNo].RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY), ResultReg)
         .addReg(DstReg, getKillRegState(true));
     updateValueMap(&FormalArg, ResultReg);
-    ++Idx;
   }
 
   // Calculate the size of the incoming arguments area.
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index a45a9c4b41c..29a38fd35c1 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -260,6 +260,7 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
 // print out the code after the passes.
 void MipsPassConfig::addPreEmitPass() {
   MipsTargetMachine &TM = getMipsTargetMachine();
+  addPass(createMicroMipsSizeReductionPass());
 
   // The delay slot filler pass can potientially create forbidden slot (FS)
   // hazards for MIPSR6 which the hazard schedule pass (HSP) will fix. Any
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ba28cd83278..58cb7793d04 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1555,7 +1555,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
         // Just print .param .align <a> .b8 .param[size];
         // <a> = PAL.getparamalignment
         // size = typeallocsize of element type
-        unsigned align = PAL.getParamAlignment(paramIndex + 1);
+        unsigned align = PAL.getParamAlignment(paramIndex);
         if (align == 0)
           align = DL.getABITypeAlignment(Ty);
 
@@ -1641,7 +1641,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
       // Just print .param .align <a> .b8 .param[size];
       // <a> = PAL.getparamalignment
       // size = typeallocsize of element type
-      unsigned align = PAL.getParamAlignment(paramIndex + 1);
+      unsigned align = PAL.getParamAlignment(paramIndex);
       if (align == 0)
         align = DL.getABITypeAlignment(ETy);
       // Work around a bug in ptxas. When PTX code takes address of
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 5b626cbcd5b..e858b37e184 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -164,7 +164,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
   // Set the alignment to alignment of the byval parameter. This is because,
   // later load/stores assume that alignment, and we are going to replace
   // the use of the byval parameter with this alloca instruction.
-  AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+  AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo()));
   Arg->replaceAllUsesWith(AllocA);
 
   Value *ArgInParam = new AddrSpaceCastInst(
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 125c00295f8..1b0402bf003 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -49,6 +49,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -542,12 +543,12 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
   SDValue Op1 = N->getOperand(1);
   SDLoc dl(N);
 
-  APInt LKZ, LKO, RKZ, RKO;
-  CurDAG->computeKnownBits(Op0, LKZ, LKO);
-  CurDAG->computeKnownBits(Op1, RKZ, RKO);
+  KnownBits LKnown, RKnown;
+  CurDAG->computeKnownBits(Op0, LKnown);
+  CurDAG->computeKnownBits(Op1, RKnown);
 
-  unsigned TargetMask = LKZ.getZExtValue();
-  unsigned InsertMask = RKZ.getZExtValue();
+  unsigned TargetMask = LKnown.Zero.getZExtValue();
+  unsigned InsertMask = RKnown.Zero.getZExtValue();
 
   if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
     unsigned Op0Opc = Op0.getOpcode();
@@ -590,9 +591,9 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
        // The AND mask might not be a constant, and we need to make sure that
        // if we're going to fold the masking with the insert, all bits not
        // know to be zero in the mask are known to be one.
-        APInt MKZ, MKO;
-        CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
-        bool CanFoldMask = InsertMask == MKO.getZExtValue();
+        KnownBits MKnown;
+        CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
 
         unsigned SHOpc = Op1.getOperand(0).getOpcode();
         if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
@@ -2772,12 +2773,12 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     short Imm;
     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
         isIntS16Immediate(N->getOperand(1), Imm)) {
-      APInt LHSKnownZero, LHSKnownOne;
-      CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
+      KnownBits LHSKnown;
+      CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
 
       // If this is equivalent to an add, then we can fold it with the
       // FrameIndex calculation.
-      if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
+      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
         selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
         return;
       }
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 4659a2ea803..483e9b171d5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -79,6 +79,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -1847,17 +1848,14 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
     // If this is an or of disjoint bitfields, we can codegen this as an add
     // (for better address arithmetic) if the LHS and RHS of the OR are provably
     // disjoint.
-    APInt LHSKnownZero, LHSKnownOne;
-    APInt RHSKnownZero, RHSKnownOne;
-    DAG.computeKnownBits(N.getOperand(0),
-                         LHSKnownZero, LHSKnownOne);
+    KnownBits LHSKnown, RHSKnown;
+    DAG.computeKnownBits(N.getOperand(0), LHSKnown);
 
-    if (LHSKnownZero.getBoolValue()) {
-      DAG.computeKnownBits(N.getOperand(1),
-                           RHSKnownZero, RHSKnownOne);
+    if (LHSKnown.Zero.getBoolValue()) {
+      DAG.computeKnownBits(N.getOperand(1), RHSKnown);
       // If all of the bits are known zero on the LHS or RHS, the add won't
       // carry.
-      if (~(LHSKnownZero | RHSKnownZero) == 0) {
+      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
         Base = N.getOperand(0);
         Index = N.getOperand(1);
         return true;
@@ -1953,10 +1951,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
       // If this is an or of disjoint bitfields, we can codegen this as an add
       // (for better address arithmetic) if the LHS and RHS of the OR are
       // provably disjoint.
-      APInt LHSKnownZero, LHSKnownOne;
-      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+      KnownBits LHSKnown;
+      DAG.computeKnownBits(N.getOperand(0), LHSKnown);
 
-      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
         // If all of the bits are known zero on the LHS or RHS, the add won't
         // carry.
         if (FrameIndexSDNode *FI =
@@ -6466,7 +6464,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SETNE:
     std::swap(TV, FV);
   case ISD::SETEQ:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -6476,25 +6474,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
   case ISD::SETULT:
   case ISD::SETLT:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   case ISD::SETOGE:
   case ISD::SETGE:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
   case ISD::SETUGT:
   case ISD::SETGT:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   case ISD::SETOLE:
   case ISD::SETLE:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -10318,17 +10316,16 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
     } else {
       // This is neither a signed nor an unsigned comparison, just make sure
       // that the high bits are equal.
-      APInt Op1Zero, Op1One;
-      APInt Op2Zero, Op2One;
-      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
-      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
+      KnownBits Op1Known, Op2Known;
+      DAG.computeKnownBits(N->getOperand(0), Op1Known);
+      DAG.computeKnownBits(N->getOperand(1), Op2Known);
 
       // We don't really care about what is known about the first bit (if
       // anything), so clear it in all masks prior to comparing them.
-      Op1Zero.clearBit(0); Op1One.clearBit(0);
-      Op2Zero.clearBit(0); Op2One.clearBit(0);
+      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
+      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
 
-      if (Op1Zero != Op2Zero || Op1One != Op2One)
+      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
         return SDValue();
     }
   }
@@ -11216,6 +11213,14 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
   }
 
   MVT VecTy = N->getValueType(0).getSimpleVT();
+
+  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
+  // aligned and the type is a vector with elements up to 4 bytes
+  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
+      && VecTy.getScalarSizeInBits() <= 32 ) {
+    return SDValue();
+  }
+
   SDValue LoadOps[] = { Chain, Base };
   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                          DAG.getVTList(MVT::v2f64, MVT::Other),
@@ -11280,6 +11285,13 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
   SDValue Src = N->getOperand(SrcOpnd);
   MVT VecTy = Src.getValueType().getSimpleVT();
 
+  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
+  // aligned and the type is a vector with elements up to 4 bytes
+  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
+      && VecTy.getScalarSizeInBits() <= 32 ) {
+    return SDValue();
+  }
+
   // All stores are done as v2f64 and possible bit cast.
   if (VecTy != MVT::v2f64) {
     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
@@ -12015,18 +12027,17 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
 //===----------------------------------------------------------------------===//
 
 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                      APInt &KnownZero,
-                                                      APInt &KnownOne,
+                                                      KnownBits &Known,
                                                       const APInt &DemandedElts,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
-  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+  Known.Zero.clearAllBits(); Known.One.clearAllBits();
   switch (Op.getOpcode()) {
   default: break;
   case PPCISD::LBRX: {
     // lhbrx is known to have the top bits cleared out.
     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
-      KnownZero = 0xFFFF0000;
+      Known.Zero = 0xFFFF0000;
     break;
   }
   case ISD::INTRINSIC_WO_CHAIN: {
@@ -12048,7 +12059,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     case Intrinsic::ppc_altivec_vcmpgtuh_p:
     case Intrinsic::ppc_altivec_vcmpgtuw_p:
     case Intrinsic::ppc_altivec_vcmpgtud_p:
-      KnownZero = ~1U;  // All bits but the low one are known to be zero.
+      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
       break;
     }
   }
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 6113eb58f42..5645fdc2485 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -606,8 +606,7 @@ namespace llvm {
                                SelectionDAG &DAG) const override;
 
     void computeKnownBitsForTargetNode(const SDValue Op,
-                                       APInt &KnownZero,
-                                       APInt &KnownOne,
+                                       KnownBits &Known,
                                        const APInt &DemandedElts,
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 13603732397..967557452f2 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -138,7 +138,7 @@ let Uses = [RM] in {
     def LXVW4X : XX1Form<31, 780,
                          (outs vsrc:$XT), (ins memrr:$src),
                          "lxvw4x $XT, $src", IIC_LdStLFD,
-                         [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
+                         []>;
   } // mayLoad
 
   // Store indexed instructions
@@ -160,7 +160,7 @@ let Uses = [RM] in {
     def STXVW4X : XX1Form<31, 908,
                          (outs), (ins vsrc:$XT, memrr:$dst),
                          "stxvw4x $XT, $dst", IIC_LdStSTFD,
-                         [(store v4i32:$XT, xoaddr:$dst)]>;
+                         []>;
     }
   } // mayStore
 
@@ -1041,8 +1041,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
   // Stores.
   def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
             (STXVD2X $rS, xoaddr:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
-            (STXVW4X $rS, xoaddr:$dst)>;
   def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
             (STXVD2X $rS, xoaddr:$dst)>;
   def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
@@ -1053,8 +1051,12 @@ let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
   def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
   def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
   def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
   def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
   def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+  def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+            (STXVW4X $rS, xoaddr:$dst)>;
 }
 
 // Permutes.
@@ -1890,8 +1892,8 @@ let Predicates = [IsLittleEndian, HasVSX] in
   def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
             (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
 
-  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
-  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
 
 let Predicates = [IsLittleEndian, HasDirectMove] in {
   // v16i8 scalar <-> vector conversions (LE)
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index f120a98e945..c44e371856a 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 using namespace llvm;
 
 
@@ -1875,25 +1876,24 @@ EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
 /// combiner.
 void SparcTargetLowering::computeKnownBitsForTargetNode
                                 (const SDValue Op,
-                                 APInt &KnownZero,
-                                 APInt &KnownOne,
+                                 KnownBits &Known,
                                  const APInt &DemandedElts,
                                  const SelectionDAG &DAG,
                                  unsigned Depth) const {
-  APInt KnownZero2, KnownOne2;
-  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+  KnownBits Known2;
+  Known.Zero.clearAllBits(); Known.One.clearAllBits();
 
   switch (Op.getOpcode()) {
   default: break;
   case SPISD::SELECT_ICC:
   case SPISD::SELECT_XCC:
   case SPISD::SELECT_FCC:
-    DAG.computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
-    DAG.computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+    DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
+    DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
 
     // Only known if known in both the LHS and RHS.
-    KnownOne &= KnownOne2;
-    KnownZero &= KnownZero2;
+    Known.One &= Known2.One;
+    Known.Zero &= Known2.Zero;
     break;
   }
 }
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 90d03984060..cc6386bccbb 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -66,8 +66,7 @@ namespace llvm {
     /// in Mask are known to be either zero or one and return them in the
     /// KnownZero/KnownOne bitsets.
     void computeKnownBitsForTargetNode(const SDValue Op,
-                                       APInt &KnownZero,
-                                       APInt &KnownOne,
+                                       KnownBits &Known,
                                        const APInt &DemandedElts,
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 920b6e430e8..cd2f708458b 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -711,9 +712,9 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
   // The inner check covers all cases but is more expensive.
   uint64_t Used = allOnes(Op.getValueSizeInBits());
   if (Used != (AndMask | InsertMask)) {
-    APInt KnownZero, KnownOne;
-    CurDAG->computeKnownBits(Op.getOperand(0), KnownZero, KnownOne);
-    if (Used != (AndMask | InsertMask | KnownZero.getZExtValue()))
+    KnownBits Known;
+    CurDAG->computeKnownBits(Op.getOperand(0), Known);
+    if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))
       return false;
   }
 
@@ -770,9 +771,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
       // If some bits of Input are already known zeros, those bits will have
       // been removed from the mask.  See if adding them back in makes the
       // mask suitable.
-      APInt KnownZero, KnownOne;
-      CurDAG->computeKnownBits(Input, KnownZero, KnownOne);
-      Mask |= KnownZero.getZExtValue();
+      KnownBits Known;
+      CurDAG->computeKnownBits(Input, Known);
+      Mask |= Known.Zero.getZExtValue();
       if (!refineRxSBGMask(RxSBG, Mask))
         return false;
     }
@@ -794,9 +795,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
       // If some bits of Input are already known ones, those bits will have
       // been removed from the mask.  See if adding them back in makes the
       // mask suitable.
-      APInt KnownZero, KnownOne;
-      CurDAG->computeKnownBits(Input, KnownZero, KnownOne);
-      Mask &= ~KnownOne.getZExtValue();
+      KnownBits Known;
+      CurDAG->computeKnownBits(Input, Known);
+      Mask &= ~Known.One.getZExtValue();
       if (!refineRxSBGMask(RxSBG, Mask))
         return false;
     }
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index f2fd581f784..6989aabb8c6 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,8 +20,9 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
 #include <cctype>
 
 using namespace llvm;
@@ -3066,14 +3067,14 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
 
   // Get the known-zero masks for each operand.
   SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
-  APInt KnownZero[2], KnownOne[2];
-  DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
-  DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);
+  KnownBits Known[2];
+  DAG.computeKnownBits(Ops[0], Known[0]);
+  DAG.computeKnownBits(Ops[1], Known[1]);
 
   // See if the upper 32 bits of one operand and the lower 32 bits of the
   // other are known zero.  They are the low and high operands respectively.
-  uint64_t Masks[] = { KnownZero[0].getZExtValue(),
-                       KnownZero[1].getZExtValue() };
+  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
+                       Known[1].Zero.getZExtValue() };
   unsigned High, Low;
   if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
     High = 1, Low = 0;
@@ -3158,9 +3159,9 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
   }
 
   // Get the known-zero mask for the operand.
-  APInt KnownZero, KnownOne;
-  DAG.computeKnownBits(Op, KnownZero, KnownOne);
-  unsigned NumSignificantBits = (~KnownZero).getActiveBits();
+  KnownBits Known;
+  DAG.computeKnownBits(Op, Known);
+  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
   if (NumSignificantBits == 0)
     return DAG.getConstant(0, DL, VT);
 
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e74c9a80515..d286158f407 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -676,7 +676,6 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT
                                        const Instruction *I) {
   if (ValTy->isVectorTy()) {
     assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
-    assert (CondTy == nullptr || CondTy->isVectorTy());
     unsigned VF = ValTy->getVectorNumElements();
 
     // Called with a compare instruction.
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 50272fda56d..91cc97e38b3 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -44,7 +44,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
                                           const TargetMachine &TM) {
   Ctx = &ctx;
   // `Initialize` can be called more than once.
-  if (Mang != nullptr) delete Mang;
+  delete Mang;
   Mang = new Mangler();
   InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(),
                        TM.getCodeModel(), *Ctx);
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index a67137f867e..257f1d110aa 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/Function.h" // To access function attributes.
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 25d77bb1f23..365b327190e 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -26,18 +26,18 @@
 // offset for an add that needs wrapping.
 def regPlusImm : PatFrag<(ops node:$addr, node:$off),
                          (add node:$addr, node:$off),
-                         [{ return N->getFlags()->hasNoUnsignedWrap(); }]>;
+                         [{ return N->getFlags().hasNoUnsignedWrap(); }]>;
 
 // Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
 def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
     return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
 
-  APInt KnownZero0, KnownOne0;
-  CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
-  APInt KnownZero1, KnownOne1;
-  CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
-  return (~KnownZero0 & ~KnownZero1) == 0;
+  KnownBits Known0;
+  CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
+  KnownBits Known1;
+  CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+  return (~Known0.Zero & ~Known1.Zero) == 0;
 }]>;
 
 // GlobalAddresses are conceptually unsigned values, so we can also fold them
@@ -47,7 +47,7 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
 def regPlusGA : PatFrag<(ops node:$addr, node:$off),
                         (add node:$addr, node:$off),
                         [{
-  return N->getFlags()->hasNoUnsignedWrap();
+  return N->getFlags().hasNoUnsignedWrap();
 }]>;
 
 // We don't need a regPlusES because external symbols never have constant
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b8477810b4c..fd11b671dd9 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -3074,16 +3074,13 @@ bool X86FastISel::fastLowerArguments() {
   // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
   unsigned GPRCnt = 0;
   unsigned FPRCnt = 0;
-  unsigned Idx = 0;
   for (auto const &Arg : F->args()) {
-    // The first argument is at index 1.
-    ++Idx;
-    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+    if (Arg.hasAttribute(Attribute::ByVal) ||
+        Arg.hasAttribute(Attribute::InReg) ||
+        Arg.hasAttribute(Attribute::StructRet) ||
+        Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftError) ||
+        Arg.hasAttribute(Attribute::Nest))
       return false;
 
     Type *ArgTy = Arg.getType();
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2d788bf0cf9..12a10bf3072 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -31,6 +31,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -419,6 +420,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
     case ISD::ADD:
     case ISD::ADDC:
     case ISD::ADDE:
+    case ISD::ADDCARRY:
     case ISD::AND:
     case ISD::OR:
     case ISD::XOR: {
@@ -1070,9 +1072,9 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
   }
   APInt MaskedHighBits =
     APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
-  APInt KnownZero, KnownOne;
-  DAG.computeKnownBits(X, KnownZero, KnownOne);
-  if (MaskedHighBits != KnownZero) return true;
+  KnownBits Known;
+  DAG.computeKnownBits(X, Known);
+  if (MaskedHighBits != Known.Zero) return true;
 
   // We've identified a pattern that can be transformed into a single shift
   // and an addressing mode. Make it so.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ada46643a5f..6092fd2bfd6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -52,6 +52,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
@@ -784,30 +785,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SMIN,               MVT::v8i16, Legal);
     setOperationAction(ISD::UMIN,               MVT::v16i8, Legal);
 
-    setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
-    setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
-    setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
-    setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);
-
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i32, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
 
-    setOperationAction(ISD::CTPOP,              MVT::v16i8, Custom);
-    setOperationAction(ISD::CTPOP,              MVT::v8i16, Custom);
-    setOperationAction(ISD::CTPOP,              MVT::v4i32, Custom);
-    setOperationAction(ISD::CTPOP,              MVT::v2i64, Custom);
+    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+      setOperationAction(ISD::SETCC,              VT, Custom);
+      setOperationAction(ISD::CTPOP,              VT, Custom);
+      setOperationAction(ISD::CTTZ,               VT, Custom);
+    }
 
-    setOperationAction(ISD::CTTZ,               MVT::v16i8, Custom);
-    setOperationAction(ISD::CTTZ,               MVT::v8i16, Custom);
-    setOperationAction(ISD::CTTZ,               MVT::v4i32, Custom);
-    setOperationAction(ISD::CTTZ,               MVT::v2i64, Custom);
-
-    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+      setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
       setOperationAction(ISD::VSELECT,            VT, Custom);
@@ -882,18 +871,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
 
-    for (auto VT : { MVT::v8i16, MVT::v16i8 }) {
-      setOperationAction(ISD::SRL, VT, Custom);
-      setOperationAction(ISD::SHL, VT, Custom);
-      setOperationAction(ISD::SRA, VT, Custom);
-    }
-
-    // In the customized shift lowering, the legal cases in AVX2 will be
-    // recognized.
-    for (auto VT : { MVT::v4i32, MVT::v2i64 }) {
-      setOperationAction(ISD::SRL, VT, Custom);
-      setOperationAction(ISD::SHL, VT, Custom);
-      setOperationAction(ISD::SRA, VT, Custom);
+    // In the customized shift lowering, the legal v4i32/v2i64 cases
+    // in AVX2 will be recognized.
+    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+      setOperationAction(ISD::SRL,              VT, Custom);
+      setOperationAction(ISD::SHL,              VT, Custom);
+      setOperationAction(ISD::SRA,              VT, Custom);
     }
   }
 
@@ -935,13 +918,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     // SSE41 brings specific instructions for doing vector sign extend even in
     // cases where we don't have SRA.
-    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
-    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
-    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
-
-    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
-    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
-    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
+    for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+    }
 
     for (MVT VT : MVT::integer_vector_valuetypes()) {
       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
@@ -950,19 +930,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
 
     // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
-    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
-
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
-    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
+    for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
+      setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8,  Legal);
+      setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8,  Legal);
+      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8,  Legal);
+      setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
+      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
+      setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
+    }
 
     // i8 vectors are custom because the source register and source
     // source memory operand types are not the same width.
@@ -1026,36 +1001,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     for (MVT VT : MVT::fp_vector_valuetypes())
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
 
-    for (auto VT : { MVT::v32i8, MVT::v16i16 }) {
+    // In the customized shift lowering, the legal v8i32/v4i64 cases
+    // in AVX2 will be recognized.
+    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
       setOperationAction(ISD::SRL, VT, Custom);
       setOperationAction(ISD::SHL, VT, Custom);
       setOperationAction(ISD::SRA, VT, Custom);
     }
 
-    setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
-    setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
-    setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
-    setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);
-
     setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
     setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
     setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
 
-    setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
-    setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
-    setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
-    setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
-    setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
-    setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
-    setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
-    setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
-    setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
+    for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+      setOperationAction(ISD::SIGN_EXTEND,     VT, Custom);
+      setOperationAction(ISD::ZERO_EXTEND,     VT, Custom);
+      setOperationAction(ISD::ANY_EXTEND,      VT, Custom);
+    }
+
     setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
     setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
     setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
     setOperationAction(ISD::BITREVERSE,        MVT::v32i8, Custom);
 
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
+      setOperationAction(ISD::SETCC,           VT, Custom);
       setOperationAction(ISD::CTPOP,           VT, Custom);
       setOperationAction(ISD::CTTZ,            VT, Custom);
       setOperationAction(ISD::CTLZ,            VT, Custom);
@@ -1103,27 +1073,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
 
       // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
-
-      setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
-      setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
-      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
-      setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
-      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
-      setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
-    }
-
-    // In the customized shift lowering, the legal cases in AVX2 will be
-    // recognized.
-    for (auto VT : { MVT::v8i32, MVT::v4i64 }) {
-      setOperationAction(ISD::SRL, VT, Custom);
-      setOperationAction(ISD::SHL, VT, Custom);
-      setOperationAction(ISD::SRA, VT, Custom);
+      for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
+        setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
+        setLoadExtAction(LoadExtOp, MVT::v8i32,  MVT::v8i8,  Legal);
+        setLoadExtAction(LoadExtOp, MVT::v4i64,  MVT::v4i8,  Legal);
+        setLoadExtAction(LoadExtOp, MVT::v8i32,  MVT::v8i16, Legal);
+        setLoadExtAction(LoadExtOp, MVT::v4i64,  MVT::v4i16, Legal);
+        setLoadExtAction(LoadExtOp, MVT::v4i64,  MVT::v4i32, Legal);
+      }
     }
 
     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
@@ -1272,19 +1229,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::VSELECT,            MVT::v8i1,  Expand);
     setOperationAction(ISD::VSELECT,            MVT::v16i1, Expand);
     if (Subtarget.hasDQI()) {
-      setOperationAction(ISD::SINT_TO_FP,       MVT::v8i64, Legal);
-      setOperationAction(ISD::SINT_TO_FP,       MVT::v4i64, Legal);
-      setOperationAction(ISD::SINT_TO_FP,       MVT::v2i64, Legal);
-      setOperationAction(ISD::UINT_TO_FP,       MVT::v8i64, Legal);
-      setOperationAction(ISD::UINT_TO_FP,       MVT::v4i64, Legal);
-      setOperationAction(ISD::UINT_TO_FP,       MVT::v2i64, Legal);
-      setOperationAction(ISD::FP_TO_SINT,       MVT::v8i64, Legal);
-      setOperationAction(ISD::FP_TO_SINT,       MVT::v4i64, Legal);
-      setOperationAction(ISD::FP_TO_SINT,       MVT::v2i64, Legal);
-      setOperationAction(ISD::FP_TO_UINT,       MVT::v8i64, Legal);
-      setOperationAction(ISD::FP_TO_UINT,       MVT::v4i64, Legal);
-      setOperationAction(ISD::FP_TO_UINT,       MVT::v2i64, Legal);
-
+      for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
+        setOperationAction(ISD::SINT_TO_FP,     VT, Legal);
+        setOperationAction(ISD::UINT_TO_FP,     VT, Legal);
+        setOperationAction(ISD::FP_TO_SINT,     VT, Legal);
+        setOperationAction(ISD::FP_TO_UINT,     VT, Legal);
+      }
       if (Subtarget.hasVLX()) {
         // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
         setOperationAction(ISD::SINT_TO_FP,    MVT::v2f32, Custom);
@@ -1334,11 +1284,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
 
     for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
-      setOperationAction(ISD::FFLOOR,     VT, Legal);
-      setOperationAction(ISD::FCEIL,      VT, Legal);
-      setOperationAction(ISD::FTRUNC,     VT, Legal);
-      setOperationAction(ISD::FRINT,      VT, Legal);
-      setOperationAction(ISD::FNEARBYINT, VT, Legal);
+      setOperationAction(ISD::FFLOOR,           VT, Legal);
+      setOperationAction(ISD::FCEIL,            VT, Legal);
+      setOperationAction(ISD::FTRUNC,           VT, Legal);
+      setOperationAction(ISD::FRINT,            VT, Legal);
+      setOperationAction(ISD::FNEARBYINT,       VT, Legal);
     }
 
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64,  Custom);
@@ -1357,7 +1307,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
 
-    setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
+    setOperationAction(ISD::MUL,                MVT::v8i64, Custom);
 
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
@@ -1372,15 +1322,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SELECT,             MVT::v16i1, Custom);
     setOperationAction(ISD::SELECT,             MVT::v8i1,  Custom);
 
-    setOperationAction(ISD::SMAX,               MVT::v16i32, Legal);
-    setOperationAction(ISD::SMAX,               MVT::v8i64, Legal);
-    setOperationAction(ISD::UMAX,               MVT::v16i32, Legal);
-    setOperationAction(ISD::UMAX,               MVT::v8i64, Legal);
-    setOperationAction(ISD::SMIN,               MVT::v16i32, Legal);
-    setOperationAction(ISD::SMIN,               MVT::v8i64, Legal);
-    setOperationAction(ISD::UMIN,               MVT::v16i32, Legal);
-    setOperationAction(ISD::UMIN,               MVT::v8i64, Legal);
-
     setOperationAction(ISD::ADD,                MVT::v8i1,  Custom);
     setOperationAction(ISD::ADD,                MVT::v16i1, Custom);
     setOperationAction(ISD::SUB,                MVT::v8i1,  Custom);
@@ -1391,12 +1332,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
 
     for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
-      setOperationAction(ISD::ABS, VT, Legal);
-      setOperationAction(ISD::SRL, VT, Custom);
-      setOperationAction(ISD::SHL, VT, Custom);
-      setOperationAction(ISD::SRA, VT, Custom);
-      setOperationAction(ISD::CTPOP, VT, Custom);
-      setOperationAction(ISD::CTTZ, VT, Custom);
+      setOperationAction(ISD::SMAX,             VT, Legal);
+      setOperationAction(ISD::UMAX,             VT, Legal);
+      setOperationAction(ISD::SMIN,             VT, Legal);
+      setOperationAction(ISD::UMIN,             VT, Legal);
+      setOperationAction(ISD::ABS,              VT, Legal);
+      setOperationAction(ISD::SRL,              VT, Custom);
+      setOperationAction(ISD::SHL,              VT, Custom);
+      setOperationAction(ISD::SRA,              VT, Custom);
+      setOperationAction(ISD::CTPOP,            VT, Custom);
+      setOperationAction(ISD::CTTZ,             VT, Custom);
     }
 
     // Need to promote to 64-bit even though we have 32-bit masked instructions
@@ -1540,15 +1485,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::VSELECT,            MVT::v64i1, Expand);
     setOperationAction(ISD::BITREVERSE,         MVT::v64i8, Custom);
 
-    setOperationAction(ISD::SMAX,               MVT::v64i8, Legal);
-    setOperationAction(ISD::SMAX,               MVT::v32i16, Legal);
-    setOperationAction(ISD::UMAX,               MVT::v64i8, Legal);
-    setOperationAction(ISD::UMAX,               MVT::v32i16, Legal);
-    setOperationAction(ISD::SMIN,               MVT::v64i8, Legal);
-    setOperationAction(ISD::SMIN,               MVT::v32i16, Legal);
-    setOperationAction(ISD::UMIN,               MVT::v64i8, Legal);
-    setOperationAction(ISD::UMIN,               MVT::v32i16, Legal);
-
     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
 
     setTruncStoreAction(MVT::v32i16,  MVT::v32i8, Legal);
@@ -1579,6 +1515,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::MSTORE,       VT, Legal);
       setOperationAction(ISD::CTPOP,        VT, Custom);
       setOperationAction(ISD::CTTZ,         VT, Custom);
+      setOperationAction(ISD::SMAX,         VT, Legal);
+      setOperationAction(ISD::UMAX,         VT, Legal);
+      setOperationAction(ISD::SMIN,         VT, Legal);
+      setOperationAction(ISD::UMIN,         VT, Legal);
 
       setOperationPromotedToType(ISD::AND,  VT, MVT::v8i64);
       setOperationPromotedToType(ISD::OR,   VT, MVT::v8i64);
@@ -1652,6 +1592,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::USUBO, VT, Custom);
     setOperationAction(ISD::SMULO, VT, Custom);
     setOperationAction(ISD::UMULO, VT, Custom);
+
+    // Support carry in as value rather than glue.
+    setOperationAction(ISD::ADDCARRY, VT, Custom);
+    setOperationAction(ISD::SUBCARRY, VT, Custom);
   }
 
   if (!Subtarget.is64Bit()) {
@@ -4839,14 +4783,10 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
     return false;
 
   // The index should be aligned on a vecWidth-bit boundary.
-  uint64_t Index =
-    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
-
+  uint64_t Index = N->getConstantOperandVal(1);
   MVT VT = N->getSimpleValueType(0);
   unsigned ElSize = VT.getScalarSizeInBits();
-  bool Result = (Index * ElSize) % vecWidth == 0;
-
-  return Result;
+  return (Index * ElSize) % vecWidth == 0;
 }
 
 /// Return true if the specified INSERT_SUBVECTOR
@@ -4856,15 +4796,12 @@ static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
   assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
   if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
     return false;
-  // The index should be aligned on a vecWidth-bit boundary.
-  uint64_t Index =
-    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
 
+  // The index should be aligned on a vecWidth-bit boundary.
+  uint64_t Index = N->getConstantOperandVal(2);
   MVT VT = N->getSimpleValueType(0);
   unsigned ElSize = VT.getScalarSizeInBits();
-  bool Result = (Index * ElSize) % vecWidth == 0;
-
-  return Result;
+  return (Index * ElSize) % vecWidth == 0;
 }
 
 bool X86::isVINSERT128Index(SDNode *N) {
@@ -4888,13 +4825,9 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
   assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
          "Illegal extract subvector for VEXTRACT");
 
-  uint64_t Index =
-    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
-
+  uint64_t Index = N->getConstantOperandVal(1);
   MVT VecVT = N->getOperand(0).getSimpleValueType();
-  MVT ElVT = VecVT.getVectorElementType();
-
-  unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
+  unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
   return Index / NumElemsPerChunk;
 }
 
@@ -4903,13 +4836,9 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
   assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
          "Illegal insert subvector for VINSERT");
 
-  uint64_t Index =
-    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
-
+  uint64_t Index = N->getConstantOperandVal(2);
   MVT VecVT = N->getSimpleValueType(0);
-  MVT ElVT = VecVT.getVectorElementType();
-
-  unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
+  unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
   return Index / NumElemsPerChunk;
 }
 
@@ -4942,9 +4871,9 @@ bool X86::isZeroNode(SDValue Elt) {
   return isNullConstant(Elt) || isNullFPConstant(Elt);
 }
 
-// Build a vector of constants
+// Build a vector of constants.
 // Use an UNDEF node if MaskElt == -1.
-// Spilt 64-bit constants in the 32-bit mode.
+// Split 64-bit constants in the 32-bit mode.
 static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
                               const SDLoc &dl, bool IsMask = false) {
 
@@ -5428,8 +5357,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
                                 unsigned BitOffset) {
     if (!Cst)
       return false;
-    unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
     if (isa<UndefValue>(Cst)) {
+      unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
       Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
       return true;
     }
@@ -6641,18 +6570,16 @@ static bool isUseOfShuffle(SDNode *N) {
   return false;
 }
 
-/// Attempt to use the vbroadcast instruction to generate a splat value for the
-/// following cases:
-/// 1. A splat BUILD_VECTOR which uses:
-///    a. A single scalar load, or a constant.
-///    b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
-/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
-/// a scalar load, or a constant.
+/// Attempt to use the vbroadcast instruction to generate a splat value
+/// from a splat BUILD_VECTOR which uses:
+///  a. A single scalar load, or a constant.
+///  b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
 ///
 /// The VBROADCAST node is returned when a pattern is found,
 /// or SDValue() otherwise.
-static SDValue LowerVectorBroadcast(BuildVectorSDNode *BVOp, const X86Subtarget &Subtarget,
-                                    SelectionDAG &DAG) {
+static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
+                                           const X86Subtarget &Subtarget,
+                                           SelectionDAG &DAG) {
   // VBROADCAST requires AVX.
   // TODO: Splats could be generated for non-AVX CPUs using SSE
   // instructions, but there's less potential gain for only 128-bit vectors.
@@ -7605,7 +7532,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return AddSub;
   if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
     return HorizontalOp;
-  if (SDValue Broadcast = LowerVectorBroadcast(BV, Subtarget, DAG))
+  if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
     return Broadcast;
   if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
     return BitOp;
@@ -9843,7 +9770,6 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
 /// For convenience, this code also bundles all of the subtarget feature set
 /// filtering. While a little annoying to re-dispatch on type here, there isn't
 /// a convenient way to factor it out.
-/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them?
 static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
                                              SDValue V1, SDValue V2,
                                              ArrayRef<int> Mask,
@@ -16337,11 +16263,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
     case ISD::ADD:
     case ISD::SUB:
     case ISD::MUL:
-    case ISD::SHL: {
-      const auto *BinNode = cast<BinaryWithFlagsSDNode>(Op.getNode());
-      if (BinNode->Flags.hasNoSignedWrap())
+    case ISD::SHL:
+      if (Op.getNode()->getFlags().hasNoSignedWrap())
         break;
-    }
     default:
       NeedOF = true;
       break;
@@ -16799,9 +16723,9 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
       unsigned BitWidth = Op0.getValueSizeInBits();
       unsigned AndBitWidth = And.getValueSizeInBits();
       if (BitWidth > AndBitWidth) {
-        APInt Zeros, Ones;
-        DAG.computeKnownBits(Op0, Zeros, Ones);
-        if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+        KnownBits Known;
+        DAG.computeKnownBits(Op0, Known);
+        if (Known.Zero.countLeadingOnes() < BitWidth - AndBitWidth)
           return SDValue();
       }
       LHS = Op1;
@@ -23351,6 +23275,35 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
                      Op.getOperand(1), Op.getOperand(2));
 }
 
+static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
+  SDNode *N = Op.getNode();
+  MVT VT = N->getSimpleValueType(0);
+
+  // Let legalize expand this if it isn't a legal type yet.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+  SDLoc DL(N);
+
+  // Set the carry flag.
+  SDValue Carry = Op.getOperand(2);
+  EVT CarryVT = Carry.getValueType();
+  APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+  Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
+                      Carry, DAG.getConstant(NegOne, DL, CarryVT));
+
+  unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
+  SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
+                            Op.getOperand(1), Carry.getValue(1));
+
+  SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG);
+  if (N->getValueType(1) == MVT::i1)
+    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
+
+  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
+}
+
 static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
   assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
@@ -23862,6 +23815,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADDE:
   case ISD::SUBC:
   case ISD::SUBE:               return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY:           return LowerADDSUBCARRY(Op, DAG);
   case ISD::ADD:
   case ISD::SUB:                return LowerADD_SUB(Op, DAG);
   case ISD::SMAX:
@@ -26667,12 +26622,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
 //===----------------------------------------------------------------------===//
 
 void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                      APInt &KnownZero,
-                                                      APInt &KnownOne,
+                                                      KnownBits &Known,
                                                       const APInt &DemandedElts,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
-  unsigned BitWidth = KnownZero.getBitWidth();
+  unsigned BitWidth = Known.getBitWidth();
   unsigned Opc = Op.getOpcode();
   EVT VT = Op.getValueType();
   assert((Opc >= ISD::BUILTIN_OP_END ||
@@ -26682,7 +26636,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
          "Should use MaskedValueIsZero if you don't know whether Op"
          " is a target node!");
 
-  KnownZero = KnownOne = APInt(BitWidth, 0);   // Don't know anything.
+  Known.Zero.clearAllBits(); Known.One.clearAllBits();
   switch (Opc) {
   default: break;
   case X86ISD::ADD:
@@ -26701,33 +26655,33 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
       break;
     LLVM_FALLTHROUGH;
   case X86ISD::SETCC:
-    KnownZero.setBits(1, BitWidth);
+    Known.Zero.setBitsFrom(1);
     break;
   case X86ISD::MOVMSK: {
     unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
-    KnownZero.setBits(NumLoBits, BitWidth);
+    Known.Zero.setBitsFrom(NumLoBits);
     break;
   }
   case X86ISD::VSHLI:
   case X86ISD::VSRLI: {
     if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
-        KnownZero = APInt::getAllOnesValue(BitWidth);
+        Known.Zero.setAllBits();
         break;
       }
 
-      DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
+      DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
       unsigned ShAmt = ShiftImm->getZExtValue();
       if (Opc == X86ISD::VSHLI) {
-        KnownZero = KnownZero << ShAmt;
-        KnownOne = KnownOne << ShAmt;
+        Known.Zero <<= ShAmt;
+        Known.One <<= ShAmt;
         // Low bits are known zero.
-        KnownZero.setLowBits(ShAmt);
+        Known.Zero.setLowBits(ShAmt);
       } else {
-        KnownZero.lshrInPlace(ShAmt);
-        KnownOne.lshrInPlace(ShAmt);
+        Known.Zero.lshrInPlace(ShAmt);
+        Known.One.lshrInPlace(ShAmt);
         // High bits are known zero.
-        KnownZero.setHighBits(ShAmt);
+        Known.Zero.setHighBits(ShAmt);
       }
     }
     break;
@@ -26741,12 +26695,12 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     unsigned InBitWidth = SrcVT.getScalarSizeInBits();
     assert(InNumElts >= NumElts && "Illegal VZEXT input");
 
-    KnownZero = KnownOne = APInt(InBitWidth, 0);
+    Known = KnownBits(InBitWidth);
     APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
-    DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedSrcElts, Depth + 1);
-    KnownOne = KnownOne.zext(BitWidth);
-    KnownZero = KnownZero.zext(BitWidth);
-    KnownZero.setBits(InBitWidth, BitWidth);
+    DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
+    Known.One = Known.One.zext(BitWidth);
+    Known.Zero = Known.Zero.zext(BitWidth);
+    Known.Zero.setBitsFrom(InBitWidth);
     break;
   }
   }
@@ -30206,12 +30160,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
 
     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
     APInt DemandedMask(APInt::getSignMask(BitWidth));
-    APInt KnownZero, KnownOne;
+    KnownBits Known;
     TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
                                           DCI.isBeforeLegalizeOps());
     if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) ||
-        TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
-                                 TLO)) {
+        TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {
       // If we changed the computation somewhere in the DAG, this change will
       // affect all users of Cond. Make sure it is fine and update all the nodes
       // so that we do not use the generic VSELECT anymore. Otherwise, we may
@@ -31056,8 +31009,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
       N0.getOperand(1).getOpcode() == ISD::Constant) {
     SDValue N00 = N0.getOperand(0);
     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
-    const APInt &ShAmt = N1C->getAPIntValue();
-    Mask = Mask.shl(ShAmt);
+    Mask <<= N1C->getAPIntValue();
     bool MaskOK = false;
     // We can handle cases concerning bit-widening nodes containing setcc_c if
     // we carefully interrogate the mask to make sure we are semantics
@@ -31267,9 +31219,9 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
     unsigned ShiftImm = ShiftVal.getZExtValue();
     for (APInt &Elt : EltBits) {
       if (X86ISD::VSHLI == Opcode)
-        Elt = Elt.shl(ShiftImm);
+        Elt <<= ShiftImm;
       else if (X86ISD::VSRAI == Opcode)
-        Elt = Elt.ashr(ShiftImm);
+        Elt.ashrInPlace(ShiftImm);
       else
         Elt.lshrInPlace(ShiftImm);
     }
@@ -33481,7 +33433,7 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
   // use of a constant by performing (-0 - A*B) instead.
   // FIXME: Check rounding control flags as well once it becomes available.
   if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
-      Arg->getFlags()->hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
+      Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
     SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
     SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
                                   Arg.getOperand(1), Zero);
@@ -33775,12 +33727,12 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
   if (Op1.hasOneUse()) {
     unsigned BitWidth = Op1.getValueSizeInBits();
     APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
-    APInt KnownZero, KnownOne;
+    KnownBits Known;
     TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                           !DCI.isBeforeLegalizeOps());
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) ||
-        TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+        TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO))
       DCI.CommitTargetLoweringOpt(TLO);
   }
   return SDValue();
@@ -33842,8 +33794,8 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
     return SDValue();
 
   bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND;
-  bool NSW = Add->getFlags()->hasNoSignedWrap();
-  bool NUW = Add->getFlags()->hasNoUnsignedWrap();
+  bool NSW = Add->getFlags().hasNoSignedWrap();
+  bool NUW = Add->getFlags().hasNoUnsignedWrap();
 
   // We need an 'add nsw' feeding into the 'sext' or 'add nuw' feeding
   // into the 'zext'
@@ -33883,7 +33835,7 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
   SDNodeFlags Flags;
   Flags.setNoSignedWrap(NSW);
   Flags.setNoUnsignedWrap(NUW);
-  return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, &Flags);
+  return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
 }
 
 /// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
@@ -34486,6 +34438,34 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Optimize RES, EFLAGS = X86ISD::ADD LHS, RHS
+static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG,
+                             X86TargetLowering::DAGCombinerInfo &DCI) {
+  // When legalizing carry, we create carries via add X, -1
+  // If that comes from an actual carry, via setcc, we use the
+  // carry directly.
+  if (isAllOnesConstant(N->getOperand(1)) && N->hasAnyUseOfValue(1)) {
+    SDValue Carry = N->getOperand(0);
+    while (Carry.getOpcode() == ISD::TRUNCATE ||
+           Carry.getOpcode() == ISD::ZERO_EXTEND ||
+           Carry.getOpcode() == ISD::SIGN_EXTEND ||
+           Carry.getOpcode() == ISD::ANY_EXTEND ||
+           (Carry.getOpcode() == ISD::AND &&
+            isOneConstant(Carry.getOperand(1))))
+      Carry = Carry.getOperand(0);
+
+    if (Carry.getOpcode() == ISD::SETCC ||
+        Carry.getOpcode() == X86ISD::SETCC ||
+        Carry.getOpcode() == X86ISD::SETCC_CARRY) {
+      auto *Cond = cast<ConstantSDNode>(Carry.getOperand(0));
+      if (Cond->getZExtValue() == X86::COND_B)
+        return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1));
+    }
+  }
+
+  return SDValue();
+}
+
 // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
 static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
                           X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -34740,8 +34720,8 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
 
 static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
                           const X86Subtarget &Subtarget) {
-  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
-  if (Flags->hasVectorReduction()) {
+  const SDNodeFlags Flags = N->getFlags();
+  if (Flags.hasVectorReduction()) {
     if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
       return Sad;
     if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))
@@ -35047,6 +35027,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::CMOV:        return combineCMov(N, DAG, DCI, Subtarget);
   case ISD::ADD:            return combineAdd(N, DAG, Subtarget);
   case ISD::SUB:            return combineSub(N, DAG, Subtarget);
+  case X86ISD::ADD:         return combineX86ADD(N, DAG, DCI);
   case X86ISD::ADC:         return combineADC(N, DAG, DCI);
   case ISD::MUL:            return combineMul(N, DAG, DCI, Subtarget);
   case ISD::SHL:
@@ -35171,14 +35152,21 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
 /// know that the code that lowers COPY of EFLAGS has to use the stack, and if
 /// we don't adjust the stack we clobber the first frame index.
 /// See X86InstrInfo::copyPhysReg.
-bool X86TargetLowering::hasCopyImplyingStackAdjustment(
-    MachineFunction *MF) const {
-  const MachineRegisterInfo &MRI = MF->getRegInfo();
-
+static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
   return any_of(MRI.reg_instructions(X86::EFLAGS),
                 [](const MachineInstr &RI) { return RI.isCopy(); });
 }
 
+void X86TargetLowering::finalizeLowering(MachineFunction &MF) const {
+  if (hasCopyImplyingStackAdjustment(MF)) {
+    MachineFrameInfo &MFI = MF.getFrameInfo();
+    MFI.setHasCopyImplyingStackAdjustment(true);
+  }
+
+  TargetLoweringBase::finalizeLowering(MF);
+}
+
 /// This method query the target whether it is beneficial for dag combiner to
 /// promote the specified node. If true, it should return the desired promotion
 /// type by reference.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 190a8833500..46dc587c637 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -773,10 +773,6 @@ namespace llvm {
     /// and some i16 instructions are slow.
     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 
-    /// Return true if the MachineFunction contains a COPY which would imply
-    /// HasOpaqueSPAdjustment.
-    bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override;
-
     MachineBasicBlock *
     EmitInstrWithCustomInserter(MachineInstr &MI,
                                 MachineBasicBlock *MBB) const override;
@@ -828,8 +824,7 @@ namespace llvm {
     /// Determine which of the bits specified in Mask are known to be either
     /// zero or one and return them in the KnownZero/KnownOne bitsets.
     void computeKnownBitsForTargetNode(const SDValue Op,
-                                       APInt &KnownZero,
-                                       APInt &KnownOne,
+                                       KnownBits &Known,
                                        const APInt &DemandedElts,
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
@@ -1066,6 +1061,9 @@ namespace llvm {
                               ArrayRef<ShuffleVectorInst *> Shuffles,
                               ArrayRef<unsigned> Indices,
                               unsigned Factor) const override;
+
+    void finalizeLowering(MachineFunction &MF) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index e592c2b3c0a..3dc673e3c35 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1271,11 +1271,11 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
     return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
 
-  APInt KnownZero0, KnownOne0;
-  CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
-  APInt KnownZero1, KnownOne1;
-  CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
-  return (~KnownZero0 & ~KnownZero1) == 0;
+  KnownBits Known0;
+  CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
+  KnownBits Known1;
+  CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+  return (~Known0.Zero & ~Known1.Zero) == 0;
 }]>;
 
 
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c3def461afd..ce087649867 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -877,7 +877,9 @@ def In32BitMode  : Predicate<"Subtarget->is32Bit()">,
 def IsWin64      : Predicate<"Subtarget->isTargetWin64()">;
 def NotWin64     : Predicate<"!Subtarget->isTargetWin64()">;
 def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
-                                  "Subtarget->getFrameLowering()->hasFP(*MF)">;
+                                  "Subtarget->getFrameLowering()->hasFP(*MF)"> {
+  let RecomputePerFunction = 1;
+}
 def IsPS4        : Predicate<"Subtarget->isTargetPS4()">;
 def NotPS4       : Predicate<"!Subtarget->isTargetPS4()">;
 def IsNaCl       : Predicate<"Subtarget->isTargetNaCl()">;
@@ -887,9 +889,9 @@ def KernelCode   : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
 def NearData     : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
                              "TM.getCodeModel() == CodeModel::Kernel">;
 def IsNotPIC     : Predicate<"!TM.isPositionIndependent()">;
-def OptForSize   : Predicate<"OptForSize">;
-def OptForMinSize : Predicate<"OptForMinSize">;
-def OptForSpeed  : Predicate<"!OptForSize">;
+def OptForSize   : Predicate<"Subtarget->getOptForSize()">;
+def OptForMinSize : Predicate<"Subtarget->getOptForMinSize()">;
+def OptForSpeed  : Predicate<"!Subtarget->getOptForSize()">;
 def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr  : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
 def FavorMemIndirectCall  : Predicate<"!Subtarget->callRegIndirect()">;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index d0f1b7091da..38f7bc0af5c 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -48,7 +48,6 @@ public:
   X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                          const X86RegisterBankInfo &RBI);
 
-  void beginFunction(const MachineFunction &MF) override;
   bool select(MachineInstr &I) const override;
 
 private:
@@ -56,11 +55,9 @@ private:
   /// the patterns that don't require complex C++.
   bool selectImpl(MachineInstr &I) const;
 
-  // TODO: remove after selectImpl support pattern with a predicate.
+  // TODO: remove after suported by Tablegen-erated instruction selection.
   unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const;
   unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const;
-  unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const;
-  unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const;
   unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc,
                           uint64_t Alignment) const;
 
@@ -80,12 +77,10 @@ private:
   const X86InstrInfo &TII;
   const X86RegisterInfo &TRI;
   const X86RegisterBankInfo &RBI;
-  bool OptForSize;
-  bool OptForMinSize;
 
-  PredicateBitset AvailableFeatures;
-  PredicateBitset computeAvailableFeatures(const MachineFunction *MF,
-                                           const X86Subtarget *Subtarget) const;
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
 
 #define GET_GLOBALISEL_TEMPORARIES_DECL
 #include "X86GenGlobalISel.inc"
@@ -102,8 +97,10 @@ X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
                                                const X86Subtarget &STI,
                                                const X86RegisterBankInfo &RBI)
     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
-      TRI(*STI.getRegisterInfo()), RBI(RBI), OptForSize(false),
-      OptForMinSize(false), AvailableFeatures()
+      TRI(*STI.getRegisterInfo()), RBI(RBI),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
 #define GET_GLOBALISEL_TEMPORARIES_INIT
 #include "X86GenGlobalISel.inc"
 #undef GET_GLOBALISEL_TEMPORARIES_INIT
@@ -153,10 +150,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
 
   const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
   const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
-  (void)DstSize;
   unsigned SrcReg = I.getOperand(1).getReg();
   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
-  (void)SrcSize;
+
   assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
          "No phys reg on generic operators");
   assert((DstSize == SrcSize ||
@@ -172,6 +168,18 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   case X86::GPRRegBankID:
     assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values.");
     RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+
+    // Change the physical register
+    if (SrcSize > DstSize && TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+      if (RC == &X86::GR32RegClass)
+        I.getOperand(1).setSubReg(X86::sub_32bit);
+      else if (RC == &X86::GR16RegClass)
+        I.getOperand(1).setSubReg(X86::sub_16bit);
+      else if (RC == &X86::GR8RegClass)
+        I.getOperand(1).setSubReg(X86::sub_8bit);
+
+      I.getOperand(1).substPhysReg(SrcReg, TRI);
+    }
     break;
   case X86::VECRRegBankID:
     RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
@@ -195,12 +203,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   return true;
 }
 
-void X86InstructionSelector::beginFunction(const MachineFunction &MF) {
-  OptForSize = MF.getFunction()->optForSize();
-  OptForMinSize = MF.getFunction()->optForMinSize();
-  AvailableFeatures = computeAvailableFeatures(&MF, &STI);
-}
-
 bool X86InstructionSelector::select(MachineInstr &I) const {
   assert(I.getParent() && "Instruction should be in a basic block!");
   assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -223,8 +225,12 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
   assert(I.getNumOperands() == I.getNumExplicitOperands() &&
          "Generic instruction has unexpected implicit operands\n");
 
-  // TODO: This should be implemented by tblgen, pattern with predicate not
-  // supported yet.
+  if (selectImpl(I))
+     return true;
+
+  DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));
+
+  // TODO: This should be implemented by tblgen.
   if (selectBinaryOp(I, MRI, MF))
     return true;
   if (selectLoadStoreOp(I, MRI, MF))
@@ -236,7 +242,7 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
   if (selectTrunc(I, MRI, MF))
     return true;
 
-  return selectImpl(I);
+  return false;
 }
 
 unsigned X86InstructionSelector::getFAddOp(LLT &Ty,
@@ -309,44 +315,6 @@ unsigned X86InstructionSelector::getFSubOp(LLT &Ty,
   return TargetOpcode::G_FSUB;
 }
 
-unsigned X86InstructionSelector::getAddOp(LLT &Ty,
-                                          const RegisterBank &RB) const {
-
-  if (X86::VECRRegBankID != RB.getID())
-    return TargetOpcode::G_ADD;
-
-  if (Ty == LLT::vector(4, 32)) {
-    if (STI.hasAVX512() && STI.hasVLX()) {
-      return X86::VPADDDZ128rr;
-    } else if (STI.hasAVX()) {
-      return X86::VPADDDrr;
-    } else if (STI.hasSSE2()) {
-      return X86::PADDDrr;
-    }
-  }
-
-  return TargetOpcode::G_ADD;
-}
-
-unsigned X86InstructionSelector::getSubOp(LLT &Ty,
-                                          const RegisterBank &RB) const {
-
-  if (X86::VECRRegBankID != RB.getID())
-    return TargetOpcode::G_SUB;
-
-  if (Ty == LLT::vector(4, 32)) {
-    if (STI.hasAVX512() && STI.hasVLX()) {
-      return X86::VPSUBDZ128rr;
-    } else if (STI.hasAVX()) {
-      return X86::VPSUBDrr;
-    } else if (STI.hasSSE2()) {
-      return X86::PSUBDrr;
-    }
-  }
-
-  return TargetOpcode::G_SUB;
-}
-
 bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
                                             MachineRegisterInfo &MRI,
                                             MachineFunction &MF) const {
@@ -364,12 +332,6 @@ bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
   case TargetOpcode::G_FSUB:
     NewOpc = getFSubOp(Ty, RB);
     break;
-  case TargetOpcode::G_ADD:
-    NewOpc = getAddOp(Ty, RB);
-    break;
-  case TargetOpcode::G_SUB:
-    NewOpc = getSubOp(Ty, RB);
-    break;
   default:
     break;
   }
@@ -396,7 +358,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
   } else if (Ty == LLT::scalar(16)) {
     if (X86::GPRRegBankID == RB.getID())
       return Isload ? X86::MOV16rm : X86::MOV16mr;
-  } else if (Ty == LLT::scalar(32)) {
+  } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
     if (X86::GPRRegBankID == RB.getID())
       return Isload ? X86::MOV32rm : X86::MOV32mr;
     if (X86::VECRRegBankID == RB.getID())
@@ -404,7 +366,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
                                  : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm)
                     : (HasAVX512 ? X86::VMOVSSZmr
                                  : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
-  } else if (Ty == LLT::scalar(64)) {
+  } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
     if (X86::GPRRegBankID == RB.getID())
       return Isload ? X86::MOV64rm : X86::MOV64mr;
     if (X86::VECRRegBankID == RB.getID())
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index c2dc762fec5..a437f6bf471 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -71,6 +71,15 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
 
   setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
   setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar);
+
+  // Extensions
+  setAction({G_ZEXT, s32}, Legal);
+  setAction({G_SEXT, s32}, Legal);
+
+  for (auto Ty : {s8, s16}) {
+    setAction({G_ZEXT, 1, Ty}, Legal);
+    setAction({G_SEXT, 1, Ty}, Legal);
+  }
 }
 
 void X86LegalizerInfo::setLegalizerInfo64bit() {
@@ -105,6 +114,17 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
     setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
 
   setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
+
+  // Extensions
+  for (auto Ty : {s32, s64}) {
+    setAction({G_ZEXT, Ty}, Legal);
+    setAction({G_SEXT, Ty}, Legal);
+  }
+
+  for (auto Ty : {s8, s16, s32}) {
+    setAction({G_ZEXT, 1, Ty}, Legal);
+    setAction({G_SEXT, 1, Ty}, Legal);
+  }
 }
 
 void X86LegalizerInfo::setLegalizerInfoSSE1() {
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index debb192732e..7be0a7fd406 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -27,6 +27,8 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -221,6 +223,8 @@ public:
 
   StringRef getPassName() const override { return "X86 LEA Optimize"; }
 
+  bool doInitialization(Module &M) override;
+
   /// \brief Loop over all of the basic blocks, replacing address
   /// calculations in load and store instructions, if it's already
   /// been calculated by LEA. Also, remove redundant LEAs.
@@ -262,6 +266,12 @@ private:
   /// \brief Removes redundant address calculations.
   bool removeRedundantAddrCalc(MemOpMap &LEAs);
 
+  /// Replace debug value MI with a new debug value instruction using register
+  /// VReg with an appropriate offset and DIExpression to incorporate the
+  /// address displacement AddrDispShift. Return new debug value instruction.
+  MachineInstr *replaceDebugValue(MachineInstr &MI, unsigned VReg,
+                                  int64_t AddrDispShift);
+
   /// \brief Removes LEAs which calculate similar addresses.
   bool removeRedundantLEAs(MemOpMap &LEAs);
 
@@ -270,6 +280,7 @@ private:
   MachineRegisterInfo *MRI;
   const X86InstrInfo *TII;
   const X86RegisterInfo *TRI;
+  Module *TheModule;
 
   static char ID;
 };
@@ -532,6 +543,25 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
   return Changed;
 }
 
+MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
+                                                 unsigned VReg,
+                                                 int64_t AddrDispShift) {
+  DIExpression *Expr = const_cast<DIExpression *>(MI.getDebugExpression());
+
+  if (AddrDispShift != 0)
+    Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, AddrDispShift,
+                                 DIExpression::WithStackValue);
+
+  // Replace DBG_VALUE instruction with modified version.
+  MachineBasicBlock *MBB = MI.getParent();
+  DebugLoc DL = MI.getDebugLoc();
+  bool IsIndirect = MI.isIndirectDebugValue();
+  int64_t Offset = IsIndirect ? MI.getOperand(1).getImm() : 0;
+  const MDNode *Var = MI.getDebugVariable();
+  return BuildMI(*MBB, MBB->erase(&MI), DL, TII->get(TargetOpcode::DBG_VALUE),
+                 IsIndirect, VReg, Offset, Var, Expr);
+}
+
 // Try to find similar LEAs in the list and replace one with another.
 bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
   bool Changed = false;
@@ -563,13 +593,21 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
         // Loop over all uses of the Last LEA and update their operands. Note
         // that the correctness of this has already been checked in the
         // isReplaceable function.
+        unsigned FirstVReg = First.getOperand(0).getReg();
         unsigned LastVReg = Last.getOperand(0).getReg();
-        for (auto UI = MRI->use_nodbg_begin(LastVReg),
-                  UE = MRI->use_nodbg_end();
+        for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end();
              UI != UE;) {
           MachineOperand &MO = *UI++;
           MachineInstr &MI = *MO.getParent();
 
+          if (MI.isDebugValue()) {
+            // Replace DBG_VALUE instruction with modified version using the
+            // register from the replacing LEA and the address displacement
+            // between the LEA instructions.
+            replaceDebugValue(MI, FirstVReg, AddrDispShift);
+            continue;
+          }
+
           // Get the number of the first memory operand.
           const MCInstrDesc &Desc = MI.getDesc();
           int MemOpNo =
@@ -577,7 +615,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
               X86II::getOperandBias(Desc);
 
           // Update address base.
-          MO.setReg(First.getOperand(0).getReg());
+          MO.setReg(FirstVReg);
 
           // Update address disp.
           MachineOperand &Op = MI.getOperand(MemOpNo + X86::AddrDisp);
@@ -587,11 +625,8 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
             Op.setOffset(Op.getOffset() + AddrDispShift);
         }
 
-        // Mark debug values referring to Last LEA as undefined.
-        MRI->markUsesInDebugValueAsUndef(LastVReg);
-
         // Since we can possibly extend register lifetime, clear kill flags.
-        MRI->clearKillFlags(First.getOperand(0).getReg());
+        MRI->clearKillFlags(FirstVReg);
 
         ++NumRedundantLEAs;
         DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump(););
@@ -614,6 +649,11 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
   return Changed;
 }
 
+bool OptimizeLEAPass::doInitialization(Module &M) {
+  TheModule = &M;
+  return false;
+}
+
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 1a72a0ba3a6..d4b2392eb1f 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -44,8 +44,26 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
   return false;
 }
 
+namespace {
+
+// Represents a cover of a buffer of Size bytes with Count() blocks of type AVT
+// (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is
+// always smaller than the block size).
+struct RepMovsRepeats {
+  RepMovsRepeats(uint64_t Size) : Size(Size) {}
+
+  uint64_t Count() const { return Size / UBytes(); }
+  uint64_t BytesLeft() const { return Size % UBytes(); }
+  uint64_t UBytes() const { return AVT.getSizeInBits() / 8; }
+
+  const uint64_t Size;
+  MVT AVT = MVT::i8;
+};
+
+}  // namespace
+
 SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
-    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
     SDValue Size, unsigned Align, bool isVolatile,
     MachinePointerInfo DstPtrInfo) const {
   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -69,10 +87,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   if ((Align & 3) != 0 || !ConstantSize ||
       ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
     // Check to see if there is a specialized entry-point for memory zeroing.
-    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
 
-    if (const char *bzeroEntry = V &&
-        V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
+    if (const char *bzeroEntry = ValC &&
+        ValC->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
       EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
       Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
@@ -104,7 +122,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   SDValue InFlag;
   EVT AVT;
   SDValue Count;
-  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
   unsigned BytesLeft = 0;
   if (ValC) {
     unsigned ValReg;
@@ -147,7 +165,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   } else {
     AVT = MVT::i8;
     Count  = DAG.getIntPtrConstant(SizeVal, dl);
-    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+    Chain  = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
     InFlag = Chain.getValue(1);
   }
 
@@ -171,7 +189,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
     Chain = DAG.getMemset(Chain, dl,
                           DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                       DAG.getConstant(Offset, dl, AddrVT)),
-                          Src,
+                          Val,
                           DAG.getConstant(BytesLeft, dl, SizeVT),
                           Align, isVolatile, false,
                           DstPtrInfo.getWithOffset(Offset));
@@ -181,24 +199,6 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   return Chain;
 }
 
-namespace {
-
-// Represents a cover of a buffer of SizeVal bytes with blocks of size
-// AVT, as well as how many bytes remain (BytesLeft is always smaller than
-// the block size).
-struct RepMovsRepeats {
-  RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) {
-    const unsigned UBytes = AVT.getSizeInBits() / 8;
-    Count = SizeVal / UBytes;
-    BytesLeft = SizeVal % UBytes;
-  }
-
-  unsigned Count;
-  unsigned BytesLeft;
-};
-
-}  // namespace
-
 SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
     SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
@@ -210,8 +210,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
       DAG.getMachineFunction().getSubtarget<X86Subtarget>();
   if (!ConstantSize)
     return SDValue();
-  uint64_t SizeVal = ConstantSize->getZExtValue();
-  if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
+  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
+  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
     return SDValue();
 
   /// If not DWORD aligned, it is more efficient to call the library.  However
@@ -232,35 +232,31 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
   if (isBaseRegConflictPossible(DAG, ClobberSet))
     return SDValue();
 
-  MVT AVT;
-  if (Subtarget.hasERMSB())
-    // If the target has enhanced REPMOVSB, then it's at least as fast to use
-    // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
-    // BytesLeft.
-    AVT = MVT::i8;
-  else if (Align & 1)
-    AVT = MVT::i8;
-  else if (Align & 2)
-    AVT = MVT::i16;
-  else if (Align & 4)
-    // DWORD aligned
-    AVT = MVT::i32;
-  else
-    // QWORD aligned
-    AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
+  // If the target has enhanced REPMOVSB, then it's at least as fast to use
+  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
+  // BytesLeft.
+  if (!Subtarget.hasERMSB() && !(Align & 1)) {
+    if (Align & 2)
+      // WORD aligned
+      Repeats.AVT = MVT::i16;
+    else if (Align & 4)
+      // DWORD aligned
+      Repeats.AVT = MVT::i32;
+    else
+      // QWORD aligned
+      Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
 
-  RepMovsRepeats Repeats(SizeVal, AVT);
-  if (Repeats.BytesLeft > 0 &&
-      DAG.getMachineFunction().getFunction()->optForMinSize()) {
-    // When agressively optimizing for size, avoid generating the code to handle
-    // BytesLeft.
-    AVT = MVT::i8;
-    Repeats = RepMovsRepeats(SizeVal, AVT);
+    if (Repeats.BytesLeft() > 0 &&
+        DAG.getMachineFunction().getFunction()->optForMinSize()) {
+      // When agressively optimizing for size, avoid generating the code to
+      // handle BytesLeft.
+      Repeats.AVT = MVT::i8;
+    }
   }
 
   SDValue InFlag;
   Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
-                           DAG.getIntPtrConstant(Repeats.Count, dl), InFlag);
+                           DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
   InFlag = Chain.getValue(1);
   Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
                            Dst, InFlag);
@@ -270,14 +266,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
   InFlag = Chain.getValue(1);
 
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
   SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
 
   SmallVector<SDValue, 4> Results;
   Results.push_back(RepMovs);
-  if (Repeats.BytesLeft) {
+  if (Repeats.BytesLeft()) {
     // Handle the last 1 - 7 bytes.
-    unsigned Offset = SizeVal - Repeats.BytesLeft;
+    unsigned Offset = Repeats.Size - Repeats.BytesLeft();
     EVT DstVT = Dst.getValueType();
     EVT SrcVT = Src.getValueType();
     EVT SizeVT = Size.getValueType();
@@ -288,7 +284,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
                                     DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                 DAG.getConstant(Offset, dl,
                                                                 SrcVT)),
-                                    DAG.getConstant(Repeats.BytesLeft, dl,
+                                    DAG.getConstant(Repeats.BytesLeft(), dl,
                                                     SizeVT),
                                     Align, isVolatile, AlwaysInline, false,
                                     DstPtrInfo.getWithOffset(Offset),
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 4154530d04e..82ff436f7eb 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -290,6 +290,9 @@ void X86Subtarget::initializeEnvironment() {
   HasMWAITX = false;
   HasCLZERO = false;
   HasMPX = false;
+  HasSGX = false;
+  HasCLFLUSHOPT = false;
+  HasCLWB = false;
   IsBTMemSlow = false;
   IsPMULLDSlow = false;
   IsSHLDSlow = false;
@@ -326,7 +329,8 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
 
 X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                            const X86TargetMachine &TM,
-                           unsigned StackAlignOverride)
+                           unsigned StackAlignOverride, bool OptForSize,
+                           bool OptForMinSize)
     : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
       PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
       StackAlignOverride(StackAlignOverride),
@@ -335,8 +339,9 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                   TargetTriple.getEnvironment() != Triple::CODE16),
       In16BitMode(TargetTriple.getArch() == Triple::x86 &&
                   TargetTriple.getEnvironment() == Triple::CODE16),
-      InstrInfo(initializeSubtargetDependencies(CPU, FS)),
-      TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
+      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+      FrameLowering(*this, getStackAlignment()), OptForSize(OptForSize),
+      OptForMinSize(OptForMinSize) {
   // Determine the PICStyle based on the target selected.
   if (!isPositionIndependent())
     setPICStyle(PICStyles::None);
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index fd057f36c89..8568cf04e7d 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -328,12 +328,16 @@ private:
   X86TargetLowering TLInfo;
   X86FrameLowering FrameLowering;
 
+  bool OptForSize;
+  bool OptForMinSize;
+
 public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
   X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
-               const X86TargetMachine &TM, unsigned StackAlignOverride);
+               const X86TargetMachine &TM, unsigned StackAlignOverride,
+               bool OptForSize, bool OptForMinSize);
 
   /// This object will take onwership of \p GISelAccessor.
   void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); }
@@ -499,6 +503,9 @@ public:
   bool isSLM() const { return X86ProcFamily == IntelSLM; }
   bool useSoftFloat() const { return UseSoftFloat; }
 
+  bool getOptForSize() const { return OptForSize; }
+  bool getOptForMinSize() const { return OptForMinSize; }
+
   /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
   /// no-sse2). There isn't any reason to disable it if the target processor
   /// supports it.
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 623cf38aa95..086f55dd60b 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -268,6 +268,12 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
 
   FS = Key.substr(CPU.size());
 
+  bool OptForSize = F.optForSize();
+  bool OptForMinSize = F.optForMinSize();
+
+  Key += std::string(OptForSize ? "+" : "-") + "optforsize";
+  Key += std::string(OptForMinSize ? "+" : "-") + "optforminsize";
+
   auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
@@ -275,7 +281,8 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
     // function that reside in TargetOptions.
     resetTargetOptions(F);
     I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
-                                        Options.StackAlignmentOverride);
+                                        Options.StackAlignmentOverride,
+                                        OptForSize, OptForMinSize);
 #ifndef LLVM_BUILD_GLOBAL_ISEL
     GISelAccessor *GISel = new GISelAccessor();
 #else
@@ -286,7 +293,8 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
 
     auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo());
     GISel->RegBankInfo.reset(RBI);
-    GISel->InstSelector.reset(createX86InstructionSelector(*this, *I, *RBI));
+    GISel->InstSelector.reset(createX86InstructionSelector(
+        *this, *I, *RBI));
 #endif
     I->setGISelAccessor(*GISel);
   }
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 2efcd46cd8d..4d3ecf25dc3 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -34,6 +34,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -406,9 +407,9 @@ SDValue XCoreTargetLowering::lowerLoadWordFromAlignedBasePlusOffset(
 
 static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
 {
-  APInt KnownZero, KnownOne;
-  DAG.computeKnownBits(Value, KnownZero, KnownOne);
-  return KnownZero.countTrailingOnes() >= 2;
+  KnownBits Known;
+  DAG.computeKnownBits(Value, Known);
+  return Known.Zero.countTrailingOnes() >= 2;
 }
 
 SDValue XCoreTargetLowering::
@@ -1601,13 +1602,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
       if (OutVal.hasOneUse()) {
         unsigned BitWidth = OutVal.getValueSizeInBits();
         APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
-        APInt KnownZero, KnownOne;
+        KnownBits Known;
         TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                               !DCI.isBeforeLegalizeOps());
         const TargetLowering &TLI = DAG.getTargetLoweringInfo();
         if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) ||
-            TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne,
-                                     TLO))
+            TLI.SimplifyDemandedBits(OutVal, DemandedMask, Known, TLO))
           DCI.CommitTargetLoweringOpt(TLO);
       }
       break;
@@ -1618,13 +1618,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
       if (Time.hasOneUse()) {
         unsigned BitWidth = Time.getValueSizeInBits();
         APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
-        APInt KnownZero, KnownOne;
+        KnownBits Known;
         TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                               !DCI.isBeforeLegalizeOps());
         const TargetLowering &TLI = DAG.getTargetLoweringInfo();
         if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) ||
-            TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne,
-                                     TLO))
+            TLI.SimplifyDemandedBits(Time, DemandedMask, Known, TLO))
           DCI.CommitTargetLoweringOpt(TLO);
       }
       break;
@@ -1655,11 +1654,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
     // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
     // low bit set
     if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
-      APInt KnownZero, KnownOne;
+      KnownBits Known;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
-      DAG.computeKnownBits(N2, KnownZero, KnownOne);
-      if ((KnownZero & Mask) == Mask) {
+      DAG.computeKnownBits(N2, Known);
+      if ((Known.Zero & Mask) == Mask) {
         SDValue Carry = DAG.getConstant(0, dl, VT);
         SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
         SDValue Ops[] = { Result, Carry };
@@ -1678,11 +1677,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
 
     // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
     if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
-      APInt KnownZero, KnownOne;
+      KnownBits Known;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
-      DAG.computeKnownBits(N2, KnownZero, KnownOne);
-      if ((KnownZero & Mask) == Mask) {
+      DAG.computeKnownBits(N2, Known);
+      if ((Known.Zero & Mask) == Mask) {
         SDValue Borrow = N2;
         SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
                                      DAG.getConstant(0, dl, VT), N2);
@@ -1694,11 +1693,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
     // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
     // low bit set
     if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
-      APInt KnownZero, KnownOne;
+      KnownBits Known;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
-      DAG.computeKnownBits(N2, KnownZero, KnownOne);
-      if ((KnownZero & Mask) == Mask) {
+      DAG.computeKnownBits(N2, Known);
+      if ((Known.Zero & Mask) == Mask) {
         SDValue Borrow = DAG.getConstant(0, dl, VT);
         SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
         SDValue Ops[] = { Result, Borrow };
@@ -1822,20 +1821,19 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
 }
 
 void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                        APInt &KnownZero,
-                                                        APInt &KnownOne,
+                                                        KnownBits &Known,
                                                         const APInt &DemandedElts,
                                                         const SelectionDAG &DAG,
                                                         unsigned Depth) const {
-  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+  Known.Zero.clearAllBits(); Known.One.clearAllBits();
   switch (Op.getOpcode()) {
   default: break;
   case XCoreISD::LADD:
   case XCoreISD::LSUB:
     if (Op.getResNo() == 1) {
       // Top bits of carry / borrow are clear.
-      KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
-                                        KnownZero.getBitWidth() - 1);
+      Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
+                                         Known.getBitWidth() - 1);
     }
     break;
   case ISD::INTRINSIC_W_CHAIN:
@@ -1844,24 +1842,24 @@ void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
       switch (IntNo) {
       case Intrinsic::xcore_getts:
         // High bits are known to be zero.
-        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
-                                          KnownZero.getBitWidth() - 16);
+        Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
+                                           Known.getBitWidth() - 16);
         break;
       case Intrinsic::xcore_int:
       case Intrinsic::xcore_inct:
         // High bits are known to be zero.
-        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
-                                          KnownZero.getBitWidth() - 8);
+        Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
+                                           Known.getBitWidth() - 8);
         break;
       case Intrinsic::xcore_testct:
         // Result is either 0 or 1.
-        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
-                                          KnownZero.getBitWidth() - 1);
+        Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
+                                           Known.getBitWidth() - 1);
         break;
       case Intrinsic::xcore_testwct:
         // Result is in the range 0 - 4.
-        KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
-                                          KnownZero.getBitWidth() - 3);
+        Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(),
+                                           Known.getBitWidth() - 3);
         break;
       }
     }
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 188f4f1fa06..452d5b04605 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -200,8 +200,7 @@ namespace llvm {
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
     void computeKnownBitsForTargetNode(const SDValue Op,
-                                       APInt &KnownZero,
-                                       APInt &KnownOne,
+                                       KnownBits &Known,
                                        const APInt &DemandedElts,
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 5cc51cd7a99..87532d11ede 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -128,11 +128,11 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
 
 static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
   do {
-    SmallVector<WeakVH,8> WUsers(CE->user_begin(), CE->user_end());
+    SmallVector<WeakTrackingVH, 8> WUsers(CE->user_begin(), CE->user_end());
     std::sort(WUsers.begin(), WUsers.end());
     WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end());
     while (!WUsers.empty())
-      if (WeakVH WU = WUsers.pop_back_val()) {
+      if (WeakTrackingVH WU = WUsers.pop_back_val()) {
         if (PHINode *PN = dyn_cast<PHINode>(WU)) {
           for (int I = 0, E = PN->getNumIncomingValues(); I < E; ++I)
             if (PN->getIncomingValue(I) == CE) {
@@ -159,12 +159,12 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
 }
 
 static bool rewriteNonInstructionUses(GlobalVariable *GV, Pass *P) {
-  SmallVector<WeakVH,8> WUsers;
+  SmallVector<WeakTrackingVH, 8> WUsers;
   for (User *U : GV->users())
     if (!isa<Instruction>(U))
-      WUsers.push_back(WeakVH(U));
+      WUsers.push_back(WeakTrackingVH(U));
   while (!WUsers.empty())
-    if (WeakVH WU = WUsers.pop_back_val()) {
+    if (WeakTrackingVH WU = WUsers.pop_back_val()) {
       ConstantExpr *CE = dyn_cast<ConstantExpr>(WU);
       if (!CE || !replaceConstantExprOp(CE, P))
         return false;
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index a2c8a32dfe8..25db0eff884 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -106,9 +106,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
   AttributeList PAL = F->getAttributes();
 
   // First, determine the new argument list
-  unsigned ArgIndex = 0;
+  unsigned ArgNo = 0;
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
-       ++I, ++ArgIndex) {
+       ++I, ++ArgNo) {
     if (ByValArgsToTransform.count(&*I)) {
       // Simple byval argument? Just add all the struct element types.
       Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -120,7 +120,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
     } else if (!ArgsToPromote.count(&*I)) {
       // Unchanged argument
       Params.push_back(I->getType());
-      ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex));
+      ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
     } else if (I->use_empty()) {
       // Dead argument (which are always marked as promotable)
       ++NumArgumentsDead;
@@ -214,12 +214,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
     // Loop over the operands, inserting GEP and loads in the caller as
     // appropriate.
     CallSite::arg_iterator AI = CS.arg_begin();
-    ArgIndex = 1;
+    ArgNo = 0;
     for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
-         ++I, ++AI, ++ArgIndex)
+         ++I, ++AI, ++ArgNo)
       if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
         Args.push_back(*AI); // Unmodified argument
-        ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+        ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
       } else if (ByValArgsToTransform.count(&*I)) {
         // Emit a GEP and load for each element of the struct.
         Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -280,9 +280,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
       }
 
     // Push any varargs arguments on the list.
-    for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+    for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
       Args.push_back(*AI);
-      ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+      ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
     }
 
     SmallVector<OperandBundleDef, 1> OpBundles;
@@ -839,17 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
     // avoiding a register copy.
     if (PtrArg->hasStructRetAttr()) {
       unsigned ArgNo = PtrArg->getArgNo();
-      F->setAttributes(
-          F->getAttributes()
-              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
-              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      F->removeAttribute(ArgNo + 1, Attribute::StructRet);
+      F->addAttribute(ArgNo + 1, Attribute::NoAlias);
       for (Use &U : F->uses()) {
         CallSite CS(U.getUser());
-        CS.setAttributes(
-            CS.getAttributes()
-                .removeAttribute(F->getContext(), ArgNo + 1,
-                                 Attribute::StructRet)
-                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+        CS.removeAttribute(ArgNo + 1, Attribute::StructRet);
+        CS.addAttribute(ArgNo + 1, Attribute::NoAlias);
       }
     }
 
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 9648883b7f2..031c3d8a9eb 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -855,10 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
 
   bool MadeChange = false;
   for (Function *F : SCCNodes) {
-    if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+    if (F->doesNotAlias(AttributeList::ReturnIndex) ||
+        !F->getReturnType()->isPointerTy())
       continue;
 
-    F->setDoesNotAlias(0);
+    F->setDoesNotAlias(AttributeList::ReturnIndex);
     ++NumNoAlias;
     MadeChange = true;
   }
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index d66411f04cc..c7ef2494e3b 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -25,7 +26,6 @@
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Linker/Linker.h"
 #include "llvm/Object/IRObjectFile.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/SourceMgr.h"
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index ae9d4ce11e0..f277a51ae65 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
   // we delete a constant array, we may also be holding pointer to one of its
   // elements (or an element of one of its elements if we're dealing with an
   // array of arrays) in the worklist.
-  SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end());
+  SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
   while (!WorkList.empty()) {
     Value *UV = WorkList.pop_back_val();
     if (!UV)
@@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) {
       NewU->insertBefore(UI);
       UI->replaceUsesOfWith(U, NewU);
     }
-    U->dropAllReferences();
+    // We've replaced all the uses, so destroy the constant. (destroyConstant
+    // will update value handles and metadata.)
+    U->destroyConstant();
   }
 }
 
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 9c83f88b221..a8b0f32fd78 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
 name = IPO
 parent = Transforms
 library_name = ipo
-required_libraries = Analysis BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
+required_libraries = Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 771770ddc06..0e478ba607b 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -207,11 +207,13 @@ private:
 
   /// A work queue of functions that may have been modified and should be
   /// analyzed again.
-  std::vector<WeakVH> Deferred;
+  std::vector<WeakTrackingVH> Deferred;
 
   /// Checks the rules of order relation introduced among functions set.
   /// Returns true, if sanity check has been passed, and false if failed.
-  bool doSanityCheck(std::vector<WeakVH> &Worklist);
+#ifndef NDEBUG
+  bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist);
+#endif
 
   /// Insert a ComparableFunction into the FnTree, or merge it away if it's
   /// equal to one that's already present.
@@ -283,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() {
   return new MergeFunctions();
 }
 
-bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
+#ifndef NDEBUG
+bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
   if (const unsigned Max = NumFunctionsForSanityCheck) {
     unsigned TripleNumber = 0;
     bool Valid = true;
@@ -291,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
     dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
 
     unsigned i = 0;
-    for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end();
+    for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
+                                               E = Worklist.end();
          I != E && i < Max; ++I, ++i) {
       unsigned j = i;
-      for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
+      for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max;
+           ++J, ++j) {
         Function *F1 = cast<Function>(*I);
         Function *F2 = cast<Function>(*J);
         int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
@@ -312,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
           continue;
 
         unsigned k = j;
-        for (std::vector<WeakVH>::iterator K = J; K != E && k < Max;
+        for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max;
              ++k, ++K, ++TripleNumber) {
           if (K == J)
             continue;
@@ -351,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
   }
   return true;
 }
+#endif
 
 bool MergeFunctions::runOnModule(Module &M) {
   if (skipModule(M))
@@ -381,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) {
     // consider merging it. Otherwise it is dropped and never considered again.
     if ((I != S && std::prev(I)->first == I->first) ||
         (std::next(I) != IE && std::next(I)->first == I->first) ) {
-      Deferred.push_back(WeakVH(I->second));
+      Deferred.push_back(WeakTrackingVH(I->second));
     }
   }
   
   do {
-    std::vector<WeakVH> Worklist;
+    std::vector<WeakTrackingVH> Worklist;
     Deferred.swap(Worklist);
 
     DEBUG(doSanityCheck(Worklist));
@@ -395,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) {
     DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
 
     // Insert functions and merge them.
-    for (WeakVH &I : Worklist) {
+    for (WeakTrackingVH &I : Worklist) {
       if (!I)
         continue;
       Function *F = cast<Function>(I);
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 78e71c18fe2..1bb9d654ec1 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -16,8 +16,12 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -31,13 +35,18 @@ using namespace llvm;
 
 #define DEBUG_TYPE "partial-inlining"
 
-STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+STATISTIC(NumPartialInlined,
+          "Number of callsites functions partially inlined into.");
 
 // Command line option to disable partial-inlining. The default is false:
 static cl::opt<bool>
     DisablePartialInlining("disable-partial-inlining", cl::init(false),
                            cl::Hidden, cl::desc("Disable partial ininling"));
 
+static cl::opt<unsigned> MaxNumInlineBlocks(
+    "max-num-inline-blocks", cl::init(5), cl::Hidden,
+    cl::desc("Max Number of Blocks  To be Partially Inlined"));
+
 // Command line option to set the maximum number of partial inlining allowed
 // for the module. The default value of -1 means no limit.
 static cl::opt<int> MaxNumPartialInlining(
@@ -45,20 +54,52 @@ static cl::opt<int> MaxNumPartialInlining(
     cl::desc("Max number of partial inlining. The default is unlimited"));
 
 namespace {
+
+struct FunctionOutliningInfo {
+  FunctionOutliningInfo()
+      : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
+        ReturnBlockPreds() {}
+  // Returns the number of blocks to be inlined including all blocks
+  // in Entries and one return block.
+  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
+
+  // A set of blocks including the function entry that guard
+  // the region to be outlined.
+  SmallVector<BasicBlock *, 4> Entries;
+  // The return block that is not included in the outlined region.
+  BasicBlock *ReturnBlock;
+  // The dominating block of the region ot be outlined.
+  BasicBlock *NonReturnBlock;
+  // The set of blocks in Entries that that are predecessors to ReturnBlock
+  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
+};
+
 struct PartialInlinerImpl {
-  PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
+  PartialInlinerImpl(
+      std::function<AssumptionCache &(Function &)> *GetAC,
+      std::function<TargetTransformInfo &(Function &)> *GTTI,
+      Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
+      ProfileSummaryInfo *ProfSI)
+      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
   bool run(Module &M);
   Function *unswitchFunction(Function *F);
 
-private:
-  InlineFunctionInfo IFI;
-  int NumPartialInlining = 0;
+  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
 
+private:
+  int NumPartialInlining = 0;
+  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
+  std::function<TargetTransformInfo &(Function &)> *GetTTI;
+  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
+  ProfileSummaryInfo *PSI;
+
+  bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);
   bool IsLimitReached() {
     return (MaxNumPartialInlining != -1 &&
             NumPartialInlining >= MaxNumPartialInlining);
   }
 };
+
 struct PartialInlinerLegacyPass : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
   PartialInlinerLegacyPass() : ModulePass(ID) {
@@ -67,91 +108,319 @@ struct PartialInlinerLegacyPass : public ModulePass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
   }
   bool runOnModule(Module &M) override {
     if (skipModule(M))
       return false;
 
     AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
+    TargetTransformInfoWrapperPass *TTIWP =
+        &getAnalysis<TargetTransformInfoWrapperPass>();
+    ProfileSummaryInfo *PSI =
+        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
     std::function<AssumptionCache &(Function &)> GetAssumptionCache =
         [&ACT](Function &F) -> AssumptionCache & {
       return ACT->getAssumptionCache(F);
     };
-    InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
-    return PartialInlinerImpl(IFI).run(M);
+
+    std::function<TargetTransformInfo &(Function &)> GetTTI =
+        [&TTIWP](Function &F) -> TargetTransformInfo & {
+      return TTIWP->getTTI(F);
+    };
+
+    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
   }
 };
 }
 
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
-  // First, verify that this function is an unswitching candidate...
-  if (F->hasAddressTaken())
-    return nullptr;
-
+std::unique_ptr<FunctionOutliningInfo>
+PartialInlinerImpl::computeOutliningInfo(Function *F) {
   BasicBlock *EntryBlock = &F->front();
   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
   if (!BR || BR->isUnconditional())
-    return nullptr;
+    return std::unique_ptr<FunctionOutliningInfo>();
 
-  BasicBlock *ReturnBlock = nullptr;
-  BasicBlock *NonReturnBlock = nullptr;
-  unsigned ReturnCount = 0;
-  for (BasicBlock *BB : successors(EntryBlock)) {
-    if (isa<ReturnInst>(BB->getTerminator())) {
-      ReturnBlock = BB;
-      ReturnCount++;
-    } else
-      NonReturnBlock = BB;
+  // Returns true if Succ is BB's successor
+  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
+    return is_contained(successors(BB), Succ);
+  };
+
+  auto SuccSize = [](BasicBlock *BB) {
+    return std::distance(succ_begin(BB), succ_end(BB));
+  };
+
+  auto IsReturnBlock = [](BasicBlock *BB) {
+    TerminatorInst *TI = BB->getTerminator();
+    return isa<ReturnInst>(TI);
+  };
+
+  auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
+    if (IsReturnBlock(Succ1))
+      return std::make_tuple(Succ1, Succ2);
+    if (IsReturnBlock(Succ2))
+      return std::make_tuple(Succ2, Succ1);
+
+    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+  };
+
+  // Detect a triangular shape:
+  auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
+    if (IsSuccessor(Succ1, Succ2))
+      return std::make_tuple(Succ1, Succ2);
+    if (IsSuccessor(Succ2, Succ1))
+      return std::make_tuple(Succ2, Succ1);
+
+    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+  };
+
+  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
+      llvm::make_unique<FunctionOutliningInfo>();
+
+  BasicBlock *CurrEntry = EntryBlock;
+  bool CandidateFound = false;
+  do {
+    // The number of blocks to be inlined has already reached
+    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
+    // disables partial inlining for the function.
+    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
+      break;
+
+    if (SuccSize(CurrEntry) != 2)
+      break;
+
+    BasicBlock *Succ1 = *succ_begin(CurrEntry);
+    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
+
+    BasicBlock *ReturnBlock, *NonReturnBlock;
+    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+
+    if (ReturnBlock) {
+      OutliningInfo->Entries.push_back(CurrEntry);
+      OutliningInfo->ReturnBlock = ReturnBlock;
+      OutliningInfo->NonReturnBlock = NonReturnBlock;
+      CandidateFound = true;
+      break;
+    }
+
+    BasicBlock *CommSucc;
+    BasicBlock *OtherSucc;
+    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
+
+    if (!CommSucc)
+      break;
+
+    OutliningInfo->Entries.push_back(CurrEntry);
+    CurrEntry = OtherSucc;
+
+  } while (true);
+
+  if (!CandidateFound)
+    return std::unique_ptr<FunctionOutliningInfo>();
+
+  // Do sanity check of the entries: threre should not
+  // be any successors (not in the entry set) other than
+  // {ReturnBlock, NonReturnBlock}
+  assert(OutliningInfo->Entries[0] == &F->front());
+  DenseSet<BasicBlock *> Entries;
+  for (BasicBlock *E : OutliningInfo->Entries)
+    Entries.insert(E);
+
+  // Returns true of BB has Predecessor which is not
+  // in Entries set.
+  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
+    for (auto Pred : predecessors(BB)) {
+      if (!Entries.count(Pred))
+        return true;
+    }
+    return false;
+  };
+  auto CheckAndNormalizeCandidate =
+      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
+        for (BasicBlock *E : OutliningInfo->Entries) {
+          for (auto Succ : successors(E)) {
+            if (Entries.count(Succ))
+              continue;
+            if (Succ == OutliningInfo->ReturnBlock)
+              OutliningInfo->ReturnBlockPreds.push_back(E);
+            else if (Succ != OutliningInfo->NonReturnBlock)
+              return false;
+          }
+          // There should not be any outside incoming edges either:
+          if (HasNonEntryPred(E))
+            return false;
+        }
+        return true;
+      };
+
+  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
+    return std::unique_ptr<FunctionOutliningInfo>();
+
+  // Now further growing the candidate's inlining region by
+  // peeling off dominating blocks from the outlining region:
+  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
+    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
+    if (SuccSize(Cand) != 2)
+      break;
+
+    if (HasNonEntryPred(Cand))
+      break;
+
+    BasicBlock *Succ1 = *succ_begin(Cand);
+    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
+
+    BasicBlock *ReturnBlock, *NonReturnBlock;
+    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
+      break;
+
+    if (NonReturnBlock->getSinglePredecessor() != Cand)
+      break;
+
+    // Now grow and update OutlininigInfo:
+    OutliningInfo->Entries.push_back(Cand);
+    OutliningInfo->NonReturnBlock = NonReturnBlock;
+    OutliningInfo->ReturnBlockPreds.push_back(Cand);
+    Entries.insert(Cand);
   }
 
-  if (ReturnCount != 1)
+  return OutliningInfo;
+}
+
+bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
+                                             OptimizationRemarkEmitter &ORE) {
+  // TODO : more sharing with shouldInline in Inliner.cpp
+  using namespace ore;
+  Instruction *Call = CS.getInstruction();
+  Function *Callee = CS.getCalledFunction();
+  Function *Caller = CS.getCaller();
+  auto &CalleeTTI = (*GetTTI)(*Callee);
+  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
+                                *GetAssumptionCache, GetBFI, PSI);
+
+  if (IC.isAlways()) {
+    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
+             << NV("Callee", Callee)
+             << " should always be fully inlined, not partially");
+    return false;
+  }
+
+  if (IC.isNever()) {
+    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+             << NV("Callee", Callee) << " not partially inlined into "
+             << NV("Caller", Caller)
+             << " because it should never be inlined (cost=never)");
+    return false;
+  }
+
+  if (!IC) {
+    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+             << NV("Callee", Callee) << " not partially inlined into "
+             << NV("Caller", Caller) << " because too costly to inline (cost="
+             << NV("Cost", IC.getCost()) << ", threshold="
+             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+    return false;
+  }
+
+  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
+           << NV("Callee", Callee) << " can be partially inlined into "
+           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
+           << " (threshold="
+           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+  return true;
+}
+
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+  if (F->hasAddressTaken())
+    return nullptr;
+
+  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
+      computeOutliningInfo(F);
+
+  if (!OutliningInfo)
     return nullptr;
 
   // Clone the function, so that we can hack away on it.
   ValueToValueMapTy VMap;
   Function *DuplicateFunction = CloneFunction(F, VMap);
-  DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
-  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
-  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
-  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);
+  BasicBlock *NewReturnBlock =
+      cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]);
+  BasicBlock *NewNonReturnBlock =
+      cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]);
+  DenseSet<BasicBlock *> NewEntries;
+  for (BasicBlock *BB : OutliningInfo->Entries) {
+    NewEntries.insert(cast<BasicBlock>(VMap[BB]));
+  }
 
   // Go ahead and update all uses to the duplicate, so that we can just
   // use the inliner functionality when we're done hacking.
   F->replaceAllUsesWith(DuplicateFunction);
 
+  auto getFirstPHI = [](BasicBlock *BB) {
+    BasicBlock::iterator I = BB->begin();
+    PHINode *FirstPhi = nullptr;
+    while (I != BB->end()) {
+      PHINode *Phi = dyn_cast<PHINode>(I);
+      if (!Phi)
+        break;
+      if (!FirstPhi) {
+        FirstPhi = Phi;
+        break;
+      }
+    }
+    return FirstPhi;
+  };
   // Special hackery is needed with PHI nodes that have inputs from more than
   // one extracted block.  For simplicity, just split the PHIs into a two-level
   // sequence of PHIs, some of which will go in the extracted region, and some
   // of which will go outside.
   BasicBlock *PreReturn = NewReturnBlock;
-  NewReturnBlock = NewReturnBlock->splitBasicBlock(
-      NewReturnBlock->getFirstNonPHI()->getIterator());
-  BasicBlock::iterator I = PreReturn->begin();
-  Instruction *Ins = &NewReturnBlock->front();
-  while (I != PreReturn->end()) {
-    PHINode *OldPhi = dyn_cast<PHINode>(I);
-    if (!OldPhi)
-      break;
+  // only split block when necessary:
+  PHINode *FirstPhi = getFirstPHI(PreReturn);
+  unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size();
+  if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
 
-    PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
-    OldPhi->replaceAllUsesWith(RetPhi);
-    Ins = NewReturnBlock->getFirstNonPHI();
+    NewReturnBlock = NewReturnBlock->splitBasicBlock(
+        NewReturnBlock->getFirstNonPHI()->getIterator());
+    BasicBlock::iterator I = PreReturn->begin();
+    Instruction *Ins = &NewReturnBlock->front();
+    while (I != PreReturn->end()) {
+      PHINode *OldPhi = dyn_cast<PHINode>(I);
+      if (!OldPhi)
+        break;
 
-    RetPhi->addIncoming(&*I, PreReturn);
-    RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
-                        NewEntryBlock);
-    OldPhi->removeIncomingValue(NewEntryBlock);
+      PHINode *RetPhi =
+          PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
+      OldPhi->replaceAllUsesWith(RetPhi);
+      Ins = NewReturnBlock->getFirstNonPHI();
 
-    ++I;
+      RetPhi->addIncoming(&*I, PreReturn);
+      for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) {
+        BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+        RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
+        OldPhi->removeIncomingValue(NewE);
+      }
+      ++I;
+    }
+    for (auto E : OutliningInfo->ReturnBlockPreds) {
+      BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+      NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+    }
   }
-  NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
 
+  // Returns true if the block is to be partial inlined into the caller
+  // (i.e. not to be extracted to the out of line function)
+  auto ToBeInlined = [=](BasicBlock *BB) {
+    return BB == NewReturnBlock || NewEntries.count(BB);
+  };
   // Gather up the blocks that we're going to extract.
   std::vector<BasicBlock *> ToExtract;
   ToExtract.push_back(NewNonReturnBlock);
   for (BasicBlock &BB : *DuplicateFunction)
-    if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
-        &BB != NewNonReturnBlock)
+    if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
       ToExtract.push_back(&BB);
 
   // The CodeExtractor needs a dominator tree.
@@ -183,16 +452,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
 
     if (IsLimitReached())
       continue;
-    NumPartialInlining++;
 
     OptimizationRemarkEmitter ORE(CS.getCaller());
+    if (!shouldPartialInline(CS, ORE))
+      continue;
+
     DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
     BasicBlock *Block = CS.getParent();
     ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
              << ore::NV("Callee", F) << " partially inlined into "
              << ore::NV("Caller", CS.getCaller()));
 
+    InlineFunctionInfo IFI(nullptr, GetAssumptionCache);
     InlineFunction(CS, IFI);
+    NumPartialInlining++;
+    // update stats
+    NumPartialInlined++;
   }
 
   // Ditch the duplicate, since we're done with it, and rewrite all remaining
@@ -200,7 +475,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
   DuplicateFunction->replaceAllUsesWith(F);
   DuplicateFunction->eraseFromParent();
 
-  ++NumPartialInlined;
 
   return ExtractedFunction;
 }
@@ -246,6 +520,8 @@ char PartialInlinerLegacyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                       "Partial Inliner", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                     "Partial Inliner", false, false)
 
@@ -256,12 +532,25 @@ ModulePass *llvm::createPartialInliningPass() {
 PreservedAnalyses PartialInlinerPass::run(Module &M,
                                           ModuleAnalysisManager &AM) {
   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
   std::function<AssumptionCache &(Function &)> GetAssumptionCache =
       [&FAM](Function &F) -> AssumptionCache & {
     return FAM.getResult<AssumptionAnalysis>(F);
   };
-  InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
-  if (PartialInlinerImpl(IFI).run(M))
+
+  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
+      [&FAM](Function &F) -> BlockFrequencyInfo & {
+    return FAM.getResult<BlockFrequencyAnalysis>(F);
+  };
+
+  std::function<TargetTransformInfo &(Function &)> GetTTI =
+      [&FAM](Function &F) -> TargetTransformInfo & {
+    return FAM.getResult<TargetIRAnalysis>(F);
+  };
+
+  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
     return PreservedAnalyses::none();
   return PreservedAnalyses::all();
 }
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 0d5910ebbfc..20359457261 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Vectorize.h"
 
 using namespace llvm;
@@ -137,14 +138,19 @@ static cl::opt<int> PreInlineThreshold(
              "(default = 75)"));
 
 static cl::opt<bool> EnableGVNHoist(
-    "enable-gvn-hoist", cl::init(true), cl::Hidden,
-    cl::desc("Enable the GVN hoisting pass (default = on)"));
+    "enable-gvn-hoist", cl::init(false), cl::Hidden,
+    cl::desc("Enable the GVN hoisting pass (default = off)"));
 
 static cl::opt<bool>
     DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
                               cl::Hidden,
                               cl::desc("Disable shrink-wrap library calls"));
 
+static cl::opt<bool>
+    EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false),
+                             cl::Hidden,
+                             cl::desc("Enable the simple loop unswitch pass."));
+
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
@@ -318,7 +324,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // Rotate Loop - disable header duplication at -Oz
   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
   MPM.add(createLICMPass());                  // Hoist loop invariants
-  MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+  if (EnableSimpleLoopUnswitch)
+    MPM.add(createSimpleLoopUnswitchLegacyPass());
+  else
+    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
   MPM.add(createCFGSimplificationPass());
   addInstructionCombiningPass(MPM);
   MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 9801a0a6141..d3a3c24ce7b 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -30,42 +30,11 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 using namespace llvm;
 
 namespace {
 
-// Produce a unique identifier for this module by taking the MD5 sum of the
-// names of the module's strong external symbols. This identifier is
-// normally guaranteed to be unique, or the program would fail to link due to
-// multiply defined symbols.
-//
-// If the module has no strong external symbols (such a module may still have a
-// semantic effect if it performs global initialization), we cannot produce a
-// unique identifier for this module, so we return the empty string, which
-// causes the entire module to be written as a regular LTO module.
-std::string getModuleId(Module *M) {
-  MD5 Md5;
-  bool ExportsSymbols = false;
-  for (auto &GV : M->global_values()) {
-    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
-        !GV.hasExternalLinkage())
-      continue;
-    ExportsSymbols = true;
-    Md5.update(GV.getName());
-    Md5.update(ArrayRef<uint8_t>{0});
-  }
-
-  if (!ExportsSymbols)
-    return "";
-
-  MD5::MD5Result R;
-  Md5.final(R);
-
-  SmallString<32> Str;
-  MD5::stringifyResult(R, Str);
-  return ("$" + Str).str();
-}
-
 // Promote each local-linkage entity defined by ExportM and used by ImportM by
 // changing visibility and appending the given ModuleId.
 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
@@ -251,7 +220,7 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
 void splitAndWriteThinLTOBitcode(
     raw_ostream &OS, raw_ostream *ThinLinkOS,
     function_ref<AAResults &(Function &)> AARGetter, Module &M) {
-  std::string ModuleId = getModuleId(&M);
+  std::string ModuleId = getUniqueModuleId(&M);
   if (ModuleId.empty()) {
     // We couldn't generate a module ID for this module, just write it out as a
     // regular LTO module.
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 030461004f5..4f1f1949976 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -861,7 +861,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
   // Find the most significant known 0 other than the sign bit.
   int BitWidth = Op0KnownZero.getBitWidth();
   APInt Op0KnownZeroTemp(Op0KnownZero);
-  Op0KnownZeroTemp.clearBit(BitWidth - 1);
+  Op0KnownZeroTemp.clearSignBit();
   int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1;
 
   int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1;
@@ -1037,7 +1037,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
-                                 I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
+                                 I.hasNoUnsignedWrap(), SQ))
     return replaceInstUsesWith(I, V);
 
    // (A*B)+(A*C) -> A*(B+C) etc
@@ -1358,8 +1358,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V =
-          SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ))
     return replaceInstUsesWith(I, V);
 
   if (isa<Constant>(RHS))
@@ -1550,7 +1549,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
-                                 I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
+                                 I.hasNoUnsignedWrap(), SQ))
     return replaceInstUsesWith(I, V);
 
   // (A*B)-(A*C) -> A*(B-C) etc
@@ -1756,8 +1755,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V =
-          SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ))
     return replaceInstUsesWith(I, V);
 
   // fsub nsz 0, X ==> fsub nsz -0.0, X
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a97b5a9ec0b..c7092bf3a39 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1213,7 +1213,7 @@ static Instruction *foldAndToXor(BinaryOperator &I,
   // (~B | A) & (~A | B) --> ~(A ^ B)
   // (~B | A) & (B | ~A) --> ~(A ^ B)
   if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
-      match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B))))
+      match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
     return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
 
   return nullptr;
@@ -1254,7 +1254,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyAndInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   // See if we can simplify any instructions used by the instruction whose sole
@@ -2039,7 +2039,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyOrInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   // See if we can simplify any instructions used by the instruction whose sole
@@ -2415,7 +2415,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyXorInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *NewXor = foldXorToXor(I))
@@ -2433,25 +2433,32 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   if (Value *V = SimplifyBSwap(I))
     return replaceInstUsesWith(I, V);
 
+  // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
+  Value *X, *Y;
+
+  // We must eliminate the and/or (one-use) for these transforms to not increase
+  // the instruction count.
+  // ~(~X & Y) --> (X | ~Y)
+  // ~(Y & ~X) --> (X | ~Y)
+  if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) {
+    Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not");
+    return BinaryOperator::CreateOr(X, NotY);
+  }
+  // ~(~X | Y) --> (X & ~Y)
+  // ~(Y | ~X) --> (X & ~Y)
+  if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) {
+    Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not");
+    return BinaryOperator::CreateAnd(X, NotY);
+  }
+
   // Is this a 'not' (~) fed by a binary operator?
   BinaryOperator *NotOp;
   if (match(&I, m_Not(m_BinOp(NotOp)))) {
     if (NotOp->getOpcode() == Instruction::And ||
         NotOp->getOpcode() == Instruction::Or) {
-      // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
-      // ~(~X | Y) === (X & ~Y) - De Morgan's Law
-      if (dyn_castNotVal(NotOp->getOperand(1)))
-        NotOp->swapOperands();
-      if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) {
-        Value *NotY = Builder->CreateNot(
-            NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not");
-        if (NotOp->getOpcode() == Instruction::And)
-          return BinaryOperator::CreateOr(Op0NotVal, NotY);
-        return BinaryOperator::CreateAnd(Op0NotVal, NotY);
-      }
-
-      // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
-      // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+      // Apply DeMorgan's Law when inverts are free:
+      // ~(X & Y) --> (~X | ~Y)
+      // ~(X | Y) --> (~X & ~Y)
       if (IsFreeToInvert(NotOp->getOperand(0),
                          NotOp->getOperand(0)->hasOneUse()) &&
           IsFreeToInvert(NotOp->getOperand(1),
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 313ab13b9e2..e9286b1bf17 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -379,7 +379,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
     for (unsigned i = 0; i != NumSubElts; ++i) {
       unsigned SubEltIdx = (NumSubElts - 1) - i;
       auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
-      Count = Count.shl(BitWidth);
+      Count <<= BitWidth;
       Count |= SubElt->getValue().zextOrTrunc(64);
     }
   }
@@ -1384,17 +1384,17 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
 
   // Create a mask for bits above (ctlz) or below (cttz) the first known one.
   bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
-  unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros()
-                              : Known.One.countLeadingZeros();
-  APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits)
-                    : APInt::getHighBitsSet(BitWidth, NumMaskBits);
+  unsigned PossibleZeros = IsTZ ? Known.One.countTrailingZeros()
+                                : Known.One.countLeadingZeros();
+  unsigned DefiniteZeros = IsTZ ? Known.Zero.countTrailingOnes()
+                                : Known.Zero.countLeadingOnes();
 
   // If all bits above (ctlz) or below (cttz) the first known one are known
   // zero, this value is constant.
   // FIXME: This should be in InstSimplify because we're replacing an
   // instruction with a constant.
-  if (Mask.isSubsetOf(Known.Zero)) {
-    auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits));
+  if (PossibleZeros == DefiniteZeros) {
+    auto *C = ConstantInt::get(IT, DefiniteZeros);
     return IC.replaceInstUsesWith(II, C);
   }
 
@@ -1818,8 +1818,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
 /// lifting.
 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   auto Args = CI.arg_operands();
-  if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
-                              &TLI, &DT, &AC))
+  if (Value *V =
+          SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ))
     return replaceInstUsesWith(CI, V);
 
   if (isFreeCall(&CI, &TLI))
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d846a631b96..60970775de6 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -190,8 +190,8 @@ static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
   Max = Known.One|UnknownBits;
 
   if (UnknownBits.isNegative()) { // Sign bit is unknown
-    Min.setBit(Min.getBitWidth()-1);
-    Max.clearBit(Max.getBitWidth()-1);
+    Min.setSignBit();
+    Max.clearSignBit();
   }
 }
 
@@ -4269,8 +4269,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     Changed = true;
   }
 
-  if (Value *V =
-          SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I))
+  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1,
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // comparing -val or val with non-zero is the same as just comparing val
@@ -4778,8 +4778,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1,
-                                  I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I))
+  if (Value *V =
+          SimplifyFCmpInst(I.getPredicate(), Op0, Op1, I.getFastMathFlags(),
+                           SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Simplify 'fcmp pred X, X'
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 776686d3d11..3be6419a129 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -17,9 +17,11 @@
 
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -27,10 +29,9 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Dwarf.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/IR/DIBuilder.h"
 
 #define DEBUG_TYPE "instcombine"
 
@@ -193,7 +194,7 @@ private:
   TargetLibraryInfo &TLI;
   DominatorTree &DT;
   const DataLayout &DL;
-
+  const SimplifyQuery SQ;
   // Optional analyses. When non-null, these can both be used to do better
   // combining and will be updated to reflect any changes.
   LoopInfo *LI;
@@ -203,11 +204,11 @@ private:
 public:
   InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
                bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
-               AssumptionCache &AC, TargetLibraryInfo &TLI,
-               DominatorTree &DT, const DataLayout &DL, LoopInfo *LI)
+               AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
+               const DataLayout &DL, LoopInfo *LI)
       : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
         ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
-        DL(DL), LI(LI), MadeIRChange(false) {}
+        DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {}
 
   /// \brief Run the combiner over the entire worklist until it is empty.
   ///
@@ -533,6 +534,12 @@ private:
   /// value, or null if it didn't simplify.
   Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
 
+  /// This tries to simplify binary operations by factorizing out common terms
+  /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
+  Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &,
+                          Instruction::BinaryOps, Value *, Value *, Value *,
+                          Value *);
+
   /// \brief Attempts to replace V with a simpler value based on the demanded
   /// bits.
   Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known,
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ce66581a491..face9d9237a 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -179,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyMulInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -606,8 +606,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   if (isa<Constant>(Op0))
     std::swap(Op0, Op1);
 
-  if (Value *V =
-          SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ))
     return replaceInstUsesWith(I, V);
 
   bool AllowReassociate = I.hasUnsafeAlgebra();
@@ -1111,7 +1110,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyUDivInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer div common cases
@@ -1184,7 +1183,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifySDivInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer div common cases
@@ -1296,8 +1295,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
-                                  DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ))
     return replaceInstUsesWith(I, V);
 
   if (isa<Constant>(Op0))
@@ -1481,7 +1479,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyURemInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *common = commonIRemTransforms(I))
@@ -1524,7 +1522,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifySRemInst(Op0, Op1, SQ))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer rem common cases
@@ -1597,8 +1595,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
-                                  DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ))
     return replaceInstUsesWith(I, V);
 
   // Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 85e5b6ba2dc..1117c11f4f5 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
 // PHINode simplification
 //
 Instruction *InstCombiner::visitPHINode(PHINode &PN) {
-  if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyInstruction(&PN, SQ))
     return replaceInstUsesWith(PN, V);
 
   if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 76829c5e457..7afb8814fe5 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1121,8 +1121,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *FalseVal = SI.getFalseValue();
   Type *SelType = SI.getType();
 
-  if (Value *V =
-          SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ))
     return replaceInstUsesWith(SI, V);
 
   if (Instruction *I = canonicalizeSelectToShuffle(SI))
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index f77d713b9b0..219effce7ba 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -520,7 +520,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(),
-                                 I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
+                                 I.hasNoUnsignedWrap(), SQ))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *V = commonShiftTransforms(I))
@@ -618,7 +618,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *R = commonShiftTransforms(I))
@@ -702,7 +702,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *R = commonShiftTransforms(I))
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8d0ed853277..0195c5e727c 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -589,12 +589,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 
         // If LHS is non-negative or has all low bits zero, then the upper bits
         // are all zero.
-        if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero))
+        if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
           Known.Zero |= ~LowBits;
 
         // If LHS is negative and not all low bits are zero, then the upper bits
         // are all one.
-        if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One))
+        if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
           Known.One |= ~LowBits;
 
         assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
@@ -607,8 +607,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if (DemandedMask.isSignBitSet()) {
       computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
       // If it's known zero, our sign bit is also zero.
-      if (LHSKnown.Zero.isSignBitSet())
-        Known.Zero.setSignBit();
+      if (LHSKnown.isNonNegative())
+        Known.makeNonNegative();
     }
     break;
   case Instruction::URem: {
@@ -1537,7 +1537,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
         for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
           APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
           LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
-          LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum));
+          LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
           UndefElts |= LaneElts;
         }
       }
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index e89b400a4af..7fc6774f184 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -144,8 +144,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
 }
 
 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-  if (Value *V = SimplifyExtractElementInst(
-          EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(),
+                                            EI.getIndexOperand(), SQ))
     return replaceInstUsesWith(EI, V);
 
   // If vector val is constant with all elements the same, replace EI with
@@ -1140,8 +1140,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   SmallVector<int, 16> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
 
-  if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(),
-                                          SVI.getType(), DL, &TLI, &DT, &AC))
+  if (auto *V =
+          SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ))
     return replaceInstUsesWith(SVI, V);
 
   bool MadeChange = false;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4729c79ca4c..1eb98b18bfb 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = I.getOperand(1);
 
         // Does "B op C" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) {
+        if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) {
           // It simplifies to V.  Form "A op V".
           I.setOperand(0, A);
           I.setOperand(1, V);
@@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = Op1->getOperand(1);
 
         // Does "A op B" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) {
+        if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) {
           // It simplifies to V.  Form "V op C".
           I.setOperand(0, V);
           I.setOperand(1, C);
@@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = I.getOperand(1);
 
         // Does "C op A" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
+        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {
           // It simplifies to V.  Form "V op B".
           I.setOperand(0, V);
           I.setOperand(1, B);
@@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = Op1->getOperand(1);
 
         // Does "C op A" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
+        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {
           // It simplifies to V.  Form "B op V".
           I.setOperand(0, B);
           I.setOperand(1, V);
@@ -498,10 +498,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
 
 /// This tries to simplify binary operations by factorizing out common terms
 /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
-static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
-                               const DataLayout &DL, BinaryOperator &I,
-                               Instruction::BinaryOps InnerOpcode, Value *A,
-                               Value *B, Value *C, Value *D) {
+Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
+                                      BinaryOperator &I,
+                                      Instruction::BinaryOps InnerOpcode,
+                                      Value *A, Value *B, Value *C, Value *D) {
   assert(A && B && C && D && "All values must be provided");
 
   Value *V = nullptr;
@@ -521,7 +521,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
         std::swap(C, D);
       // Consider forming "A op' (B op D)".
       // If "B op D" simplifies then it can be formed with no cost.
-      V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+      V = SimplifyBinOp(TopLevelOpcode, B, D, SQ);
       // If "B op D" doesn't simplify then only go on if both of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
       if (!V && LHS->hasOneUse() && RHS->hasOneUse())
@@ -540,7 +540,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
         std::swap(C, D);
       // Consider forming "(A op C) op' B".
       // If "A op C" simplifies then it can be formed with no cost.
-      V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+      V = SimplifyBinOp(TopLevelOpcode, A, C, SQ);
 
       // If "A op C" doesn't simplify then only go on if both of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
@@ -610,23 +610,23 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
     // a common term.
     if (Op0 && Op1 && LHSOpcode == RHSOpcode)
-      if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D))
+      if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D))
         return V;
 
     // The instruction has the form "(A op' B) op (C)".  Try to factorize common
     // term.
     if (Op0)
       if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
-        if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS,
-                                        Ident))
+        if (Value *V =
+                tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident))
           return V;
 
     // The instruction has the form "(B) op (C op' D)".  Try to factorize common
     // term.
     if (Op1)
       if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
-        if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, Ident,
-                                        C, D))
+        if (Value *V =
+                tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D))
           return V;
   }
 
@@ -638,8 +638,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
 
     // Do "A op C" and "B op C" both simplify?
-    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL))
-      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) {
+    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ))
+      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) {
         // They do! Return "L op' R".
         ++NumExpand;
         C = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -655,8 +655,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
 
     // Do "A op B" and "A op C" both simplify?
-    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL))
-      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) {
+    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ))
+      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) {
         // They do! Return "L op' R".
         ++NumExpand;
         A = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -672,14 +672,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
       if (SI0->getCondition() == SI1->getCondition()) {
         Value *SI = nullptr;
         if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
-                                     SI1->getFalseValue(), DL, &TLI, &DT, &AC))
+                                     SI1->getFalseValue(), SQ))
           SI = Builder->CreateSelect(SI0->getCondition(),
                                      Builder->CreateBinOp(TopLevelOpcode,
                                                           SI0->getTrueValue(),
                                                           SI1->getTrueValue()),
                                      V);
         if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
-                                     SI1->getTrueValue(), DL, &TLI, &DT, &AC))
+                                     SI1->getTrueValue(), SQ))
           SI = Builder->CreateSelect(
               SI0->getCondition(), V,
               Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
@@ -1399,8 +1399,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
 
-  if (Value *V =
-          SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ))
     return replaceInstUsesWith(GEP, V);
 
   Value *PtrOp = GEP.getOperand(0);
@@ -1589,7 +1588,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (SO1->getType() != GO1->getType())
         return nullptr;
 
-      Value* Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC);
+      Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ);
       // Only do the combine when we are sure the cost after the
       // merge is never more than that before the merge.
       if (Sum == nullptr)
@@ -1949,9 +1948,9 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
   return isAllocLikeFn(V, TLI) && V != AI;
 }
 
-static bool
-isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
-                     const TargetLibraryInfo *TLI) {
+static bool isAllocSiteRemovable(Instruction *AI,
+                                 SmallVectorImpl<WeakTrackingVH> &Users,
+                                 const TargetLibraryInfo *TLI) {
   SmallVector<Instruction*, 4> Worklist;
   Worklist.push_back(AI);
 
@@ -2035,7 +2034,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
   // If we have a malloc call which is only used in any amount of comparisons
   // to null and free calls, delete the calls and replace the comparisons with
   // true or false as appropriate.
-  SmallVector<WeakVH, 64> Users;
+  SmallVector<WeakTrackingVH, 64> Users;
   if (isAllocSiteRemovable(&MI, Users, &TLI)) {
     for (unsigned i = 0, e = Users.size(); i != e; ++i) {
       // Lowering all @llvm.objectsize calls first because they may
@@ -2304,8 +2303,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
   if (!EV.hasIndices())
     return replaceInstUsesWith(EV, Agg);
 
-  if (Value *V =
-          SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC))
+  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ))
     return replaceInstUsesWith(EV, V);
 
   if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b866958e3c4..b034ccc4693 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -101,6 +101,10 @@ static const char *const kAsanRegisterImageGlobalsName =
   "__asan_register_image_globals";
 static const char *const kAsanUnregisterImageGlobalsName =
   "__asan_unregister_image_globals";
+static const char *const kAsanRegisterElfGlobalsName =
+  "__asan_register_elf_globals";
+static const char *const kAsanUnregisterElfGlobalsName =
+  "__asan_unregister_elf_globals";
 static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
 static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
 static const char *const kAsanInitName = "__asan_init";
@@ -120,8 +124,11 @@ static const char *const kAsanPoisonStackMemoryName =
     "__asan_poison_stack_memory";
 static const char *const kAsanUnpoisonStackMemoryName =
     "__asan_unpoison_stack_memory";
+
+// ASan version script has __asan_* wildcard. Triple underscore prevents a
+// linker (gold) warning about attempting to export a local symbol.
 static const char *const kAsanGlobalsRegisteredFlagName =
-    "__asan_globals_registered";
+    "___asan_globals_registered";
 
 static const char *const kAsanOptionDetectUseAfterReturn =
     "__asan_option_detect_stack_use_after_return";
@@ -270,6 +277,13 @@ static cl::opt<bool>
                             "code stripping of globals"),
                    cl::Hidden, cl::init(true));
 
+// This is on by default even though there is a bug in gold:
+// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
+static cl::opt<bool>
+    ClWithComdat("asan-with-comdat",
+                 cl::desc("Place ASan constructors in comdat sections"),
+                 cl::Hidden, cl::init(true));
+
 // Debug flags.
 static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
                             cl::init(0));
@@ -607,10 +621,14 @@ public:
 private:
   void initializeCallbacks(Module &M);
 
-  bool InstrumentGlobals(IRBuilder<> &IRB, Module &M);
+  bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat);
   void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
                              ArrayRef<GlobalVariable *> ExtendedGlobals,
                              ArrayRef<Constant *> MetadataInitializers);
+  void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M,
+                            ArrayRef<GlobalVariable *> ExtendedGlobals,
+                            ArrayRef<Constant *> MetadataInitializers,
+                            const std::string &UniqueModuleId);
   void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,
                               ArrayRef<GlobalVariable *> ExtendedGlobals,
                               ArrayRef<Constant *> MetadataInitializers);
@@ -621,7 +639,8 @@ private:
 
   GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,
                                        StringRef OriginalName);
-  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata);
+  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata,
+                                  StringRef InternalSuffix);
   IRBuilder<> CreateAsanModuleDtor(Module &M);
 
   bool ShouldInstrumentGlobal(GlobalVariable *G);
@@ -647,6 +666,11 @@ private:
   Function *AsanUnregisterGlobals;
   Function *AsanRegisterImageGlobals;
   Function *AsanUnregisterImageGlobals;
+  Function *AsanRegisterElfGlobals;
+  Function *AsanUnregisterElfGlobals;
+
+  Function *AsanCtorFunction = nullptr;
+  Function *AsanDtorFunction = nullptr;
 };
 
 // Stack poisoning does not play well with exception handling.
@@ -1431,8 +1455,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
 void AddressSanitizerModule::createInitializerPoisonCalls(
     Module &M, GlobalValue *ModuleName) {
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+  if (!GV)
+    return;
+
+  ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA)
+    return;
 
-  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   for (Use &OP : CA->operands()) {
     if (isa<ConstantAggregateZero>(OP)) continue;
     ConstantStruct *CS = cast<ConstantStruct>(OP);
@@ -1594,12 +1623,22 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
       checkSanitizerInterfaceFunction(M.getOrInsertFunction(
           kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
   AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
+
+  AsanRegisterElfGlobals = checkSanitizerInterfaceFunction(
+      M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
+                            IntptrTy, IntptrTy, IntptrTy));
+  AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage);
+
+  AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction(
+      M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
+                            IntptrTy, IntptrTy, IntptrTy));
+  AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage);
 }
 
 // Put the metadata and the instrumented global in the same group. This ensures
 // that the metadata is discarded if the instrumented global is discarded.
 void AddressSanitizerModule::SetComdatForGlobalMetadata(
-    GlobalVariable *G, GlobalVariable *Metadata) {
+    GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
   Module &M = *G->getParent();
   Comdat *C = G->getComdat();
   if (!C) {
@@ -1609,7 +1648,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(
       assert(G->hasLocalLinkage());
       G->setName(Twine(kAsanGenPrefix) + "_anon_global");
     }
-    C = M.getOrInsertComdat(G->getName());
+
+    if (!InternalSuffix.empty() && G->hasLocalLinkage()) {
+      std::string Name = G->getName();
+      Name += InternalSuffix;
+      C = M.getOrInsertComdat(Name);
+    } else {
+      C = M.getOrInsertComdat(G->getName());
+    }
+
     // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
     if (TargetTriple.isOSBinFormatCOFF())
       C->setSelectionKind(Comdat::NoDuplicates);
@@ -1636,11 +1683,10 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
 }
 
 IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
-  Function *AsanDtorFunction =
+  AsanDtorFunction =
       Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
                        GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
   BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
-  appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
 
   return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
 }
@@ -1665,10 +1711,69 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF(
            "global metadata will not be padded appropriately");
     Metadata->setAlignment(SizeOfGlobalStruct);
 
-    SetComdatForGlobalMetadata(G, Metadata);
+    SetComdatForGlobalMetadata(G, Metadata, "");
   }
 }
 
+void AddressSanitizerModule::InstrumentGlobalsELF(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers,
+    const std::string &UniqueModuleId) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+  SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size());
+  for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+    GlobalVariable *G = ExtendedGlobals[i];
+    GlobalVariable *Metadata =
+        CreateMetadataGlobal(M, MetadataInitializers[i], G->getName());
+    MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G));
+    Metadata->setMetadata(LLVMContext::MD_associated, MD);
+    MetadataGlobals[i] = Metadata;
+
+    SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId);
+  }
+
+  // Update llvm.compiler.used, adding the new metadata globals. This is
+  // needed so that during LTO these variables stay alive.
+  if (!MetadataGlobals.empty())
+    appendToCompilerUsed(M, MetadataGlobals);
+
+  // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+  // to look up the loaded image that contains it. Second, we can store in it
+  // whether registration has already occurred, to prevent duplicate
+  // registration.
+  //
+  // Common linkage ensures that there is only one global per shared library.
+  GlobalVariable *RegisteredFlag = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::CommonLinkage,
+      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+  // Create start and stop symbols.
+  GlobalVariable *StartELFMetadata = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+      "__start_" + getGlobalMetadataSection());
+  StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+  GlobalVariable *StopELFMetadata = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+      "__stop_" + getGlobalMetadataSection());
+  StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+
+  // Create a call to register the globals with the runtime.
+  IRB.CreateCall(AsanRegisterElfGlobals,
+                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+                  IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+                  IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+
+  // We also need to unregister globals at the end, e.g., when a shared library
+  // gets closed.
+  IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+  IRB_Dtor.CreateCall(AsanUnregisterElfGlobals,
+                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+                       IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+                       IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+}
+
 void AddressSanitizerModule::InstrumentGlobalsMachO(
     IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
     ArrayRef<Constant *> MetadataInitializers) {
@@ -1756,7 +1861,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
 // This function replaces all global variables with new variables that have
 // trailing redzones. It also creates a function that poisons
 // redzones and inserts this function into llvm.global_ctors.
-bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
+// Sets *CtorComdat to true if the global registration code emitted into the
+// asan constructor is comdat-compatible.
+bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) {
+  *CtorComdat = false;
   GlobalsMD.init(M);
 
   SmallVector<GlobalVariable *, 16> GlobalsToChange;
@@ -1766,7 +1874,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
   }
 
   size_t n = GlobalsToChange.size();
-  if (n == 0) return false;
+  if (n == 0) {
+    *CtorComdat = true;
+    return false;
+  }
 
   auto &DL = M.getDataLayout();
 
@@ -1911,7 +2022,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
     Initializers[i] = Initializer;
   }
 
-  if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
+  std::string ELFUniqueModuleId =
+      (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M)
+                                                        : "";
+
+  if (!ELFUniqueModuleId.empty()) {
+    InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId);
+    *CtorComdat = true;
+  } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
     InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
   } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
     InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
@@ -1938,17 +2056,36 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
   if (CompileKernel)
     return false;
 
-  Function *AsanCtorFunction;
+  // Create a module constructor. A destructor is created lazily because not all
+  // platforms, and not all modules need it.
   std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
       M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{},
       /*InitArgs=*/{}, kAsanVersionCheckName);
-  appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
 
+  bool CtorComdat = true;
   bool Changed = false;
   // TODO(glider): temporarily disabled globals instrumentation for KASan.
   if (ClGlobals) {
     IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator());
-    Changed |= InstrumentGlobals(IRB, M);
+    Changed |= InstrumentGlobals(IRB, M, &CtorComdat);
+  }
+
+  // Put the constructor and destructor in comdat if both
+  // (1) global instrumentation is not TU-specific
+  // (2) target is ELF.
+  if (ClWithComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) {
+    AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName));
+    appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority,
+                        AsanCtorFunction);
+    if (AsanDtorFunction) {
+      AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName));
+      appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority,
+                          AsanDtorFunction);
+    }
+  } else {
+    appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
+    if (AsanDtorFunction)
+      appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
   }
 
   return Changed;
@@ -2586,7 +2723,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
     Value *NewAllocaPtr = IRB.CreateIntToPtr(
         IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
         AI->getType());
-    replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false);
+    replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref);
     AI->replaceAllUsesWith(NewAllocaPtr);
   }
 
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index d7eb857cff7..493d014586c 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -771,7 +771,7 @@ public:
       if (perform(MI)) {
         Changed = true;
         ++NumOfPGOMemOPOpt;
-        DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName()
+        DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName()
                      << "is Transformed.\n");
       }
     }
@@ -863,13 +863,23 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
     ActualCount = *BBEdgeCount;
   }
 
+  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
+  DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount
+               << "\n");
+  DEBUG(
+      for (auto &VD
+           : VDs) { dbgs() << "  (" << VD.Value << "," << VD.Count << ")\n"; });
+
   if (ActualCount < MemOPCountThreshold)
     return false;
+  // Skip if the total value profiled count is 0, in which case we can't
+  // scale up the counts properly (and there is no profitable transformation).
+  if (TotalCount == 0)
+    return false;
 
-  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
   TotalCount = ActualCount;
   if (MemOPScaleCount)
-    DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount
+    DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
                  << " denominator = " << SavedTotalCount << "\n");
 
   // Keeping track of the count of the default case:
@@ -915,14 +925,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
     MaxCount = RemainCount;
 
   uint64_t SumForOpt = TotalCount - RemainCount;
-  DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs "
-               << TotalCount << "\n");
-  DEBUG(
-      for (auto &VD
-           : VDs) { dbgs() << "  (" << VD.Value << "," << VD.Count << ")\n"; });
 
   DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
-               << " Versions\n");
+               << " Versions (covering " << SumForOpt << " out of "
+               << TotalCount << ")\n");
 
   // mem_op(..., size)
   // ==>
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 190f05db4b0..3e480a6df44 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2643,7 +2643,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                "ByVal argument is not a pointer!");
         Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
         if (ArgOffset + Size > kParamTLSSize) break;
-        unsigned ParamAlignment = CS.getParamAlignment(i + 1);
+        unsigned ParamAlignment = CS.getParamAlignment(i);
         unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
         Store = IRB.CreateMemCpy(ArgShadowBase,
                                  getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
@@ -3502,7 +3502,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
         assert(A->getType()->isPointerTy());
         Type *RealTy = A->getType()->getPointerElementType();
         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
-        uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1);
+        uint64_t ArgAlign = CS.getParamAlignment(ArgNo);
         if (ArgAlign < 8)
           ArgAlign = 8;
         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index f02b75f0b45..cd9b3d96a14 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -69,6 +69,19 @@ static inline void EraseInstruction(Instruction *CI) {
     RecursivelyDeleteTriviallyDeadInstructions(OldArg);
 }
 
+/// If Inst is a ReturnRV and its operand is a call or invoke, return the
+/// operand. Otherwise return null.
+static inline const Instruction *getreturnRVOperand(const Instruction &Inst,
+                                                    ARCInstKind Class) {
+  if (Class != ARCInstKind::RetainRV)
+    return nullptr;
+
+  const auto *Opnd = Inst.getOperand(0)->stripPointerCasts();
+  if (const auto *C = dyn_cast<CallInst>(Opnd))
+    return C;
+  return dyn_cast<InvokeInst>(Opnd);
+}
+
 } // end namespace objcarc
 } // end namespace llvm
 
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index c1bbc4e96b1..d13e941044f 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -244,6 +244,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
                                           const Value *Ptr,
                                           ProvenanceAnalysis &PA,
                                           ARCInstKind Class) {
+  auto SetSeqAndInsertReverseInsertPt = [&](Sequence NewSeq){
+    assert(!HasReverseInsertPts());
+    SetSeq(NewSeq);
+    // If this is an invoke instruction, we're scanning it as part of
+    // one of its successor blocks, since we can't insert code after it
+    // in its own block, and we don't want to split critical edges.
+    if (isa<InvokeInst>(Inst))
+      InsertReverseInsertPt(&*BB->getFirstInsertionPt());
+    else
+      InsertReverseInsertPt(&*++Inst->getIterator());
+  };
+
   // Check for possible direct uses.
   switch (GetSeq()) {
   case S_Release:
@@ -251,26 +263,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
     if (CanUse(Inst, Ptr, PA, Class)) {
       DEBUG(dbgs() << "            CanUse: Seq: " << GetSeq() << "; " << *Ptr
                    << "\n");
-      assert(!HasReverseInsertPts());
-      // If this is an invoke instruction, we're scanning it as part of
-      // one of its successor blocks, since we can't insert code after it
-      // in its own block, and we don't want to split critical edges.
-      if (isa<InvokeInst>(Inst))
-        InsertReverseInsertPt(&*BB->getFirstInsertionPt());
-      else
-        InsertReverseInsertPt(&*++Inst->getIterator());
-      SetSeq(S_Use);
+      SetSeqAndInsertReverseInsertPt(S_Use);
     } else if (Seq == S_Release && IsUser(Class)) {
       DEBUG(dbgs() << "            PreciseReleaseUse: Seq: " << GetSeq() << "; "
                    << *Ptr << "\n");
       // Non-movable releases depend on any possible objc pointer use.
-      SetSeq(S_Stop);
-      assert(!HasReverseInsertPts());
-      // As above; handle invoke specially.
-      if (isa<InvokeInst>(Inst))
-        InsertReverseInsertPt(&*BB->getFirstInsertionPt());
-      else
-        InsertReverseInsertPt(&*++Inst->getIterator());
+      SetSeqAndInsertReverseInsertPt(S_Stop);
+    } else if (const auto *Call = getreturnRVOperand(*Inst, Class)) {
+      if (CanUse(Call, Ptr, PA, GetBasicARCInstKind(Call))) {
+        DEBUG(dbgs() << "            ReleaseUse: Seq: " << GetSeq() << "; "
+                     << *Ptr << "\n");
+        SetSeqAndInsertReverseInsertPt(S_Stop);
+      }
     }
     break;
   case S_Stop:
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index b323ab3bd44..52339075876 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -55,6 +55,7 @@ add_llvm_library(LLVMScalarOpts
   Scalar.cpp
   Scalarizer.cpp
   SeparateConstOffsetFromGEP.cpp
+  SimpleLoopUnswitch.cpp
   SimplifyCFGPass.cpp
   Sink.cpp
   SpeculativeExecution.cpp
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index b5a4cc2f395..dc864f48bf1 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI,
     Changed = true;
   }
 
-  if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) {
+  if (Value *V = SimplifyInstruction(P, SQ)) {
     P->replaceAllUsesWith(V);
     P->eraseFromParent();
     Changed = true;
@@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
     return false;
 
   LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
-  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
-  auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>();
-  auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
-  const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
-  return runImpl(F, LVI, SQ);
+  return runImpl(F, LVI, getBestSimplifyQuery(*this, F));
 }
 
 PreservedAnalyses
 CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
 
   LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
-  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
-  auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
-  auto *AC = AM.getCachedResult<AssumptionAnalysis>(F);
-  const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
-  bool Changed = runImpl(F, LVI, SQ);
+  bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F));
 
   if (!Changed)
     return PreservedAnalyses::all();
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 04479b6e49a..d8f8a58a5fd 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -253,6 +253,7 @@ public:
   const TargetTransformInfo &TTI;
   DominatorTree &DT;
   AssumptionCache &AC;
+  const SimplifyQuery SQ;
   MemorySSA *MSSA;
   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
   typedef RecyclingAllocator<
@@ -315,9 +316,10 @@ public:
   unsigned CurrentGeneration;
 
   /// \brief Set up the EarlyCSE runner for a particular function.
-  EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
-           DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
-      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA),
+  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
+           const TargetTransformInfo &TTI, DominatorTree &DT,
+           AssumptionCache &AC, MemorySSA *MSSA)
+      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
         MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) {
   }
 
@@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
   /// stores which can occur in bitfield code among other things.
   Instruction *LastStore = nullptr;
 
-  const DataLayout &DL = BB->getModule()->getDataLayout();
-
   // See if any instructions in the block can be eliminated.  If so, do it.  If
   // not, add them to AvailableValues.
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
@@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
 
     // Skip assume intrinsics, they don't really have side effects (although
     // they're marked as such to ensure preservation of control dependencies),
-    // and this pass will not disturb any of the assumption's control
-    // dependencies.
+    // and this pass will not bother with its removal. However, we should mark
+    // its condition as true for all dominated blocks.
     if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
-      DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
+      auto *CondI =
+          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
+      if (CondI && SimpleValue::canHandle(CondI)) {
+        DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n');
+        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+      } else
+        DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
       continue;
     }
 
@@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
       if (auto *CondI =
               dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
-        // The condition we're on guarding here is true for all dominated
-        // locations.
-        if (SimpleValue::canHandle(CondI))
+        if (SimpleValue::canHandle(CondI)) {
+          // Do we already know the actual value of this condition?
+          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
+            // Is the condition known to be true?
+            if (isa<ConstantInt>(KnownCond) &&
+                cast<ConstantInt>(KnownCond)->isOneValue()) {
+              DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n');
+              removeMSSA(Inst);
+              Inst->eraseFromParent();
+              Changed = true;
+              continue;
+            } else
+              // Use the known value if it wasn't true.
+              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
+          }
+          // The condition we're on guarding here is true for all dominated
+          // locations.
           AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+        }
       }
 
       // Guard intrinsics read all memory, but don't write any memory.
@@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
 
     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
     // its simpler value.
-    if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
+    if (Value *V = SimplifyInstruction(Inst, SQ)) {
       DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
       bool Killed = false;
       if (!Inst->use_empty()) {
@@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
   auto *MSSA =
       UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
 
-  EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
+  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
 
   if (!CSE.run())
     return PreservedAnalyses::all();
@@ -1008,7 +1029,7 @@ public:
     auto *MSSA =
         UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
 
-    EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
+    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
 
     return CSE.run();
   }
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index be696df548d..c04646eed49 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) {
   // example if it determines that %y is equal to %x then the instruction
   // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
   const DataLayout &DL = I->getModule()->getDataLayout();
-  if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
+  if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
     bool Changed = false;
     if (!I->use_empty()) {
       I->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index dcb2a4a0c6e..3953198fe60 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -97,7 +97,7 @@ class IndVarSimplify {
   TargetLibraryInfo *TLI;
   const TargetTransformInfo *TTI;
 
-  SmallVector<WeakVH, 16> DeadInsts;
+  SmallVector<WeakTrackingVH, 16> DeadInsts;
   bool Changed = false;
 
   bool isValidRewrite(Value *FromVal, Value *ToVal);
@@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
                                       Compare->getName());
 
   // In the following deletions, PN may become dead and may be deleted.
-  // Use a WeakVH to observe whether this happens.
-  WeakVH WeakPH = PN;
+  // Use a WeakTrackingVH to observe whether this happens.
+  WeakTrackingVH WeakPH = PN;
 
   // Delete the old floating point exit comparison.  The branch starts using the
   // new comparison.
@@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
   //
   BasicBlock *Header = L->getHeader();
 
-  SmallVector<WeakVH, 8> PHIs;
+  SmallVector<WeakTrackingVH, 8> PHIs;
   for (BasicBlock::iterator I = Header->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I)
     PHIs.push_back(PN);
@@ -901,7 +901,7 @@ class WidenIV {
   PHINode *WidePhi;
   Instruction *WideInc;
   const SCEV *WideIncExpr;
-  SmallVectorImpl<WeakVH> &DeadInsts;
+  SmallVectorImpl<WeakTrackingVH> &DeadInsts;
 
   SmallPtrSet<Instruction *,16> Widened;
   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
@@ -941,20 +941,13 @@ class WidenIV {
   }
 
 public:
-  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
-          ScalarEvolution *SEv, DominatorTree *DTree,
-          SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
-    OrigPhi(WI.NarrowIV),
-    WideType(WI.WidestNativeType),
-    LI(LInfo),
-    L(LI->getLoopFor(OrigPhi->getParent())),
-    SE(SEv),
-    DT(DTree),
-    HasGuards(HasGuards),
-    WidePhi(nullptr),
-    WideInc(nullptr),
-    WideIncExpr(nullptr),
-    DeadInsts(DI) {
+  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+          bool HasGuards)
+      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
+        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
+        HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr),
+        WideIncExpr(nullptr), DeadInsts(DI) {
     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
   }
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 9e2563879da..5e116ef2fe7 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -138,7 +138,7 @@ private:
 
   // Tries to infer the specific address space of each address expression in
   // Postorder.
-  void inferAddressSpaces(const std::vector<Value *> &Postorder,
+  void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                           ValueToAddrSpaceMapTy *InferredAddrSpace) const;
 
   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
@@ -147,7 +147,7 @@ private:
   // address spaces if InferredAddrSpace says so. Postorder is the postorder of
   // all flat expressions in the use-def graph of function F.
   bool
-  rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
+  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                               const ValueToAddrSpaceMapTy &InferredAddrSpace,
                               Function *F) const;
 
@@ -162,7 +162,7 @@ private:
     std::vector<std::pair<Value *, bool>> &PostorderStack,
     DenseSet<Value *> &Visited) const;
 
-  std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
 
   Value *cloneValueWithNewAddressSpace(
     Value *V, unsigned NewAddrSpace,
@@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
     Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
     DenseSet<Value *> &Visited) const {
   assert(V->getType()->isPointerTy());
+
+  // Generic addressing expressions may be hidden in nested constant
+  // expressions.
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    // TODO: Look in non-address parts, like icmp operands.
+    if (isAddressExpression(*CE) && Visited.insert(CE).second)
+      PostorderStack.push_back(std::make_pair(CE, false));
+
+    return;
+  }
+
   if (isAddressExpression(*V) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
-    if (Visited.insert(V).second)
+    if (Visited.insert(V).second) {
       PostorderStack.push_back(std::make_pair(V, false));
+
+      Operator *Op = cast<Operator>(V);
+      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
+        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
+          if (isAddressExpression(*CE) && Visited.insert(CE).second)
+            PostorderStack.emplace_back(CE, false);
+        }
+      }
+    }
   }
 }
 
 // Returns all flat address expressions in function F. The elements are ordered
 // ordered in postorder.
-std::vector<Value *>
+std::vector<WeakTrackingVH>
 InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
@@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
         PushPtrOperand(Cmp->getOperand(0));
         PushPtrOperand(Cmp->getOperand(1));
       }
+    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
+      if (!ASC->getType()->isVectorTy())
+        PushPtrOperand(ASC->getPointerOperand());
     }
   }
 
-  std::vector<Value *> Postorder; // The resultant postorder.
+  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
   while (!PostorderStack.empty()) {
+    Value *TopVal = PostorderStack.back().first;
     // If the operands of the expression on the top are already explored,
     // adds that expression to the resultant postorder.
     if (PostorderStack.back().second) {
-      Postorder.push_back(PostorderStack.back().first);
+      Postorder.push_back(TopVal);
       PostorderStack.pop_back();
       continue;
     }
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().second = true;
-    for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
+    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
       appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                    Visited);
     }
@@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
     return false;
 
   // Collects all flat address expressions in postorder.
-  std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
+  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
 
   // Runs a data-flow analysis to refine the address spaces of every expression
   // in Postorder.
@@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
   return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
 }
 
+// Constants need to be tracked through RAUW to handle cases with nested
+// constant expressions, so wrap values in WeakTrackingVH.
 void InferAddressSpaces::inferAddressSpaces(
-    const std::vector<Value *> &Postorder,
+    ArrayRef<WeakTrackingVH> Postorder,
     ValueToAddrSpaceMapTy *InferredAddrSpace) const {
   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
   // Initially, all expressions are in the uninitialized address space.
@@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
 }
 
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
-  const std::vector<Value *> &Postorder,
-  const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+    ArrayRef<WeakTrackingVH> Postorder,
+    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
   // pointer operands converted to the new address space. Since the pointer
   // operands are converted, the clone is naturally in the new address space by
@@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
     NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
   }
 
+  SmallVector<Instruction *, 16> DeadInstructions;
+
   // Replaces the uses of the old address expressions with the new ones.
-  for (Value *V : Postorder) {
+  for (const WeakTrackingVH &WVH : Postorder) {
+    assert(WVH && "value was unexpectedly deleted");
+    Value *V = WVH;
     Value *NewV = ValueWithNewAddrSpace.lookup(V);
     if (NewV == nullptr)
       continue;
@@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
     DEBUG(dbgs() << "Replacing the uses of " << *V
                  << "\n  with\n  " << *NewV << '\n');
 
+    if (Constant *C = dyn_cast<Constant>(V)) {
+      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+                                                         C->getType());
+      if (C != Replace) {
+        DEBUG(dbgs() << "Inserting replacement const cast: "
+              << Replace << ": " << *Replace << '\n');
+        C->replaceAllUsesWith(Replace);
+        V = Replace;
+      }
+    }
+
     Value::use_iterator I, E, Next;
     for (I = V->use_begin(), E = V->use_end(); I != E; ) {
       Use &U = *I;
@@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
           }
         }
 
+        if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
+          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+          if (ASC->getDestAddressSpace() == NewAS) {
+            ASC->replaceAllUsesWith(NewV);
+            DeadInstructions.push_back(ASC);
+            continue;
+          }
+        }
+
         // Otherwise, replaces the use with flat(NewV).
         if (Instruction *I = dyn_cast<Instruction>(V)) {
           BasicBlock::iterator InsertPos = std::next(I->getIterator());
@@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
       }
     }
 
-    if (V->use_empty())
-      RecursivelyDeleteTriviallyDeadInstructions(V);
+    if (V->use_empty()) {
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        DeadInstructions.push_back(I);
+    }
   }
 
+  for (Instruction *I : DeadInstructions)
+    RecursivelyDeleteTriviallyDeadInstructions(I);
+
   return true;
 }
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index a0da81605a8..7dacaba1193 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
         Value *LHS = PN->getIncomingValue(i);
         Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
 
-        Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
+        Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL});
         if (!Res) {
           if (!isa<Constant>(RHS))
             continue;
@@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
 
   BasicBlock *OnlyDest = nullptr;
   BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+  Constant *OnlyVal = nullptr;
+  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
 
+  unsigned PredWithKnownDest = 0;
   for (const auto &PredValue : PredValues) {
     BasicBlock *Pred = PredValue.second;
     if (!SeenPreds.insert(Pred).second)
       continue;  // Duplicate predecessor entry.
 
-    // If the predecessor ends with an indirect goto, we can't change its
-    // destination.
-    if (isa<IndirectBrInst>(Pred->getTerminator()))
-      continue;
-
     Constant *Val = PredValue.first;
 
     BasicBlock *DestBB;
     if (isa<UndefValue>(Val))
       DestBB = nullptr;
-    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
       DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
-    else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
       DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
     } else {
       assert(isa<IndirectBrInst>(BB->getTerminator())
               && "Unexpected terminator");
+      assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
       DestBB = cast<BlockAddress>(Val)->getBasicBlock();
     }
 
     // If we have exactly one destination, remember it for efficiency below.
-    if (PredToDestList.empty())
+    if (PredToDestList.empty()) {
       OnlyDest = DestBB;
-    else if (OnlyDest != DestBB)
-      OnlyDest = MultipleDestSentinel;
+      OnlyVal = Val;
+    } else {
+      if (OnlyDest != DestBB)
+        OnlyDest = MultipleDestSentinel;
+      // It possible we have same destination, but different value, e.g. default
+      // case in switchinst.
+      if (Val != OnlyVal)
+        OnlyVal = MultipleVal;
+    }
+
+    // We know where this predecessor is going.
+    ++PredWithKnownDest;
+
+    // If the predecessor ends with an indirect goto, we can't change its
+    // destination.
+    if (isa<IndirectBrInst>(Pred->getTerminator()))
+      continue;
 
     PredToDestList.push_back(std::make_pair(Pred, DestBB));
   }
@@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
   // not thread. By doing so, we do not need to duplicate the current block and
   // also miss potential opportunities in case we dont/cant duplicate.
   if (OnlyDest && OnlyDest != MultipleDestSentinel) {
-    if (PredToDestList.size() ==
+    if (PredWithKnownDest ==
         (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
       bool SeenFirstBranchToOnlyDest = false;
       for (BasicBlock *SuccBB : successors(BB)) {
@@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
 
       // If the condition is now dead due to the removal of the old terminator,
       // erase it.
-      auto *CondInst = dyn_cast<Instruction>(Cond);
-      if (CondInst && CondInst->use_empty())
-        CondInst->eraseFromParent();
-      // FIXME: in case this instruction is defined in the current BB and it
-      // resolves to a single value from all predecessors, we can do RAUW.
+      if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
+        if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
+          CondInst->eraseFromParent();
+        else if (OnlyVal && OnlyVal != MultipleVal &&
+                 CondInst->getParent() == BB) {
+          // If we just learned Cond is the same value for all uses of the
+          // condition, replace it with a constant value
+          CondInst->replaceAllUsesWith(OnlyVal);
+          if (!CondInst->mayHaveSideEffects())
+            CondInst->eraseFromParent();
+        }
+      }
       return true;
     }
   }
@@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
     // If this instruction can be simplified after the operands are updated,
     // just use the simplified value instead.  This frequently happens due to
     // phi translation.
-    if (Value *IV =
-            SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
+    if (Value *IV = SimplifyInstruction(
+            New,
+            {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
       ValueMapping[&*BI] = IV;
       if (!New->mayHaveSideEffects()) {
         delete New;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5042fc18d7c..410fbb03068 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
     Instruction *Inst = &*I++;
     // Look for memset instructions, which may be optimized to a larger memset.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
-      WeakVH InstPtr(&*I);
+      WeakTrackingVH InstPtr(&*I);
       if (!processLoopMemSet(MSI, BECount))
         continue;
       MadeChange = true;
@@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 
 /// If the stored value is a strided load in the same loop with the same stride
 /// this may be transformable into a memcpy.  This kicks in for stuff like
-///   for (i) A[i] = B[i];
+/// for (i) A[i] = B[i];
 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                                     const SCEV *BECount) {
   assert(SI->isSimple() && "Expected only non-volatile stores.");
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 28e71ca0543..af095560cc0 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI,
 
         // Don't bother simplifying unused instructions.
         if (!I->use_empty()) {
-          Value *V = SimplifyInstruction(I, DL, TLI, DT, AC);
+          Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC});
           if (V && LI->replacementPreservesLCSSAForm(I, V)) {
             // Mark all uses for resimplification next time round the loop.
             for (User *U : I->users())
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 8ce96cf1b7a..2ba9265566a 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     // With the operands remapped, see if the instruction constant folds or is
     // otherwise simplifyable.  This commonly occurs because the entry from PHI
     // nodes allows icmps and other instructions to fold.
-    Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C));
+    Value *V = SimplifyInstruction(C, SQ);
     if (V && LI->replacementPreservesLCSSAForm(C, V)) {
       // If so, then delete the temporary instruction and stick the folded value
       // in the map.
@@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
                                       LPMUpdater &) {
   int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
   const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
-  const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC);
-  LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ);
+  const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
+  LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+                SQ);
 
   bool Changed = LR.processLoop(&L);
   if (!Changed)
@@ -714,10 +715,7 @@ public:
     auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
     auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
     auto *SE = SEWP ? &SEWP->getSE() : nullptr;
-    auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-    auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
-    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-    const SimplifyQuery SQ(DL, TLI, DT, AC);
+    const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
     LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ);
     return LR.processLoop(L);
   }
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index a5a81c33a8e..35c05e84fd6 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) {
   bool Changed = false;
   // Copy blocks into a temporary array to avoid iterator invalidation issues
   // as we remove them.
-  SmallVector<WeakVH, 16> Blocks(L.blocks());
+  SmallVector<WeakTrackingVH, 16> Blocks(L.blocks());
 
   for (auto &Block : Blocks) {
     // Attempt to merge blocks in the trivial case. Don't modify blocks which
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index af137f6faa6..ccedb98d7fa 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S,
 /// If any of the instructions is the specified set are trivially dead, delete
 /// them and see if this makes any of their operands subsequently dead.
 static bool
-DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   bool Changed = false;
 
   while (!DeadInsts.empty()) {
@@ -1845,7 +1845,7 @@ class LSRInstance {
   void FinalizeChain(IVChain &Chain);
   void CollectChains();
   void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
-                       SmallVectorImpl<WeakVH> &DeadInsts);
+                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);
 
   void CollectInterestingTypesAndFactors();
   void CollectFixupsAndInitialFormulae();
@@ -1920,19 +1920,15 @@ class LSRInstance {
                                   const LSRUse &LU,
                                   SCEVExpander &Rewriter) const;
 
-  Value *Expand(const LSRUse &LU, const LSRFixup &LF,
-                const Formula &F,
-                BasicBlock::iterator IP,
-                SCEVExpander &Rewriter,
-                SmallVectorImpl<WeakVH> &DeadInsts) const;
+  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
+                BasicBlock::iterator IP, SCEVExpander &Rewriter,
+                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
   void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
-                     const Formula &F,
-                     SCEVExpander &Rewriter,
-                     SmallVectorImpl<WeakVH> &DeadInsts) const;
-  void Rewrite(const LSRUse &LU, const LSRFixup &LF,
-               const Formula &F,
+                     const Formula &F, SCEVExpander &Rewriter,
+                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
+  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                SCEVExpander &Rewriter,
-               SmallVectorImpl<WeakVH> &DeadInsts) const;
+               SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
   void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
 
 public:
@@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
 /// Generate an add or subtract for each IVInc in a chain to materialize the IV
 /// user's operand from the previous IV user's operand.
 void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
-                                  SmallVectorImpl<WeakVH> &DeadInsts) {
+                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
   // Find the new IVOperand for the head of the chain. It may have been replaced
   // by LSR.
   const IVInc &Head = Chain.Incs[0];
@@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
 
 /// Emit instructions for the leading candidate expression for this LSRUse (this
 /// is called "expanding").
-Value *LSRInstance::Expand(const LSRUse &LU,
-                           const LSRFixup &LF,
-                           const Formula &F,
-                           BasicBlock::iterator IP,
+Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
+                           const Formula &F, BasicBlock::iterator IP,
                            SCEVExpander &Rewriter,
-                           SmallVectorImpl<WeakVH> &DeadInsts) const {
+                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
   if (LU.RigidFormula)
     return LF.OperandValToReplace;
 
@@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU,
 /// Helper for Rewrite. PHI nodes are special because the use of their operands
 /// effectively happens in their predecessor blocks, so the expression may need
 /// to be expanded in multiple places.
-void LSRInstance::RewriteForPHI(PHINode *PN,
-                                const LSRUse &LU,
-                                const LSRFixup &LF,
-                                const Formula &F,
-                                SCEVExpander &Rewriter,
-                                SmallVectorImpl<WeakVH> &DeadInsts) const {
+void LSRInstance::RewriteForPHI(
+    PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
+    SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
   DenseMap<BasicBlock *, Value *> Inserted;
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
     if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
@@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
 /// Emit instructions for the leading candidate expression for this LSRUse (this
 /// is called "expanding"), and update the UserInst to reference the newly
 /// expanded value.
-void LSRInstance::Rewrite(const LSRUse &LU,
-                          const LSRFixup &LF,
-                          const Formula &F,
-                          SCEVExpander &Rewriter,
-                          SmallVectorImpl<WeakVH> &DeadInsts) const {
+void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
+                          const Formula &F, SCEVExpander &Rewriter,
+                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
   // First, find an insertion point that dominates UserInst. For PHI nodes,
   // find the nearest block which dominates all the relevant uses.
   if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
@@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution(
     const SmallVectorImpl<const Formula *> &Solution) {
   // Keep track of instructions we may have made dead, so that
   // we can remove them after we are done working.
-  SmallVector<WeakVH, 16> DeadInsts;
+  SmallVector<WeakTrackingVH, 16> DeadInsts;
 
   SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
                         "lsr");
@@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   // Remove any extra phis created by processing inner loops.
   Changed |= DeleteDeadPHIs(L->getHeader());
   if (EnablePhiElim && L->isLoopSimplifyForm()) {
-    SmallVector<WeakVH, 16> DeadInsts;
+    SmallVector<WeakTrackingVH, 16> DeadInsts;
     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
     SCEVExpander Rewriter(SE, DL, "lsr");
 #ifndef NDEBUG
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 8fa806a7e8b..6ef1464e933 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   LoopProcessWorklist.push_back(NewLoop);
   redoLoop = true;
 
-  // Keep a WeakVH holding onto LIC.  If the first call to RewriteLoopBody
+  // Keep a WeakTrackingVH holding onto LIC.  If the first call to
+  // RewriteLoopBody
   // deletes the instruction (for example by simplifying a PHI that feeds into
   // the condition that we're unswitching on), we don't rewrite the second
   // iteration.
-  WeakVH LICHandle(LIC);
+  WeakTrackingVH LICHandle(LIC);
 
   // Now we rewrite the original code to know that the condition is true and the
   // new code to know that the condition is false.
@@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I,
 static void ReplaceUsesOfWith(Instruction *I, Value *V,
                               std::vector<Instruction*> &Worklist,
                               Loop *L, LPPassManager *LPM) {
-  DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
+  DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
 
   // Add uses to the worklist, which may be dead now.
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
   LPM->deleteSimpleAnalysisValue(I, L);
   RemoveFromWorklist(I, Worklist);
   I->replaceAllUsesWith(V);
-  I->eraseFromParent();
+  if (!I->mayHaveSideEffects())
+    I->eraseFromParent();
   ++NumSimplify;
 }
 
@@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
 
     // Simple DCE.
     if (isInstructionTriviallyDead(I)) {
-      DEBUG(dbgs() << "Remove dead instruction '" << *I);
+      DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
 
       // Add uses to the worklist, which may be dead now.
       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a3f3f25c1e0..21a632073da 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
 
   // Get the alignment of the byval.  If the call doesn't specify the alignment,
   // then it is some target specific value that we can't know.
-  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+  unsigned ByValAlign = CS.getParamAlignment(ArgNo);
   if (ByValAlign == 0) return false;
 
   // If it is greater than the memcpy, then we check to see if we can force the
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index c5bf2f28d18..d0bfe360389 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
           Changed = true;
           SE->forgetValue(&*I);
           I->replaceAllUsesWith(NewI);
-          // If SeenExprs constains I's WeakVH, that entry will be replaced with
+          // If SeenExprs constains I's WeakTrackingVH, that entry will be
+          // replaced with
           // nullptr.
           RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
           I = NewI->getIterator();
@@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
         // Add the rewritten instruction to SeenExprs; the original instruction
         // is deleted.
         const SCEV *NewSCEV = SE->getSCEV(&*I);
-        SeenExprs[NewSCEV].push_back(WeakVH(&*I));
+        SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I));
         // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
         // is equivalent to I. However, ScalarEvolution::getSCEV may
         // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
         //
         // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
         if (NewSCEV != OldSCEV)
-          SeenExprs[OldSCEV].push_back(WeakVH(&*I));
+          SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I));
       }
     }
   }
@@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
   // future instruction either. Therefore, we pop it out of the stack. This
   // optimization makes the algorithm O(n).
   while (!Candidates.empty()) {
-    // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+    // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's
+    // removed
     // during rewriting.
     if (Value *Candidate = Candidates.back()) {
       Instruction *CandidateInstruction = cast<Instruction>(Candidate);
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index a014ddd9ba0..162d91beae7 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -395,7 +395,6 @@ namespace {
 class NewGVN {
   Function &F;
   DominatorTree *DT;
-  AssumptionCache *AC;
   const TargetLibraryInfo *TLI;
   AliasAnalysis *AA;
   MemorySSA *MSSA;
@@ -405,6 +404,7 @@ class NewGVN {
   BumpPtrAllocator ExpressionAllocator;
   ArrayRecycler<Value *> ArgRecycler;
   TarjanSCC SCCFinder;
+  const SimplifyQuery SQ;
 
   // Number of function arguments, used by ranking
   unsigned int NumFuncArgs;
@@ -504,8 +504,9 @@ public:
   NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
          TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
          const DataLayout &DL)
-      : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
-        PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {}
+      : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
+        PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) {
+  }
   bool runGVN();
 
 private:
@@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
   E->op_push_back(lookupOperandLeader(Arg1));
   E->op_push_back(lookupOperandLeader(Arg2));
 
-  Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI,
-                           DT, AC);
+  Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ);
   if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))
     return SimplifiedE;
   return E;
@@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) {
            "Wrong types on cmp instruction");
     assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
             E->getOperand(1)->getType() == I->getOperand(1)->getType()));
-    Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
-                               DL, TLI, DT, AC);
+    Value *V =
+        SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
   } else if (isa<SelectInst>(I)) {
@@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) {
       assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
              E->getOperand(2)->getType() == I->getOperand(2)->getType());
       Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
-                                    E->getOperand(2), DL, TLI, DT, AC);
+                                    E->getOperand(2), SQ);
       if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
         return SimplifiedE;
     }
   } else if (I->isBinaryOp()) {
-    Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1),
-                             DL, TLI, DT, AC);
+    Value *V =
+        SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
   } else if (auto *BI = dyn_cast<BitCastInst>(I)) {
-    Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC);
+    Value *V =
+        SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
   } else if (isa<GetElementPtrInst>(I)) {
-    Value *V = SimplifyGEPInst(E->getType(),
-                               ArrayRef<Value *>(E->op_begin(), E->op_end()),
-                               DL, TLI, DT, AC);
+    Value *V = SimplifyGEPInst(
+        E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
   } else if (AllConstant) {
@@ -1628,15 +1628,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {
         if (PBranch->TrueEdge) {
           // If we know the previous predicate is true and we are in the true
           // edge then we may be implied true or false.
-          if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate,
-                                                  BranchPredicate)) {
+          if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate,
+                                                  OurPredicate)) {
             addPredicateUsers(PI, I);
             return createConstantExpression(
                 ConstantInt::getTrue(CI->getType()));
           }
 
-          if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate,
-                                                   BranchPredicate)) {
+          if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate,
+                                                   OurPredicate)) {
             addPredicateUsers(PI, I);
             return createConstantExpression(
                 ConstantInt::getFalse(CI->getType()));
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 3dcab609078..ef29d414160 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
 /// Emit a tree of add instructions, summing Ops together
 /// and returning the result.  Insert the tree before I.
 static Value *EmitAddTreeOfValues(Instruction *I,
-                                  SmallVectorImpl<WeakVH> &Ops){
+                                  SmallVectorImpl<WeakTrackingVH> &Ops) {
   if (Ops.size() == 1) return Ops.back();
 
   Value *V1 = Ops.back();
@@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
             ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
             : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
 
-    SmallVector<WeakVH, 4> NewMulOps;
+    SmallVector<WeakTrackingVH, 4> NewMulOps;
     for (unsigned i = 0; i != Ops.size(); ++i) {
       // Only try to remove factors from expressions we're allowed to.
       BinaryOperator *BOp =
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index f344eb15146..c11247c06b8 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
 
 // Create new attribute set containing only attributes which can be transferred
 // from original call to the safepoint.
-static AttributeList legalizeCallAttributes(AttributeList AS) {
-  AttributeList Ret;
+static AttributeList legalizeCallAttributes(AttributeList AL) {
+  if (AL.isEmpty())
+    return AL;
 
-  for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
-    unsigned Index = AS.getSlotIndex(Slot);
-
-    if (Index == AttributeList::ReturnIndex ||
-        Index == AttributeList::FunctionIndex) {
-
-      for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
-
-        // Do not allow certain attributes - just skip them
-        // Safepoint can not be read only or read none.
-        if (Attr.hasAttribute(Attribute::ReadNone) ||
-            Attr.hasAttribute(Attribute::ReadOnly))
-          continue;
-
-        // These attributes control the generation of the gc.statepoint call /
-        // invoke itself; and once the gc.statepoint is in place, they're of no
-        // use.
-        if (isStatepointDirectiveAttr(Attr))
-          continue;
-
-        Ret = Ret.addAttributes(
-            AS.getContext(), Index,
-            AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr)));
-      }
-    }
-
-    // Just skip parameter attributes for now
+  // Remove the readonly, readnone, and statepoint function attributes.
+  AttrBuilder FnAttrs = AL.getFnAttributes();
+  FnAttrs.removeAttribute(Attribute::ReadNone);
+  FnAttrs.removeAttribute(Attribute::ReadOnly);
+  for (Attribute A : AL.getFnAttributes()) {
+    if (isStatepointDirectiveAttr(A))
+      FnAttrs.remove(A);
   }
 
-  return Ret;
+  // Just skip parameter and return attributes for now
+  LLVMContext &Ctx = AL.getContext();
+  return AttributeList::get(Ctx, AttributeList::FunctionIndex,
+                            AttributeSet::get(Ctx, FnAttrs));
 }
 
 /// Helper function to place all gc relocates necessary for the given
@@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
     Call->setCallingConv(ToReplace->getCallingConv());
 
     // Currently we will fail on parameter attributes and on certain
-    // function attributes.
-    AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
-    // In case if we can handle this set of attributes - set up function attrs
-    // directly on statepoint and return attrs later for gc_result intrinsic.
-    Call->setAttributes(AttributeList::get(Call->getContext(),
-                                           AttributeList::FunctionIndex,
-                                           NewAttrs.getFnAttributes()));
+    // function attributes.  In case if we can handle this set of attributes -
+    // set up function attrs directly on statepoint and return attrs later for
+    // gc_result intrinsic.
+    Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
 
     Token = Call;
 
@@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
     Invoke->setCallingConv(ToReplace->getCallingConv());
 
     // Currently we will fail on parameter attributes and on certain
-    // function attributes.
-    AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
-    // In case if we can handle this set of attributes - set up function attrs
-    // directly on statepoint and return attrs later for gc_result intrinsic.
-    Invoke->setAttributes(AttributeList::get(Invoke->getContext(),
-                                             AttributeList::FunctionIndex,
-                                             NewAttrs.getFnAttributes()));
+    // function attributes.  In case if we can handle this set of attributes -
+    // set up function attrs directly on statepoint and return attrs later for
+    // gc_result intrinsic.
+    Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
 
     Token = Invoke;
 
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index d01e91a7f23..1d9beffaf06 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -25,6 +25,7 @@
 
 #include "llvm/Transforms/Scalar/SROA.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter
   Instruction *OldPtr;
 
   // Track post-rewrite users which are PHI nodes and Selects.
-  SmallPtrSetImpl<PHINode *> &PHIUsers;
-  SmallPtrSetImpl<SelectInst *> &SelectUsers;
+  SmallSetVector<PHINode *, 8> &PHIUsers;
+  SmallSetVector<SelectInst *, 8> &SelectUsers;
 
   // Utility IR builder, whose name prefix is setup for each visited use, and
   // the insertion point is set to point to the user.
@@ -2199,8 +2200,8 @@ public:
                       uint64_t NewAllocaBeginOffset,
                       uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                       VectorType *PromotableVecTy,
-                      SmallPtrSetImpl<PHINode *> &PHIUsers,
-                      SmallPtrSetImpl<SelectInst *> &SelectUsers)
+                      SmallSetVector<PHINode *, 8> &PHIUsers,
+                      SmallSetVector<SelectInst *, 8> &SelectUsers)
       : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
         NewAllocaBeginOffset(NewAllocaBeginOffset),
         NewAllocaEndOffset(NewAllocaEndOffset),
@@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   // fact scheduled for promotion.
   unsigned PPWOldSize = PostPromotionWorklist.size();
   unsigned NumUses = 0;
-  SmallPtrSet<PHINode *, 8> PHIUsers;
-  SmallPtrSet<SelectInst *, 8> SelectUsers;
+  SmallSetVector<PHINode *, 8> PHIUsers;
+  SmallSetVector<SelectInst *, 8> SelectUsers;
 
   AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                                P.endOffset(), IsIntegerPromotable, VecTy,
@@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
 
   // Now that we've processed all the slices in the new partition, check if any
   // PHIs or Selects would block promotion.
-  for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
-                                            E = PHIUsers.end();
-       I != E; ++I)
-    if (!isSafePHIToSpeculate(**I)) {
+  for (PHINode *PHI : PHIUsers)
+    if (!isSafePHIToSpeculate(*PHI)) {
       Promotable = false;
       PHIUsers.clear();
       SelectUsers.clear();
       break;
     }
-  for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
-                                               E = SelectUsers.end();
-       I != E; ++I)
-    if (!isSafeSelectToSpeculate(**I)) {
+
+  for (SelectInst *Sel : SelectUsers)
+    if (!isSafeSelectToSpeculate(*Sel)) {
       Promotable = false;
       PHIUsers.clear();
       SelectUsers.clear();
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 00e3c95f6f0..52201d8f3e5 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
@@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeCFGSimplifyPassPass(Registry);
   initializeLateCFGSimplifyPassPass(Registry);
   initializeStructurizeCFGPass(Registry);
+  initializeSimpleLoopUnswitchLegacyPassPass(Registry);
   initializeSinkingLegacyPassPass(Registry);
   initializeTailCallElimPass(Registry);
   initializeSeparateConstOffsetFromGEPPass(Registry);
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4d594532c36..cde659b9d18 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
   // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
   if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
       match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
-    if (isKnownNotFullPoison(I)) {
+    if (programUndefinedIfFullPoison(I)) {
       const SCEV *Key =
           SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
       DominatingExprs[Key].push_back(I);
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
new file mode 100644
index 00000000000..fb1b47c4827
--- /dev/null
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -0,0 +1,626 @@
+//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "simple-loop-unswitch"
+
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumTrivial, "Number of unswitches that are trivial");
+
+static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
+                                        Constant &Replacement) {
+  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+  // Replace uses of LIC in the loop with the given constant.
+  for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
+    // Grab the use and walk past it so we can clobber it in the use list.
+    Use *U = &*UI++;
+    Instruction *UserI = dyn_cast<Instruction>(U->getUser());
+    if (!UserI || !L.contains(UserI))
+      continue;
+
+    // Replace this use within the loop body.
+    *U = &Replacement;
+  }
+}
+
+/// Update the dominator tree after removing one exiting predecessor of a loop
+/// exit block.
+static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L,
+                                            DominatorTree &DT) {
+  assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) &&
+         "Cannot have empty predecessors of the loop exit block if we split "
+         "off a block to unswitch!");
+
+  BasicBlock *IDom = *pred_begin(LoopExitBB);
+  // Walk all of the other predecessors finding the nearest common dominator
+  // until all predecessors are covered or we reach the loop header. The loop
+  // header necessarily dominates all loop exit blocks in loop simplified form
+  // so we can early-exit the moment we hit that block.
+  for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB);
+       PI != PE && IDom != L.getHeader(); ++PI)
+    IDom = DT.findNearestCommonDominator(IDom, *PI);
+
+  DT.changeImmediateDominator(LoopExitBB, IDom);
+}
+
+/// Update the dominator tree after unswitching a particular former exit block.
+///
+/// This handles the full update of the dominator tree after hoisting a block
+/// that previously was an exit block (or split off of an exit block) up to be
+/// reached from the new immediate dominator of the preheader.
+///
+/// The common case is simple -- we just move the unswitched block to have an
+/// immediate dominator of the old preheader. But in complex cases, there may
+/// be other blocks reachable from the unswitched block that are immediately
+/// dominated by some node between the unswitched one and the old preheader.
+/// All of these also need to be hoisted in the dominator tree. We also want to
+/// minimize queries to the dominator tree because each step of this
+/// invalidates any DFS numbers that would make queries fast.
+static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
+                                  DominatorTree &DT) {
+  DomTreeNode *OldPHNode = DT[OldPH];
+  DomTreeNode *UnswitchedNode = DT[UnswitchedBB];
+  // If the dominator tree has already been updated for this unswitched node,
+  // we're done. This makes it easier to use this routine if there are multiple
+  // paths to the same unswitched destination.
+  if (UnswitchedNode->getIDom() == OldPHNode)
+    return;
+
+  // First collect the domtree nodes that we are hoisting over. These are the
+  // set of nodes which may have children that need to be hoisted as well.
+  SmallPtrSet<DomTreeNode *, 4> DomChain;
+  for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode;
+       IDom = IDom->getIDom())
+    DomChain.insert(IDom);
+
+  // The unswitched block ends up immediately dominated by the old preheader --
+  // regardless of whether it is the loop exit block or split off of the loop
+  // exit block.
+  DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
+
+  // Blocks reachable from the unswitched block may need to change their IDom
+  // as well.
+  SmallSetVector<BasicBlock *, 4> Worklist;
+  for (auto *SuccBB : successors(UnswitchedBB))
+    Worklist.insert(SuccBB);
+
+  // Walk the worklist. We grow the list in the loop and so must recompute size.
+  for (int i = 0; i < (int)Worklist.size(); ++i) {
+    auto *BB = Worklist[i];
+
+    DomTreeNode *Node = DT[BB];
+    assert(!DomChain.count(Node) &&
+           "Cannot be dominated by a block you can reach!");
+    // If this block doesn't have an immediate dominator somewhere in the chain
+    // we hoisted over, then its position in the domtree hasn't changed. Either
+    // it is above the region hoisted and still valid, or it is below the
+    // hoisted block and so was trivially updated. This also applies to
+    // everything reachable from this block so we're completely done with the
+    // it.
+    if (!DomChain.count(Node->getIDom()))
+      continue;
+
+    // We need to change the IDom for this node but also walk its successors
+    // which could have similar dominance position.
+    DT.changeImmediateDominator(Node, OldPHNode);
+    for (auto *SuccBB : successors(BB))
+      Worklist.insert(SuccBB);
+  }
+}
+
+/// Unswitch a trivial branch if the condition is loop invariant.
+///
+/// This routine should only be called when loop code leading to the branch has
+/// been validated as trivial (no side effects). This routine checks if the
+/// condition is invariant and one of the successors is a loop exit. This
+/// allows us to unswitch without duplicating the loop, making it trivial.
+///
+/// If this routine fails to unswitch the branch it returns false.
+///
+/// If the branch can be unswitched, this routine splits the preheader and
+/// hoists the branch above that split. Preserves loop simplified form
+/// (splitting the exit block as necessary). It simplifies the branch within
+/// the loop to an unconditional branch but doesn't remove it entirely. Further
+/// cleanup can be done with some simplify-cfg like pass.
+static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
+                                  LoopInfo &LI) {
+  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
+  DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n");
+
+  Value *LoopCond = BI.getCondition();
+
+  // Need a trivial loop condition to unswitch.
+  if (!L.isLoopInvariant(LoopCond))
+    return false;
+
+  // FIXME: We should compute this once at the start and update it!
+  SmallVector<BasicBlock *, 16> ExitBlocks;
+  L.getExitBlocks(ExitBlocks);
+  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
+                                             ExitBlocks.end());
+
+  // Check to see if a successor of the branch is guaranteed to
+  // exit through a unique exit block without having any
+  // side-effects.  If so, determine the value of Cond that causes
+  // it to do this.
+  ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext());
+  ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
+  int LoopExitSuccIdx = 0;
+  auto *LoopExitBB = BI.getSuccessor(0);
+  if (!ExitBlockSet.count(LoopExitBB)) {
+    std::swap(CondVal, Replacement);
+    LoopExitSuccIdx = 1;
+    LoopExitBB = BI.getSuccessor(1);
+    if (!ExitBlockSet.count(LoopExitBB))
+      return false;
+  }
+  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
+  assert(L.contains(ContinueBB) &&
+         "Cannot have both successors exit and still be in the loop!");
+
+  // If the loop exit block contains phi nodes, this isn't trivial.
+  // FIXME: We should examine the PHI to determine whether or not we can handle
+  // it trivially.
+  if (isa<PHINode>(LoopExitBB->begin()))
+    return false;
+
+  DEBUG(dbgs() << "    unswitching trivial branch when: " << CondVal
+               << " == " << LoopCond << "\n");
+
+  // Split the preheader, so that we know that there is a safe place to insert
+  // the conditional branch. We will change the preheader to have a conditional
+  // branch on LoopCond.
+  BasicBlock *OldPH = L.getLoopPreheader();
+  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+
+  // Now that we have a place to insert the conditional branch, create a place
+  // to branch to: this is the exit block out of the loop that we are
+  // unswitching. We need to split this if there are other loop predecessors.
+  // Because the loop is in simplified form, *any* other predecessor is enough.
+  BasicBlock *UnswitchedBB;
+  if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
+    (void)PredBB;
+    assert(PredBB == BI.getParent() && "A branch's parent is't a predecessor!");
+    UnswitchedBB = LoopExitBB;
+  } else {
+    UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
+  }
+
+  BasicBlock *ParentBB = BI.getParent();
+
+  // Now splice the branch to gate reaching the new preheader and re-point its
+  // successors.
+  OldPH->getInstList().splice(std::prev(OldPH->end()),
+                              BI.getParent()->getInstList(), BI);
+  OldPH->getTerminator()->eraseFromParent();
+  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
+  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
+
+  // Create a new unconditional branch that will continue the loop as a new
+  // terminator.
+  BranchInst::Create(ContinueBB, ParentBB);
+
+  // Now we need to update the dominator tree.
+  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+  // But if we split something off of the loop exit block then we also removed
+  // one of the predecessors for the loop exit block and may need to update its
+  // idom.
+  if (UnswitchedBB != LoopExitBB)
+    updateLoopExitIDom(LoopExitBB, L, DT);
+
+  // Since this is an i1 condition we can also trivially replace uses of it
+  // within the loop with a constant.
+  replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
+
+  ++NumTrivial;
+  ++NumBranches;
+  return true;
+}
+
+/// Unswitch a trivial switch if the condition is loop invariant.
+///
+/// This routine should only be called when loop code leading to the switch has
+/// been validated as trivial (no side effects). This routine checks if the
+/// condition is invariant and that at least one of the successors is a loop
+/// exit. This allows us to unswitch without duplicating the loop, making it
+/// trivial.
+///
+/// If this routine fails to unswitch the switch it returns false.
+///
+/// If the switch can be unswitched, this routine splits the preheader and
+/// copies the switch above that split. If the default case is one of the
+/// exiting cases, it copies the non-exiting cases and points them at the new
+/// preheader. If the default case is not exiting, it copies the exiting cases
+/// and points the default at the preheader. It preserves loop simplified form
+/// (splitting the exit blocks as necessary). It simplifies the switch within
+/// the loop by removing now-dead cases. If the default case is one of those
+/// unswitched, it replaces its destination with a new basic block containing
+/// only unreachable. Such basic blocks, while technically loop exits, are not
+/// considered for unswitching so this is a stable transform and the same
+/// switch will not be revisited. If after unswitching there is only a single
+/// in-loop successor, the switch is further simplified to an unconditional
+/// branch. Still more cleanup can be done with some simplify-cfg like pass.
+static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
+                                  LoopInfo &LI) {
+  DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
+  Value *LoopCond = SI.getCondition();
+
+  // If this isn't switching on an invariant condition, we can't unswitch it.
+  if (!L.isLoopInvariant(LoopCond))
+    return false;
+
+  // FIXME: We should compute this once at the start and update it!
+  SmallVector<BasicBlock *, 16> ExitBlocks;
+  L.getExitBlocks(ExitBlocks);
+  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
+                                             ExitBlocks.end());
+
+  SmallVector<int, 4> ExitCaseIndices;
+  for (auto Case : SI.cases()) {
+    auto *SuccBB = Case.getCaseSuccessor();
+    if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin()))
+      ExitCaseIndices.push_back(Case.getCaseIndex());
+  }
+  BasicBlock *DefaultExitBB = nullptr;
+  if (ExitBlockSet.count(SI.getDefaultDest()) &&
+      !isa<PHINode>(SI.getDefaultDest()->begin()) &&
+      !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
+    DefaultExitBB = SI.getDefaultDest();
+  else if (ExitCaseIndices.empty())
+    return false;
+
+  DEBUG(dbgs() << "    unswitching trivial cases...\n");
+
+  SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
+  ExitCases.reserve(ExitCaseIndices.size());
+  // We walk the case indices backwards so that we remove the last case first
+  // and don't disrupt the earlier indices.
+  for (unsigned Index : reverse(ExitCaseIndices)) {
+    auto CaseI = SI.case_begin() + Index;
+    // Save the value of this case.
+    ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
+    // Delete the unswitched cases.
+    SI.removeCase(CaseI);
+  }
+
+  // Check if after this all of the remaining cases point at the same
+  // successor.
+  BasicBlock *CommonSuccBB = nullptr;
+  if (SI.getNumCases() > 0 &&
+      std::all_of(std::next(SI.case_begin()), SI.case_end(),
+                  [&SI](const SwitchInst::CaseHandle &Case) {
+                    return Case.getCaseSuccessor() ==
+                           SI.case_begin()->getCaseSuccessor();
+                  }))
+    CommonSuccBB = SI.case_begin()->getCaseSuccessor();
+
+  if (DefaultExitBB) {
+    // We can't remove the default edge so replace it with an edge to either
+    // the single common remaining successor (if we have one) or an unreachable
+    // block.
+    if (CommonSuccBB) {
+      SI.setDefaultDest(CommonSuccBB);
+    } else {
+      BasicBlock *ParentBB = SI.getParent();
+      BasicBlock *UnreachableBB = BasicBlock::Create(
+          ParentBB->getContext(),
+          Twine(ParentBB->getName()) + ".unreachable_default",
+          ParentBB->getParent());
+      new UnreachableInst(ParentBB->getContext(), UnreachableBB);
+      SI.setDefaultDest(UnreachableBB);
+      DT.addNewBlock(UnreachableBB, ParentBB);
+    }
+  } else {
+    // If we're not unswitching the default, we need it to match any cases to
+    // have a common successor or if we have no cases it is the common
+    // successor.
+    if (SI.getNumCases() == 0)
+      CommonSuccBB = SI.getDefaultDest();
+    else if (SI.getDefaultDest() != CommonSuccBB)
+      CommonSuccBB = nullptr;
+  }
+
+  // Split the preheader, so that we know that there is a safe place to insert
+  // the switch.
+  BasicBlock *OldPH = L.getLoopPreheader();
+  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+  OldPH->getTerminator()->eraseFromParent();
+
+  // Now add the unswitched switch.
+  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+
+  // Split any exit blocks with remaining in-loop predecessors. We walk in
+  // reverse so that we split in the same order as the cases appeared. This is
+  // purely for convenience of reading the resulting IR, but it doesn't cost
+  // anything really.
+  SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
+  // Handle the default exit if necessary.
+  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
+  // ranges aren't quite powerful enough yet.
+  if (DefaultExitBB && !pred_empty(DefaultExitBB)) {
+    auto *SplitBB =
+        SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
+    updateLoopExitIDom(DefaultExitBB, L, DT);
+    DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
+  }
+  // Note that we must use a reference in the for loop so that we update the
+  // container.
+  for (auto &CasePair : reverse(ExitCases)) {
+    // Grab a reference to the exit block in the pair so that we can update it.
+    BasicBlock *&ExitBB = CasePair.second;
+
+    // If this case is the last edge into the exit block, we can simply reuse it
+    // as it will no longer be a loop exit. No mapping necessary.
+    if (pred_empty(ExitBB))
+      continue;
+
+    // Otherwise we need to split the exit block so that we retain an exit
+    // block from the loop and a target for the unswitched condition.
+    BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
+    if (!SplitExitBB) {
+      // If this is the first time we see this, do the split and remember it.
+      SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
+      updateLoopExitIDom(ExitBB, L, DT);
+    }
+    ExitBB = SplitExitBB;
+  }
+
+  // Now add the unswitched cases. We do this in reverse order as we built them
+  // in reverse order.
+  for (auto CasePair : reverse(ExitCases)) {
+    ConstantInt *CaseVal = CasePair.first;
+    BasicBlock *UnswitchedBB = CasePair.second;
+
+    NewSI->addCase(CaseVal, UnswitchedBB);
+    updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+  }
+
+  // If the default was unswitched, re-point it and add explicit cases for
+  // entering the loop.
+  if (DefaultExitBB) {
+    NewSI->setDefaultDest(DefaultExitBB);
+    updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
+
+    // We removed all the exit cases, so we just copy the cases to the
+    // unswitched switch.
+    for (auto Case : SI.cases())
+      NewSI->addCase(Case.getCaseValue(), NewPH);
+  }
+
+  // If we ended up with a common successor for every path through the switch
+  // after unswitching, rewrite it to an unconditional branch to make it easy
+  // to recognize. Otherwise we potentially have to recognize the default case
+  // pointing at unreachable and other complexity.
+  if (CommonSuccBB) {
+    BasicBlock *BB = SI.getParent();
+    SI.eraseFromParent();
+    BranchInst::Create(CommonSuccBB, BB);
+  }
+
+  DT.verifyDomTree();
+  ++NumTrivial;
+  ++NumSwitches;
+  return true;
+}
+
+/// This routine scans the loop to find a branch or switch which occurs before
+/// any side effects occur. These can potentially be unswitched without
+/// duplicating the loop. If a branch or switch is successfully unswitched the
+/// scanning continues to see if subsequent branches or switches have become
+/// trivial. Once all trivial candidates have been unswitched, this routine
+/// returns.
+///
+/// The return value indicates whether anything was unswitched (and therefore
+/// changed).
+static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
+                                         LoopInfo &LI) {
+  bool Changed = false;
+
+  // If loop header has only one reachable successor we should keep looking for
+  // trivial condition candidates in the successor as well. An alternative is
+  // to constant fold conditions and merge successors into loop header (then we
+  // only need to check header's terminator). The reason for not doing this in
+  // LoopUnswitch pass is that it could potentially break LoopPassManager's
+  // invariants. Folding dead branches could either eliminate the current loop
+  // or make other loops unreachable. LCSSA form might also not be preserved
+  // after deleting branches. The following code keeps traversing loop header's
+  // successors until it finds the trivial condition candidate (condition that
+  // is not a constant). Since unswitching generates branches with constant
+  // conditions, this scenario could be very common in practice.
+  BasicBlock *CurrentBB = L.getHeader();
+  SmallPtrSet<BasicBlock *, 8> Visited;
+  Visited.insert(CurrentBB);
+  do {
+    // Check if there are any side-effecting instructions (e.g. stores, calls,
+    // volatile loads) in the part of the loop that the code *would* execute
+    // without unswitching.
+    if (llvm::any_of(*CurrentBB,
+                     [](Instruction &I) { return I.mayHaveSideEffects(); }))
+      return Changed;
+
+    TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+
+    if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+      // Don't bother trying to unswitch past a switch with a constant
+      // condition. This should be removed prior to running this pass by
+      // simplify-cfg.
+      if (isa<Constant>(SI->getCondition()))
+        return Changed;
+
+      if (!unswitchTrivialSwitch(L, *SI, DT, LI))
+        // Coludn't unswitch this one so we're done.
+        return Changed;
+
+      // Mark that we managed to unswitch something.
+      Changed = true;
+
+      // If unswitching turned the terminator into an unconditional branch then
+      // we can continue. The unswitching logic specifically works to fold any
+      // cases it can into an unconditional branch to make it easier to
+      // recognize here.
+      auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
+      if (!BI || BI->isConditional())
+        return Changed;
+
+      CurrentBB = BI->getSuccessor(0);
+      continue;
+    }
+
+    auto *BI = dyn_cast<BranchInst>(CurrentTerm);
+    if (!BI)
+      // We do not understand other terminator instructions.
+      return Changed;
+
+    // Don't bother trying to unswitch past an unconditional branch or a branch
+    // with a constant value. These should be removed by simplify-cfg prior to
+    // running this pass.
+    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+      return Changed;
+
+    // Found a trivial condition candidate: non-foldable conditional branch. If
+    // we fail to unswitch this, we can't do anything else that is trivial.
+    if (!unswitchTrivialBranch(L, *BI, DT, LI))
+      return Changed;
+
+    // Mark that we managed to unswitch something.
+    Changed = true;
+
+    // We unswitched the branch. This should always leave us with an
+    // unconditional branch that we can follow now.
+    BI = cast<BranchInst>(CurrentBB->getTerminator());
+    assert(!BI->isConditional() &&
+           "Cannot form a conditional branch by unswitching1");
+    CurrentBB = BI->getSuccessor(0);
+
+    // When continuing, if we exit the loop or reach a previous visited block,
+    // then we can not reach any trivial condition candidates (unfoldable
+    // branch instructions or switch instructions) and no unswitch can happen.
+  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
+
+  return Changed;
+}
+
+/// Unswitch control flow predicated on loop invariant conditions.
+///
+/// This first hoists all branches or switches which are trivial (IE, do not
+/// require duplicating any part of the loop) out of the loop body. It then
+/// looks at other loop invariant control flows and tries to unswitch those as
+/// well by cloning the loop if the result is small enough.
+static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
+                         AssumptionCache &AC) {
+  assert(L.isLCSSAForm(DT) &&
+         "Loops must be in LCSSA form before unswitching.");
+  bool Changed = false;
+
+  // Must be in loop simplified form: we need a preheader and dedicated exits.
+  if (!L.isLoopSimplifyForm())
+    return false;
+
+  // Try trivial unswitch first before loop over other basic blocks in the loop.
+  Changed |= unswitchAllTrivialConditions(L, DT, LI);
+
+  // FIXME: Add support for non-trivial unswitching by cloning the loop.
+
+  return Changed;
+}
+
+PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
+                                              LoopStandardAnalysisResults &AR,
+                                              LPMUpdater &U) {
+  Function &F = *L.getHeader()->getParent();
+  (void)F;
+
+  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
+
+  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC))
+    return PreservedAnalyses::all();
+
+#ifndef NDEBUG
+  // Historically this pass has had issues with the dominator tree so verify it
+  // in asserts builds.
+  AR.DT.verifyDomTree();
+#endif
+  return getLoopPassPreservedAnalyses();
+}
+
+namespace {
+class SimpleLoopUnswitchLegacyPass : public LoopPass {
+public:
+  static char ID; // Pass ID, replacement for typeid
+  explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) {
+    initializeSimpleLoopUnswitchLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionCacheTracker>();
+    getLoopAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+  if (skipLoop(L))
+    return false;
+
+  Function &F = *L->getHeader()->getParent();
+
+  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
+
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+  bool Changed = unswitchLoop(*L, DT, LI, AC);
+
+#ifndef NDEBUG
+  // Historically this pass has had issues with the dominator tree so verify it
+  // in asserts builds.
+  DT.verifyDomTree();
+#endif
+  return Changed;
+}
+
+char SimpleLoopUnswitchLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+                      "Simple unswitch loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+                    "Simple unswitch loops", false, false)
+
+Pass *llvm::createSimpleLoopUnswitchLegacyPass() {
+  return new SimpleLoopUnswitchLegacyPass();
+}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 22af21d55c0..3d5cbfc93f2 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
 
 bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
   // Recursively deleting a PHI may cause multiple PHIs to be deleted
-  // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
-  SmallVector<WeakVH, 8> PHIs;
+  // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.
+  SmallVector<WeakTrackingVH, 8> PHIs;
   for (BasicBlock::iterator I = BB->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I)
     PHIs.push_back(PN);
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 385c12302e0..d5124ac8901 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -245,7 +245,7 @@ namespace {
 void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                        BasicBlock::const_iterator StartingInst,
                                        std::vector<const BasicBlock*> &ToClone){
-  WeakVH &BBEntry = VMap[BB];
+  WeakTrackingVH &BBEntry = VMap[BB];
 
   // Have we already cloned this block?
   if (BBEntry) return;
@@ -547,7 +547,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
   // Make a second pass over the PHINodes now that all of them have been
   // remapped into the new function, simplifying the PHINode and performing any
   // recursive simplifications exposed. This will transparently update the
-  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
+  // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
   // two PHINodes, the iteration over the old PHIs remains valid, and the
   // mapping will just map us to the new node (which may not even be a PHI
   // node).
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 5d6fbc3325f..6d56e08af99 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1640,7 +1640,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       // modify the struct.
       if (CS.isByValArgument(ArgNo)) {
         ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
-                                        CalledFunc->getParamAlignment(ArgNo+1));
+                                        CalledFunc->getParamAlignment(ArgNo));
         if (ActualArg != *AI)
           ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
       }
@@ -2302,7 +2302,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     AssumptionCache *AC =
         IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
     auto &DL = Caller->getParent()->getDataLayout();
-    if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) {
+    if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
       PHI->replaceAllUsesWith(V);
       PHI->eraseFromParent();
     }
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index fe93d6927c6..42aca757c2a 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -48,16 +49,6 @@ using namespace llvm;
 STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");
 STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted");
 
-static cl::opt<bool> LibCallsShrinkWrapDoDomainError(
-    "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden,
-    cl::desc("Perform shrink-wrap on lib calls with domain errors"));
-static cl::opt<bool> LibCallsShrinkWrapDoRangeError(
-    "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden,
-    cl::desc("Perform shrink-wrap on lib calls with range errors"));
-static cl::opt<bool> LibCallsShrinkWrapDoPoleError(
-    "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden,
-    cl::desc("Perform shrink-wrap on lib calls with pole errors"));
-
 namespace {
 class LibCallsShrinkWrapLegacyPass : public FunctionPass {
 public:
@@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
 namespace {
 class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {
 public:
-  LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){};
-  bool isChanged() const { return Changed; }
+  LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT)
+      : TLI(TLI), DT(DT){};
   void visitCallInst(CallInst &CI) { checkCandidate(CI); }
-  void perform() {
+  bool perform() {
+    bool Changed = false;
     for (auto &CI : WorkList) {
       DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
                    << "\n");
@@ -94,6 +86,7 @@ public:
         DEBUG(dbgs() << "Transformed\n");
       }
     }
+    return Changed;
   }
 
 private:
@@ -134,8 +127,8 @@ private:
   }
 
   const TargetLibraryInfo &TLI;
+  DominatorTree *DT;
   SmallVector<CallInst *, 16> WorkList;
-  bool Changed;
 };
 } // end anonymous namespace
 
@@ -241,8 +234,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
   case LibFunc_atanhf: // Same as atanh
   case LibFunc_atanhl: // Same as atanh
   {
-    if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
-      return false;
     ++NumWrappedTwoCond;
     Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
     break;
@@ -262,8 +253,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
   case LibFunc_logbf:  // Same as log
   case LibFunc_logbl:  // Same as log
   {
-    if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
-      return false;
     ++NumWrappedOneCond;
     Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
     break;
@@ -274,8 +263,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
   case LibFunc_log1pf: // Same as log1p
   case LibFunc_log1pl: // Same as log1p
   {
-    if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
-      return false;
     ++NumWrappedOneCond;
     Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
     break;
@@ -285,9 +272,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
                      // RangeError:  overflow or underflow
   case LibFunc_powf:
   case LibFunc_powl: {
-    if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError ||
-        !LibCallsShrinkWrapDoRangeError)
-      return false;
     Cond = generateCondForPow(CI, Func);
     if (Cond == nullptr)
       return false;
@@ -346,7 +330,7 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
     UpperBound = 11356.0f;
     break;
   default:
-    llvm_unreachable("Should be reach here");
+    llvm_unreachable("Unhandled library call!");
   }
 
   ++NumWrappedOneCond;
@@ -410,7 +394,7 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
     UpperBound = 11383.0f;
     break;
   default:
-    llvm_unreachable("Should be reach here");
+    llvm_unreachable("Unhandled library call!");
   }
 
   ++NumWrappedTwoCond;
@@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
 
 // Wrap conditions that can potentially generate errno to the library call.
 void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
-  assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst");
+  assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst");
   MDNode *BranchWeights =
       MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
+
   TerminatorInst *NewInst =
-      SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights);
+      SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
   BasicBlock *CallBB = NewInst->getParent();
   CallBB->setName("cdce.call");
-  CallBB->getSingleSuccessor()->setName("cdce.end");
+  BasicBlock *SuccBB = CallBB->getSingleSuccessor();
+  assert(SuccBB && "The split block should have a single successor");
+  SuccBB->setName("cdce.end");
   CI->removeFromParent();
   CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI);
   DEBUG(dbgs() << "== Basic Block After ==");
@@ -522,32 +509,38 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) {
   TLI.getLibFunc(*Callee, Func);
   assert(Func && "perform() is not expecting an empty function");
 
-  if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func))
+  if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func))
     return true;
-
-  if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func))
-    return true;
-
   return performCallErrors(CI, Func);
 }
 
 void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addPreserved<DominatorTreeWrapperPass>();
   AU.addPreserved<GlobalsAAWrapperPass>();
   AU.addRequired<TargetLibraryInfoWrapperPass>();
 }
 
-static bool runImpl(Function &F, const TargetLibraryInfo &TLI) {
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+                    DominatorTree *DT) {
   if (F.hasFnAttribute(Attribute::OptimizeForSize))
     return false;
-  LibCallsShrinkWrap CCDCE(TLI);
+  LibCallsShrinkWrap CCDCE(TLI, DT);
   CCDCE.visit(F);
-  CCDCE.perform();
-  return CCDCE.isChanged();
+  bool Changed = CCDCE.perform();
+
+// Verify the dominator after we've updated it locally.
+#ifndef NDEBUG
+  if (DT)
+    DT->verifyDomTree();
+#endif
+  return Changed;
 }
 
 bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  return runImpl(F, TLI);
+  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+  return runImpl(F, TLI, DT);
 }
 
 namespace llvm {
@@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() {
 PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
   auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
-  bool Changed = runImpl(F, TLI);
-  if (!Changed)
+  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+  if (!runImpl(F, TLI, DT))
     return PreservedAnalyses::all();
   auto PA = PreservedAnalyses();
   PA.preserve<GlobalsAA>();
+  PA.preserve<DominatorTreeAnalysis>();
   return PA;
 }
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index d3002c5fb75..ce6b703f352 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -562,7 +562,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
   // that can be removed.
   BB->removePredecessor(Pred, true);
 
-  WeakVH PhiIt = &BB->front();
+  WeakTrackingVH PhiIt = &BB->front();
   while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
     PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
     Value *OldPhiIt = PhiIt;
@@ -1259,49 +1259,6 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
           DbgValues.push_back(DVI);
 }
 
-static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
-  if (Offset > 0) {
-    Ops.push_back(dwarf::DW_OP_plus);
-    Ops.push_back(Offset);
-  } else if (Offset < 0) {
-    Ops.push_back(dwarf::DW_OP_minus);
-    Ops.push_back(-Offset);
-  }
-}
-
-enum { WithStackValue = true };
-
-/// Prepend \p DIExpr with a deref and offset operation and optionally turn it
-/// into a stack value.
-static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
-                                   bool Deref, int64_t Offset = 0,
-                                   bool StackValue = false) {
-  if (!Deref && !Offset && !StackValue)
-    return DIExpr;
-
-  SmallVector<uint64_t, 8> Ops;
-  appendOffset(Ops, Offset);
-  if (Deref)
-    Ops.push_back(dwarf::DW_OP_deref);
-  if (DIExpr)
-    for (auto Op : DIExpr->expr_ops()) {
-      // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment.
-      if (StackValue) {
-        if (Op.getOp() == dwarf::DW_OP_stack_value)
-          StackValue = false;
-        else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
-          Ops.push_back(dwarf::DW_OP_stack_value);
-          StackValue = false;
-        }
-      }
-      Ops.push_back(Op.getOp());
-      for (unsigned I = 0; I < Op.getNumArgs(); ++I)
-        Ops.push_back(Op.getArg(I));
-    }
-  if (StackValue)
-    Ops.push_back(dwarf::DW_OP_stack_value);
-  return Builder.createExpression(Ops);
-}
 
 bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
                              Instruction *InsertBefore, DIBuilder &Builder,
@@ -1313,9 +1270,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
   auto *DIVar = DDI->getVariable();
   auto *DIExpr = DDI->getExpression();
   assert(DIVar && "Missing variable");
-
-  DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset);
-
+  DIExpr = DIExpression::prepend(DIExpr, Deref, Offset);
   // Insert llvm.dbg.declare immediately after the original alloca, and remove
   // old llvm.dbg.declare.
   Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
@@ -1348,7 +1303,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
   if (Offset) {
     SmallVector<uint64_t, 4> Ops;
     Ops.push_back(dwarf::DW_OP_deref);
-    appendOffset(Ops, Offset);
+    DIExpression::appendOffset(Ops, Offset);
     Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
     DIExpr = Builder.createExpression(Ops);
   }
@@ -1398,8 +1353,9 @@ void llvm::salvageDebugInfo(Instruction &I) {
         auto *DIExpr = DVI->getExpression();
         DIBuilder DIB(M, /*AllowUnresolved*/ false);
         // GEP offsets are i32 and thus always fit into an int64_t.
-        DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(),
-                               WithStackValue);
+        DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref,
+                                       Offset.getSExtValue(),
+                                       DIExpression::WithStackValue);
         DVI->setOperand(0, MDWrap(I.getOperand(0)));
         DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
         DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
@@ -1411,7 +1367,7 @@ void llvm::salvageDebugInfo(Instruction &I) {
       // Rewrite the load into DW_OP_deref.
       auto *DIExpr = DVI->getExpression();
       DIBuilder DIB(M, /*AllowUnresolved*/ false);
-      DIExpr = prependDIExpr(DIB, DIExpr, WithDeref);
+      DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
       DVI->setOperand(0, MDWrap(I.getOperand(0)));
       DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
       DEBUG(dbgs() << "SALVAGE:  " << *DVI << '\n');
@@ -1520,7 +1476,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
   II->setAttributes(CI->getAttributes());
 
   // Make sure that anything using the call now uses the invoke!  This also
-  // updates the CallGraph if present, because it uses a WeakVH.
+  // updates the CallGraph if present, because it uses a WeakTrackingVH.
   CI->replaceAllUsesWith(II);
 
   // Delete the original call
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index e7ba19665d5..72c06aef803 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -210,7 +210,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
     PHINode *PN = cast<PHINode>(I);
     ++I;
-    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
+    if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
       // This is a degenerate PHI already, don't modify it!
       PN->replaceAllUsesWith(V);
       PN->eraseFromParent();
@@ -628,7 +628,7 @@ ReprocessLoop:
   PHINode *PN;
   for (BasicBlock::iterator I = L->getHeader()->begin();
        (PN = dyn_cast<PHINode>(I++)); )
-    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
+    if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
       if (SE) SE->forgetValue(PN);
       if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
         PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 43ab725b076..4ab4d7949d2 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -757,7 +757,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
 
   // Simplify any new induction variables in the partially unrolled loop.
   if (SE && !CompletelyUnroll && Count > 1) {
-    SmallVector<WeakVH, 16> DeadInsts;
+    SmallVector<WeakTrackingVH, 16> DeadInsts;
     simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
 
     // Aggressively clean up dead instructions that simplifyLoopIVs already
@@ -777,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
       Instruction *Inst = &*I++;
 
-      if (Value *V = SimplifyInstruction(Inst, DL))
+      if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
         if (LI->replacementPreservesLCSSAForm(Inst, V))
           Inst->replaceAllUsesWith(V);
       if (isInstructionTriviallyDead(Inst))
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index dbe42c201dd..29d334f2968 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -237,3 +237,35 @@ void llvm::filterDeadComdatFunctions(
            ComdatEntriesCovered.end();
   });
 }
+
+std::string llvm::getUniqueModuleId(Module *M) {
+  MD5 Md5;
+  bool ExportsSymbols = false;
+  auto AddGlobal = [&](GlobalValue &GV) {
+    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+        !GV.hasExternalLinkage())
+      return;
+    ExportsSymbols = true;
+    Md5.update(GV.getName());
+    Md5.update(ArrayRef<uint8_t>{0});
+  };
+
+  for (auto &F : *M)
+    AddGlobal(F);
+  for (auto &GV : M->globals())
+    AddGlobal(GV);
+  for (auto &GA : M->aliases())
+    AddGlobal(GA);
+  for (auto &IF : M->ifuncs())
+    AddGlobal(IF);
+
+  if (!ExportsSymbols)
+    return "";
+
+  MD5::MD5Result R;
+  Md5.final(R);
+
+  SmallString<32> Str;
+  MD5::stringifyResult(R, Str);
+  return ("$" + Str).str();
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a33b85c4ee6..cdba982e664 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -225,10 +225,10 @@ struct PromoteMem2Reg {
   std::vector<AllocaInst *> Allocas;
   DominatorTree &DT;
   DIBuilder DIB;
-
   /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
   AssumptionCache *AC;
 
+  const SimplifyQuery SQ;
   /// Reverse mapping of Allocas.
   DenseMap<AllocaInst *, unsigned> AllocaLookup;
 
@@ -270,7 +270,8 @@ public:
                  AssumptionCache *AC)
       : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
         DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
-        AC(AC) {}
+        AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
+                   nullptr, &DT, AC) {}
 
   void run();
 
@@ -673,8 +674,6 @@ void PromoteMem2Reg::run() {
     A->eraseFromParent();
   }
 
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
   // Remove alloca's dbg.declare instrinsics from the function.
   for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
     if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
@@ -699,7 +698,7 @@ void PromoteMem2Reg::run() {
       PHINode *PN = I->second;
 
       // If this PHI node merges one value and/or undefs, get the value.
-      if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
+      if (Value *V = SimplifyInstruction(PN, SQ)) {
         PN->replaceAllUsesWith(V);
         PN->eraseFromParent();
         NewPhiNodes.erase(I++);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f86e97b6cc7..7a3e8b9ae91 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2231,7 +2231,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
       }
 
       // Check for trivial simplification.
-      if (Value *V = SimplifyInstruction(N, DL)) {
+      if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
         if (!BBI->use_empty())
           TranslateMap[&*BBI] = V;
         if (!N->mayHaveSideEffects()) {
@@ -2307,7 +2307,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
 
   for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
     PHINode *PN = cast<PHINode>(II++);
-    if (Value *V = SimplifyInstruction(PN, DL)) {
+    if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
       PN->replaceAllUsesWith(V);
       PN->eraseFromParent();
       continue;
@@ -3545,7 +3545,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
     assert(VVal && "Should have a unique destination value");
     ICI->setOperand(0, VVal);
 
-    if (Value *V = SimplifyInstruction(ICI, DL)) {
+    if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
       ICI->replaceAllUsesWith(V);
       ICI->eraseFromParent();
     }
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index a4cc6a031ad..02a5d3dbead 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -51,13 +51,13 @@ namespace {
     ScalarEvolution  *SE;
     DominatorTree    *DT;
 
-    SmallVectorImpl<WeakVH> &DeadInsts;
+    SmallVectorImpl<WeakTrackingVH> &DeadInsts;
 
     bool Changed;
 
   public:
     SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
-                   LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead)
+                   LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead)
         : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) {
       assert(LI && "IV simplification requires LoopInfo");
     }
@@ -701,7 +701,7 @@ void IVVisitor::anchor() { }
 /// Simplify instructions that use this induction variable
 /// by using ScalarEvolution to analyze the IV's recurrence.
 bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
-                       LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead,
+                       LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
                        IVVisitor *V) {
   SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead);
   SIV.simplifyUsers(CurrIV, V);
@@ -711,7 +711,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
 /// Simplify users of induction variables within this
 /// loop. This does not actually change or add IVs.
 bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
-                     LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) {
+                     LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {
   bool Changed = false;
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
     Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead);
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 27373427d4f..2509b5f2204 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
 
         // Don't waste time simplifying unused instructions.
         if (!I->use_empty()) {
-          if (Value *V =
-                  SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
+          if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
             // Mark all uses for resimplification next time round the loop.
             for (User *U : I->users())
               Next->insert(cast<Instruction>(U));
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 55494440470..f112c555205 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3899,11 +3899,13 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 }
 
 /// \brief Check that the Values in the slice in VL array are still existent in
-/// the WeakVH array.
+/// the WeakTrackingVH array.
 /// Vectorization of part of the VL array may cause later values in the VL array
-/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
-                               unsigned SliceBegin, unsigned SliceSize) {
+/// to become invalid. We track when this has happened in the WeakTrackingVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
+                               ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
+                               unsigned SliceSize) {
   VL = VL.slice(SliceBegin, SliceSize);
   VH = VH.slice(SliceBegin, SliceSize);
   return !std::equal(VL.begin(), VL.end(), VH.begin());
@@ -3921,7 +3923,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
     return false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
 
   bool Changed = false;
   // Look for profitable vectorizable trees at all offsets, starting at zero.
@@ -4107,7 +4109,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   bool Changed = false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
 
   unsigned NextInst = 0, MaxInst = VL.size();
   for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
@@ -4734,7 +4736,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
 
 namespace {
 /// Tracks instructons and its children.
-class WeakVHWithLevel final : public CallbackVH {
+class WeakTrackingVHWithLevel final : public CallbackVH {
   /// Operand index of the instruction currently beeing analized.
   unsigned Level = 0;
   /// Is this the instruction that should be vectorized, or are we now
@@ -4743,8 +4745,8 @@ class WeakVHWithLevel final : public CallbackVH {
   bool IsInitial = true;
 
 public:
-  explicit WeakVHWithLevel() = default;
-  WeakVHWithLevel(Value *V) : CallbackVH(V){};
+  explicit WeakTrackingVHWithLevel() = default;
+  WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
   /// Restart children analysis each time it is repaced by the new instruction.
   void allUsesReplacedWith(Value *New) override {
     setValPtr(New);
@@ -4771,7 +4773,7 @@ public:
            cast<Instruction>(getValPtr())->getNumOperands() > Level);
     return cast<Instruction>(getValPtr())->getOperand(Level++);
   }
-  virtual ~WeakVHWithLevel() = default;
+  virtual ~WeakTrackingVHWithLevel() = default;
 };
 } // namespace
 
@@ -4793,7 +4795,7 @@ static bool canBeVectorized(
 
   if (Root->getParent() != BB)
     return false;
-  SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+  SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
   SmallSet<Value *, 8> VisitedInstrs;
   bool Res = false;
   while (!Stack.empty()) {
@@ -5069,7 +5071,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
       SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
 
       // Some of the candidates may have already been vectorized after we
-      // initially collected them. If so, the WeakVHs will have nullified the
+      // initially collected them. If so, the WeakTrackingVHs will have
+      // nullified the
       // values, so remove them from the set of candidates.
       Candidates.remove(nullptr);
 
diff --git a/test/Analysis/AliasSet/unknown-inst-tracking.ll b/test/Analysis/AliasSet/unknown-inst-tracking.ll
new file mode 100644
index 00000000000..da528fbae07
--- /dev/null
+++ b/test/Analysis/AliasSet/unknown-inst-tracking.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -licm -loop-unswitch < %s | FileCheck %s
+
+; This test checks for a crash.  See PR32587.
+
+@global = external global i32
+
+declare i32 @f_1(i8, i32 returned)
+
+define i32 @f_0() {
+; CHECK-LABEL: @f_0(
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb3, %bb
+  %tmp = load i32, i32* @global
+  %tmp2 = select i1 false, i16 1, i16 0
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb1
+  %tmp4 = phi i8 [ 0, %bb1 ], [ %tmp6, %bb3 ]
+  %tmp5 = icmp eq i16 %tmp2, 0
+  %tmp6 = select i1 %tmp5, i8 %tmp4, i8 1
+  %tmp7 = tail call i32 @f_1(i8 %tmp6, i32 1)
+  br i1 false, label %bb1, label %bb3
+}
diff --git a/test/Analysis/ScalarEvolution/flags-from-poison.ll b/test/Analysis/ScalarEvolution/flags-from-poison.ll
index 44ee830d9c6..15c679a5f10 100644
--- a/test/Analysis/ScalarEvolution/flags-from-poison.ll
+++ b/test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -205,7 +205,7 @@ exit:
   ret void
 }
 
-; Demonstrate why we need a Visited set in llvm::isKnownNotFullPoison.
+; Demonstrate why we need a Visited set in llvm::programUndefinedIfFullPoison.
 define void @test-add-not-header5(float* %input, i32 %offset) {
 ; CHECK-LABEL: @test-add-not-header5
 entry:
diff --git a/test/Assembler/dinamespace.ll b/test/Assembler/dinamespace.ll
index 346fcfb1111..af20c19f131 100644
--- a/test/Assembler/dinamespace.ll
+++ b/test/Assembler/dinamespace.ll
@@ -8,11 +8,11 @@
 !1 = distinct !{}
 !2 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
 
-; CHECK: !3 = !DINamespace(name: "Namespace", scope: !0, file: !2, line: 7)
-!3 = !DINamespace(name: "Namespace", scope: !0, file: !2, line: 7)
+; CHECK: !3 = !DINamespace(name: "Namespace", scope: !0)
+!3 = !DINamespace(name: "Namespace", scope: !0)
 
 ; CHECK: !4 = !DINamespace(scope: !0)
-!4 = !DINamespace(name: "", scope: !0, file: null, line: 0)
+!4 = !DINamespace(name: "", scope: !0)
 !5 = !DINamespace(scope: !0)
 !6 = !DINamespace(scope: !0, exportSymbols: false)
 ; CHECK: !5 = !DINamespace(scope: !0, exportSymbols: true)
diff --git a/test/Assembler/disubprogram.ll b/test/Assembler/disubprogram.ll
index f6352a5e82c..8a3a60aa079 100644
--- a/test/Assembler/disubprogram.ll
+++ b/test/Assembler/disubprogram.ll
@@ -6,8 +6,8 @@ define void @_Z3foov() !dbg !9 {
   ret void
 }
 
-; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
-!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14}
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14}
 
 !0 = !{null}
 !1 = distinct !DICompositeType(tag: DW_TAG_structure_type)
@@ -61,6 +61,14 @@ define void @_Z3foov() !dbg !9 {
                             unit: !8,
                             templateParams: !5, declaration: !9, variables: !6)
 
-!13 = !{i32 1, !"Debug Info Version", i32 3}
-!llvm.module.flags = !{!13}
+!13 = !{!4}
+; CHECK: !13 = !{!4}
+; CHECK: !14 = distinct !DISubprogram(name: "foo", scope: !1, file: !2, line: 1, type: !3, isLocal: true, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !8, thrownTypes: !13)
+!14 = distinct !DISubprogram(name: "foo", scope: !1,
+                            file: !2, line: 1, type: !3, isLocal: true,
+                            isDefinition: true, scopeLine: 2, isOptimized: false,
+                            unit: !8, thrownTypes: !13)
+
+!15 = !{i32 1, !"Debug Info Version", i32 3}
+!llvm.module.flags = !{!15}
 !llvm.dbg.cu = !{!8}
diff --git a/test/Bitcode/DINamespace.ll b/test/Bitcode/DINamespace.ll
index 2807cb02d3d..e3a04fbc872 100644
--- a/test/Bitcode/DINamespace.ll
+++ b/test/Bitcode/DINamespace.ll
@@ -10,8 +10,8 @@ target triple = "x86_64-apple-macosx10.12.0"
 
 !0 = distinct !DIGlobalVariable(name: "i", linkageName: "_ZN1N1iE", scope: !1, file: !2, line: 2, type: !3, isLocal: false, isDefinition: true)
 ; Test bitcode upgrade for DINamespace without an exportSymbols field.
-; CHECK: !DINamespace(name: "N", scope: null, file: !{{[0-9]+}}, line: 1)
-!1 = !DINamespace(name: "N", scope: null, file: !2, line: 1)
+; CHECK: !DINamespace(name: "N", scope: null)
+!1 = !DINamespace(name: "N", scope: null)
 !2 = !DIFile(filename: "dinamespace.cpp", directory: "/")
 !3 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
 !4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, producer: "clang version 4.0.0 (trunk 283228) (llvm/trunk 283225)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !5, globals: !6)
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index 9fdf54b7b30..18aa12c7af9 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -204,7 +204,7 @@ define void @f34()
 ; CHECK: define void @f34()
 {
         call void @nobuiltin() nobuiltin
-; CHECK: call void @nobuiltin() #33
+; CHECK: call void @nobuiltin() #34
         ret void;
 }
 
@@ -334,6 +334,11 @@ define void @f56() writeonly
   ret void
 }
 
+; CHECK: define void @f57() #33
+define void @f57() speculatable {
+  ret void
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -367,4 +372,5 @@ define void @f56() writeonly
 ; CHECK: attributes #30 = { allocsize(0) }
 ; CHECK: attributes #31 = { allocsize(0,1) }
 ; CHECK: attributes #32 = { writeonly }
-; CHECK: attributes #33 = { nobuiltin }
+; CHECK: attributes #33 = { speculatable }
+; CHECK: attributes #34 = { nobuiltin }
diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll
index b1f52bbe059..ec69344947c 100644
--- a/test/Bitcode/compatibility.ll
+++ b/test/Bitcode/compatibility.ll
@@ -3,7 +3,7 @@
 ; Please update this file when making any IR changes. Information on the
 ; release process for this file is available here:
 ;
-;     http://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility 
+;     http://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility
 
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
 ; RUN-PR24755: verify-uselistorder < %s
@@ -472,6 +472,10 @@ declare cc91 void @f.cc91()
 ; CHECK: declare amdgpu_kernel void @f.cc91()
 declare amdgpu_kernel void @f.amdgpu_kernel()
 ; CHECK: declare amdgpu_kernel void @f.amdgpu_kernel()
+declare cc93 void @f.cc93()
+; CHECK: declare amdgpu_hs void @f.cc93()
+declare amdgpu_hs void @f.amdgpu_hs()
+; CHECK: declare amdgpu_hs void @f.amdgpu_hs()
 declare cc1023 void @f.cc1023()
 ; CHECK: declare cc1023 void @f.cc1023()
 
@@ -1246,7 +1250,7 @@ exit:
   ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
 
   call void @f.nobuiltin() builtin
-  ; CHECK: call void @f.nobuiltin() #41
+  ; CHECK: call void @f.nobuiltin() #42
 
   call fastcc noalias i32* @f.noalias() noinline
   ; CHECK: call fastcc noalias i32* @f.noalias() #12
@@ -1613,6 +1617,9 @@ normal:
 declare void @f.writeonly() writeonly
 ; CHECK: declare void @f.writeonly() #40
 
+declare void @f.speculatable() speculatable
+; CHECK: declare void @f.speculatable() #41
+
 ;; Constant Expressions
 
 define i8** @constexpr() {
@@ -1661,7 +1668,8 @@ define i8** @constexpr() {
 ; CHECK: attributes #38 = { nounwind readonly }
 ; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind }
 ; CHECK: attributes #40 = { writeonly }
-; CHECK: attributes #41 = { builtin }
+; CHECK: attributes #41 = { speculatable }
+; CHECK: attributes #42 = { builtin }
 
 ;; Metadata
 
diff --git a/test/CodeGen/AArch64/arm64-anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll
index 1af31038324..10989a07990 100644
--- a/test/CodeGen/AArch64/arm64-anyregcc.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: .section	__LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -48,18 +48,24 @@
 ; CHECK-NEXT:   .short  3
 ; Loc 0: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 4
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 4
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Constant 3
 ; CHECK-NEXT:   .byte 4
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 3
 define i64 @test() nounwind ssp uwtable {
 entry:
@@ -69,18 +75,22 @@ entry:
 
 ; property access 1 - %obj is an anyreg call argument and should therefore be in a register
 ; CHECK-LABEL:  .long   L{{.*}}-_property_access1
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 2 locations
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
 entry:
@@ -91,18 +101,22 @@ entry:
 
 ; property access 2 - %obj is an anyreg call argument and should therefore be in a register
 ; CHECK-LABEL:  .long   L{{.*}}-_property_access2
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 2 locations
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @property_access2() nounwind ssp uwtable {
 entry:
@@ -114,18 +128,22 @@ entry:
 
 ; property access 3 - %obj is a frame index
 ; CHECK-LABEL:  .long   L{{.*}}-_property_access3
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 2 locations
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Direct FP - 8
 ; CHECK-NEXT:   .byte 2
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short 29
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long -8
 define i64 @property_access3() nounwind ssp uwtable {
 entry:
@@ -137,78 +155,106 @@ entry:
 
 ; anyreg_test1
 ; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test1
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 14 locations
 ; CHECK-NEXT:   .short  14
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 3: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 4: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 5: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 6: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 7: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 8: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 9: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 10: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 11: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 12: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 13: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
@@ -219,78 +265,106 @@ entry:
 
 ; anyreg_test2
 ; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test2
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 14 locations
 ; CHECK-NEXT:   .short  14
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 3: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 4: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 5: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 6: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 7: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 8: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 9: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 10: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 11: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 12: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 13: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
@@ -308,18 +382,24 @@ entry:
 ; CHECK-NEXT: .short 3
 ; Loc 0: Register (some register that will be spilled to the stack)
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
@@ -337,28 +417,38 @@ entry:
 ; CHECK-NEXT: .short 5
 ; Loc 0: Return a register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Arg0 in a Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 2: Arg1 in a Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 3: Arg2 spilled to FP -96
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short 29
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long -96
 ; Loc 4: Arg3 spilled to FP - 88
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short 29
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long -88
 define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll
index 0b2e9776263..e12a35a93e2 100644
--- a/test/CodeGen/AArch64/arm64-stackmap.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap.ll
@@ -10,7 +10,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -67,22 +67,30 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-NEXT:   .short  4
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   65535
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   65536
 ; SmallConstant
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; LargeConstant at index 0
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   1
 
@@ -99,12 +107,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long  0
 define void @osrinline(i64 %a, i64 %b) {
 entry:
@@ -123,12 +135,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long  0
 define void @osrcold(i64 %a, i64 %b) {
 entry:
@@ -163,12 +179,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
 entry:
@@ -185,12 +205,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
 entry:
@@ -207,12 +231,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
 entry:
@@ -233,8 +261,11 @@ entry:
 ; Check that at least one is a spilled entry from RBP.
 ; Location: Indirect FP + ...
 ; CHECK:        .byte 3
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 
 ; CHECK-NEXT:   .short 29
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long
 define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
 entry:
   call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 20, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
@@ -252,8 +283,11 @@ entry:
 ; Check that at least one is a spilled entry from RBP.
 ; Location: Indirect FP + ...
 ; CHECK:        .byte 3
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 
 ; CHECK-NEXT:   .short 29
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long
 define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
 entry:
   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
@@ -269,7 +303,9 @@ entry:
 ; CHECK-NEXT:   .short 1
 ; Loc 0: SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   33
 
@@ -286,8 +322,10 @@ define void @liveConstant() {
 ; CHECK-NEXT:   .short 1
 ; Loc 0: Indirect FP (r29) - offset
 ; CHECK-NEXT:   .byte   3
-; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  4
 ; CHECK-NEXT:   .short  29
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -{{[0-9]+}}
 define void @clobberLR(i32 %a) {
   tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x31}"() nounwind
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index 88700a15343..17979f4036c 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -30,13 +30,13 @@ define i32 @test_generaldynamic() {
 ; CHECK-NOLD: ldr w0, [x[[TP]], x0]
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
 
 }
@@ -56,13 +56,13 @@ define i32* @test_generaldynamic_addr() {
 ; CHECK: add x0, [[TP]], x0
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
 
 }
@@ -95,15 +95,15 @@ define i32 @test_localdynamic() {
 
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
 ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
 
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
 
 }
@@ -131,15 +131,15 @@ define i32* @test_localdynamic_addr() {
   ret i32* @local_dynamic_var
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
 ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
 
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12
 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
 }
 
diff --git a/test/CodeGen/AArch64/stackmap-liveness.ll b/test/CodeGen/AArch64/stackmap-liveness.ll
index 4b04276ac22..b66dbfae6c8 100644
--- a/test/CodeGen/AArch64/stackmap-liveness.ll
+++ b/test/CodeGen/AArch64/stackmap-liveness.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:   __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -25,6 +25,7 @@ define i64 @stackmap_liveness(i1 %c) {
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; Padding
+; CHECK-NEXT:   .p2align  3
 ; CHECK-NEXT:   .short  0
 ; Num LiveOut Entries: 1
 ; CHECK-NEXT:   .short  2
diff --git a/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index e2620ce353c..f7461b925ca 100644
--- a/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -221,10 +221,10 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)*
   ret void
 }
 
-attributes #0 = { nounwind readnone }
+attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
 
-; HSA: attributes #0 = { nounwind readnone }
+; HSA: attributes #0 = { nounwind readnone speculatable }
 ; HSA: attributes #1 = { nounwind }
 ; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
 ; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll
index 0d7e07b9a62..636b45db698 100644
--- a/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/test/CodeGen/AMDGPU/inline-asm.ll
@@ -232,3 +232,17 @@ entry:
   call void asm sideeffect "; use $0 $1 ", "{VGPR0}, {VGPR1}"(i1 %val0, i1 %val1)
   ret void
 }
+
+; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
+; CHECK: ; def v0
+; CHECK: v_mov_b32_e32 v1, v0
+; CHECK: ; def v0
+; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
+define amdgpu_kernel void @muliple_def_phys_vgpr() {
+entry:
+  %def0 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"()
+  %def1 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"()
+  %add = shl i32 %def0, %def1
+  store i32 %add, i32 addrspace(1)* undef
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
new file mode 100644
index 00000000000..617f1f19e36
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
@@ -0,0 +1,80 @@
+;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
+
+; GCN-LABEL: {{^}}full_mask:
+; GCN: s_mov_b64 exec, -1
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @full_mask(float %a, float %b) {
+main_body:
+  %s = fadd float %a, %b
+  call void @llvm.amdgcn.init.exec(i64 -1)
+  ret float %s
+}
+
+; GCN-LABEL: {{^}}partial_mask:
+; GCN: s_mov_b64 exec, 0x1e240
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @partial_mask(float %a, float %b) {
+main_body:
+  %s = fadd float %a, %b
+  call void @llvm.amdgcn.init.exec(i64 123456)
+  ret float %s
+}
+
+; GCN-LABEL: {{^}}input_s3off8:
+; GCN: s_bfe_u32 s0, s3, 0x70008
+; GCN: s_bfm_b64 exec, s0, 0
+; GCN: s_cmp_eq_u32 s0, 64
+; GCN: s_cmov_b64 exec, -1
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @input_s3off8(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
+main_body:
+  %s = fadd float %a, %b
+  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
+  ret float %s
+}
+
+; GCN-LABEL: {{^}}input_s0off19:
+; GCN: s_bfe_u32 s0, s0, 0x70013
+; GCN: s_bfm_b64 exec, s0, 0
+; GCN: s_cmp_eq_u32 s0, 64
+; GCN: s_cmov_b64 exec, -1
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @input_s0off19(i32 inreg %count, float %a, float %b) {
+main_body:
+  %s = fadd float %a, %b
+  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
+  ret float %s
+}
+
+; GCN-LABEL: {{^}}reuse_input:
+; GCN: s_bfe_u32 s1, s0, 0x70013
+; GCN: s_bfm_b64 exec, s1, 0
+; GCN: s_cmp_eq_u32 s1, 64
+; GCN: s_cmov_b64 exec, -1
+; GCN: v_add_i32_e32 v0, vcc, s0, v0
+define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
+main_body:
+  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
+  %s = add i32 %a, %count
+  %f = sitofp i32 %s to float
+  ret float %f
+}
+
+; GCN-LABEL: {{^}}reuse_input2:
+; GCN: s_bfe_u32 s1, s0, 0x70013
+; GCN: s_bfm_b64 exec, s1, 0
+; GCN: s_cmp_eq_u32 s1, 64
+; GCN: s_cmov_b64 exec, -1
+; GCN: v_add_i32_e32 v0, vcc, s0, v0
+define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
+main_body:
+  %s = add i32 %a, %count
+  %f = sitofp i32 %s to float
+  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
+  ret float %f
+}
+
+declare void @llvm.amdgcn.init.exec(i64) #1
+declare void @llvm.amdgcn.init.exec.from.input(i32, i32) #1
+
+attributes #1 = { convergent }
diff --git a/test/CodeGen/AMDGPU/zext-lid.ll b/test/CodeGen/AMDGPU/zext-lid.ll
index 8eeff53ff99..066f2927727 100644
--- a/test/CodeGen/AMDGPU/zext-lid.ll
+++ b/test/CodeGen/AMDGPU/zext-lid.ll
@@ -4,19 +4,19 @@
 ; CHECK-NOT: and_b32
 
 ; OPT-LABEL: @zext_grp_size_128
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !0
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !0
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !0
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
 define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
 bb:
-  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = and i32 %tmp, 127
   store i32 %tmp1, i32 addrspace(1)* %arg, align 4
-  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
+  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
   %tmp3 = and i32 %tmp2, 127
   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
   store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
-  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
+  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
   %tmp6 = and i32 %tmp5, 127
   %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
   store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
@@ -24,19 +24,19 @@ bb:
 }
 
 ; OPT-LABEL: @zext_grp_size_32x4x1
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !2
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !3
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !4
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
 define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
 bb:
-  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = and i32 %tmp, 31
   store i32 %tmp1, i32 addrspace(1)* %arg, align 4
-  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
+  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
   %tmp3 = and i32 %tmp2, 3
   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
   store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
-  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
+  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
   %tmp6 = and i32 %tmp5, 1
   %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
   store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
@@ -44,19 +44,19 @@ bb:
 }
 
 ; OPT-LABEL: @zext_grp_size_512
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !5
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !5
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !5
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !5
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !5
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !5
 define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
 bb:
-  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = and i32 %tmp, 65535
   store i32 %tmp1, i32 addrspace(1)* %arg, align 4
-  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
+  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
   %tmp3 = and i32 %tmp2, 65535
   %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
   store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
-  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
+  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
   %tmp6 = and i32 %tmp5, 65535
   %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
   store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
@@ -71,7 +71,8 @@ declare i32 @llvm.amdgcn.workitem.id.z() #2
 
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
 attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
-attributes #2 = { nounwind readnone }
+attributes #2 = { nounwind readnone speculatable }
+attributes #3 = { nounwind readnone }
 
 !0 = !{i32 32, i32 4, i32 1}
 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index cf77ce35207..44fe7410b42 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -128,10 +128,10 @@ define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5
 ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4
 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4
 ; CHECK: liveins: %r0, %r1, %r2, %r3
-; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2
-; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]]
-; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]]
-; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP5]]
+; CHECK: [[VREGP2:%[0-9]+]](s32) = COPY %r2
+; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
+; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]]{{.*}}load 4
+; CHECK: [[SUM:%[0-9]+]](s32) = G_ADD [[VREGP2]], [[VREGP5]]
 ; CHECK: %r0 = COPY [[SUM]]
 ; CHECK: BX_RET 14, _, implicit %r0
 entry:
@@ -146,10 +146,11 @@ define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
 ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
 ; CHECK: liveins: %r0, %r1, %r2, %r3
-; CHECK: [[VREGP1:%[0-9]+]]{{.*}} = COPY %r1
-; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]]
-; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]](p0)
-; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP1]], [[VREGP5]]
+; CHECK: [[VREGP1:%[0-9]+]](s16) = COPY %r1
+; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
+; CHECK: [[VREGP5EXT:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4
+; CHECK: [[VREGP5:%[0-9]+]](s16) = G_TRUNC [[VREGP5EXT]]
+; CHECK: [[SUM:%[0-9]+]](s16) = G_ADD [[VREGP1]], [[VREGP5]]
 ; CHECK: %r0 = COPY [[SUM]]
 ; CHECK: BX_RET 14, _, implicit %r0
 entry:
@@ -164,10 +165,11 @@ define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
 ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
 ; CHECK: liveins: %r0, %r1, %r2, %r3
-; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2
-; CHECK: [[FIP4:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P4]]
-; CHECK: [[VREGP4:%[0-9]+]]{{.*}} = G_LOAD [[FIP4]](p0)
-; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP4]]
+; CHECK: [[VREGP2:%[0-9]+]](s8) = COPY %r2
+; CHECK: [[FIP4:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P4]]
+; CHECK: [[VREGP4EXT:%[0-9]+]](s32) = G_LOAD [[FIP4]](p0){{.*}}load 4
+; CHECK: [[VREGP4:%[0-9]+]](s8) = G_TRUNC [[VREGP4EXT]]
+; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[VREGP2]], [[VREGP4]]
 ; CHECK: %r0 = COPY [[SUM]]
 ; CHECK: BX_RET 14, _, implicit %r0
 entry:
@@ -175,11 +177,46 @@ entry:
   ret i8 %sum
 }
 
+define i8 @test_stack_args_noext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
+                                 i8 %p4, i16 %p5) {
+; CHECK-LABEL: name: test_stack_args_noext
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
+; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[VREGP2:%[0-9]+]](s8) = COPY %r2
+; CHECK: [[FIP4:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P4]]
+; CHECK: [[VREGP4:%[0-9]+]](s8) = G_LOAD [[FIP4]](p0){{.*}}load 1
+; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[VREGP2]], [[VREGP4]]
+; CHECK: %r0 = COPY [[SUM]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+  %sum = add i8 %p2, %p4
+  ret i8 %sum
+}
+
+define zeroext i16 @test_stack_args_extend_the_extended(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
+                                                        i8 signext %p4, i16 signext %p5) {
+; CHECK-LABEL: name: test_stack_args_extend_the_extended
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
+; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
+; CHECK: [[VREGP5SEXT:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4
+; CHECK: [[VREGP5:%[0-9]+]](s16) = G_TRUNC [[VREGP5SEXT]]
+; CHECK: [[VREGP5ZEXT:%[0-9]+]](s32) = G_ZEXT [[VREGP5]]
+; CHECK: %r0 = COPY [[VREGP5ZEXT]]
+; CHECK: BX_RET 14, _, implicit %r0
+entry:
+  ret i16 %p5
+}
+
 define i16 @test_ptr_arg(i16* %p) {
 ; CHECK-LABEL: name: test_ptr_arg
 ; CHECK: liveins: %r0
 ; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0
-; CHECK: [[VREGV:%[0-9]+]](s16) = G_LOAD [[VREGP]](p0)
+; CHECK: [[VREGV:%[0-9]+]](s16) = G_LOAD [[VREGP]](p0){{.*}}load 2
 entry:
   %v = load i16, i16* %p
   ret i16 %v
@@ -190,7 +227,7 @@ define i32* @test_ptr_ret(i32** %p) {
 ; CHECK-LABEL: name: test_ptr_ret
 ; CHECK: liveins: %r0
 ; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0
-; CHECK: [[VREGV:%[0-9]+]](p0) = G_LOAD [[VREGP]](p0)
+; CHECK: [[VREGV:%[0-9]+]](p0) = G_LOAD [[VREGP]](p0){{.*}}load 4
 ; CHECK: %r0 = COPY [[VREGV]]
 ; CHECK: BX_RET 14, _, implicit %r0
 entry:
@@ -203,9 +240,9 @@ define i32 @test_ptr_arg_on_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32* %p) {
 ; CHECK: fixedStack:
 ; CHECK: id: [[P:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4
 ; CHECK: liveins: %r0, %r1, %r2, %r3
-; CHECK: [[FIP:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P]]
-; CHECK: [[VREGP:%[0-9]+]](p0) = G_LOAD [[FIP]](p0)
-; CHECK: [[VREGV:%[0-9]+]](s32) = G_LOAD [[VREGP]](p0)
+; CHECK: [[FIP:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P]]
+; CHECK: [[VREGP:%[0-9]+]](p0) = G_LOAD [[FIP]](p0){{.*}}load 4
+; CHECK: [[VREGV:%[0-9]+]](s32) = G_LOAD [[VREGP]](p0){{.*}}load 4
 ; CHECK: %r0 = COPY [[VREGV]]
 ; CHECK: BX_RET 14, _, implicit %r0
 entry:
@@ -222,7 +259,7 @@ define arm_aapcscc float @test_float_aapcscc(float %p0, float %p1, float %p2,
 ; CHECK: liveins: %r0, %r1, %r2, %r3
 ; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %r1
 ; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
-; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0)
+; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4
 ; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGP5]]
 ; CHECK: %r0 = COPY [[VREGV]]
 ; CHECK: BX_RET 14, _, implicit %r0
@@ -251,7 +288,7 @@ define arm_aapcs_vfpcc float @test_float_vfpcc(float %p0, float %p1, float %p2,
 ; CHECK: liveins: %s0, %s1, %s2, %s3, %s4, %s5, %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15
 ; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %s1
 ; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]]
-; CHECK: [[VREGQ1:%[0-9]+]](s32) = G_LOAD [[FIQ1]](p0)
+; CHECK: [[VREGQ1:%[0-9]+]](s32) = G_LOAD [[FIQ1]](p0){{.*}}load 4
 ; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGQ1]]
 ; CHECK: %s0 = COPY [[VREGV]]
 ; CHECK: BX_RET 14, _, implicit %s0
@@ -272,7 +309,7 @@ define arm_aapcs_vfpcc double @test_double_vfpcc(double %p0, double %p1, double
 ; CHECK: liveins: %d0, %d1, %d2, %d3, %d4, %d5, %d6, %d7
 ; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d1
 ; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]]
-; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0)
+; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]]
 ; CHECK: %d0 = COPY [[VREGV]]
 ; CHECK: BX_RET 14, _, implicit %d0
@@ -295,7 +332,7 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p
 ; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1LO]](s32), 0, [[VREGP1HI]](s32), 32
 ; BIG: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1HI]](s32), 0, [[VREGP1LO]](s32), 32
 ; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
-; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0)
+; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]]
 ; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
 ; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
@@ -322,7 +359,7 @@ define arm_aapcs_vfpcc double @test_double_gap_vfpcc(double %p0, float %filler,
 ; CHECK: liveins: %d0, %d2, %d3, %d4, %d5, %d6, %d7, %s2
 ; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d2
 ; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]]
-; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0)
+; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]]
 ; CHECK: %d0 = COPY [[VREGV]]
 ; CHECK: BX_RET 14, _, implicit %d0
@@ -342,7 +379,7 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0,
 ; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
 ; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
 ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
-; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0)
+; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
 ; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
 ; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
@@ -367,7 +404,7 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler,
 ; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
 ; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
 ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
-; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0)
+; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
 ; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
 ; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
index da02bfe6851..57ccff90c0b 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -197,6 +197,17 @@ entry:
   ret i8 %sum
 }
 
+define i8 @test_stack_args_noext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 %p4) {
+; CHECK-LABEL: test_stack_args_noext:
+; CHECK: mov [[P4ADDR:r[0-9]+]], sp
+; CHECK: ldrb [[P4:r[0-9]+]], {{.*}}[[P4ADDR]]
+; CHECK: add r0, r2, [[P4]]
+; CHECK: bx lr
+entry:
+  %sum = add i8 %p2, %p4
+  ret i8 %sum
+}
+
 define i32 @test_ptr_arg_in_reg(i32* %p) {
 ; CHECK-LABEL: test_ptr_arg_in_reg:
 ; CHECK: ldr r0, [r0]
diff --git a/test/CodeGen/ARM/bool-ext-inc.ll b/test/CodeGen/ARM/bool-ext-inc.ll
index b91b9b25899..5f2ba8b109a 100644
--- a/test/CodeGen/ARM/bool-ext-inc.ll
+++ b/test/CodeGen/ARM/bool-ext-inc.ll
@@ -4,7 +4,7 @@
 define i32 @sext_inc(i1 zeroext %x) {
 ; CHECK-LABEL: sext_inc:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    rsb r0, r0, #1
+; CHECK-NEXT:    eor r0, r0, #1
 ; CHECK-NEXT:    mov pc, lr
   %ext = sext i1 %x to i32
   %add = add i32 %ext, 1
@@ -14,14 +14,12 @@ define i32 @sext_inc(i1 zeroext %x) {
 define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
 ; CHECK-LABEL: sext_inc_vec:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vmov.i32 q9, #0x1f
-; CHECK-NEXT:    vmov.i32 q10, #0x1
+; CHECK-NEXT:    vmov.i16 d16, #0x1
+; CHECK-NEXT:    vmov d17, r0, r1
+; CHECK-NEXT:    vmov.i32 q9, #0x1
+; CHECK-NEXT:    veor d16, d17, d16
 ; CHECK-NEXT:    vmovl.u16 q8, d16
-; CHECK-NEXT:    vneg.s32 q9, q9
-; CHECK-NEXT:    vshl.i32 q8, q8, #31
-; CHECK-NEXT:    vshl.s32 q8, q8, q9
-; CHECK-NEXT:    vadd.i32 q8, q8, q10
+; CHECK-NEXT:    vand q8, q8, q9
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -38,8 +36,8 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    vmov.i32 q10, #0x1
 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
 ; CHECK-NEXT:    vmov d18, r0, r1
-; CHECK-NEXT:    vcgt.s32 q8, q9, q8
-; CHECK-NEXT:    vadd.i32 q8, q8, q10
+; CHECK-NEXT:    vcge.s32 q8, q8, q9
+; CHECK-NEXT:    vand q8, q8, q10
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -54,12 +52,11 @@ define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    mov r12, sp
 ; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vmov.i32 q10, #0x1
 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
 ; CHECK-NEXT:    vmov d18, r0, r1
 ; CHECK-NEXT:    vceq.i32 q8, q9, q8
-; CHECK-NEXT:    vmov.i32 q9, #0x1
-; CHECK-NEXT:    vmvn q8, q8
-; CHECK-NEXT:    vadd.i32 q8, q8, q9
+; CHECK-NEXT:    vand q8, q8, q10
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
diff --git a/test/CodeGen/AVR/calling-conv/c/stack.ll b/test/CodeGen/AVR/calling-conv/c/stack.ll
index 00ff7d1acd8..52b6427476a 100644
--- a/test/CodeGen/AVR/calling-conv/c/stack.ll
+++ b/test/CodeGen/AVR/calling-conv/c/stack.ll
@@ -11,15 +11,15 @@ define void @ret_void_args_i64_i64_i32(i64 %a, i64 %b, i32 %c) {
   ; CHECK-NEXT: in      r29, 62
 
   ; Load the top two bytes from the 32-bit int.
-  ; CHECK-NEXT: ldd     r24, Y+7
-  ; CHECK-NEXT: ldd     r25, Y+8
+  ; CHECK-NEXT: ldd     r24, Y+5
+  ; CHECK-NEXT: ldd     r25, Y+6
   ; Store the top two bytes of the 32-bit int to memory.
   ; CHECK-NEXT: sts     7, r25
   ; CHECK-NEXT: sts     6, r24
 
   ; Load the bottom two bytes from the 32-bit int.
-  ; CHECK-NEXT: ldd     r24, Y+5
-  ; CHECK-NEXT: ldd     r25, Y+6
+  ; CHECK-NEXT: ldd     r24, Y+3
+  ; CHECK-NEXT: ldd     r25, Y+4
   ; Store the bottom two bytes of the 32-bit int to memory.
   ; CHECK-NEXT: sts     5, r25
   ; CHECK-NEXT: sts     4, r24
diff --git a/test/CodeGen/AVR/return.ll b/test/CodeGen/AVR/return.ll
index d57f435fd11..1f80576af28 100644
--- a/test/CodeGen/AVR/return.ll
+++ b/test/CodeGen/AVR/return.ll
@@ -96,14 +96,14 @@ define i64 @return64_arg2(i64 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: return64_arg2:
 ; CHECK: push r28
 ; CHECK: push r29
-; CHECK: ldd r18, Y+5
-; CHECK: ldd r19, Y+6
-; CHECK: ldd r20, Y+7
-; CHECK: ldd r21, Y+8
-; CHECK: ldd r22, Y+9
-; CHECK: ldd r23, Y+10
-; CHECK: ldd r24, Y+11
-; CHECK: ldd r25, Y+12
+; CHECK: ldd r18, Y+3
+; CHECK: ldd r19, Y+4
+; CHECK: ldd r20, Y+5
+; CHECK: ldd r21, Y+6
+; CHECK: ldd r22, Y+7
+; CHECK: ldd r23, Y+8
+; CHECK: ldd r24, Y+9
+; CHECK: ldd r25, Y+10
 ; CHECK: pop r29
 ; CHECK: pop r28
     ret i64 %z
@@ -113,10 +113,10 @@ define i32 @return64_trunc(i32 %a, i32 %b, i32 %c, i64 %d) {
 ; CHECK-LABEL: return64_trunc:
 ; CHECK: push r28
 ; CHECK: push r29
-; CHECK: ldd r22, Y+5
-; CHECK: ldd r23, Y+6
-; CHECK: ldd r24, Y+7
-; CHECK: ldd r25, Y+8
+; CHECK: ldd r22, Y+3
+; CHECK: ldd r23, Y+4
+; CHECK: ldd r24, Y+5
+; CHECK: ldd r25, Y+6
 ; CHECK: pop r29
 ; CHECK: pop r28
   %result = trunc i64 %d to i32
diff --git a/test/CodeGen/AVR/rot.ll b/test/CodeGen/AVR/rot.ll
new file mode 100644
index 00000000000..e43daf3e6aa
--- /dev/null
+++ b/test/CodeGen/AVR/rot.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=avr | FileCheck %s
+
+; Bit rotation tests.
+
+; CHECK-LABEL: rol8:
+define i8 @rol8(i8 %val, i8 %amt) {
+  ; CHECK:      andi r22, 7
+
+  ; CHECK-NEXT: cp r22, r0
+  ; CHECK-NEXT: breq LBB0_2
+
+; CHECK-NEXT: LBB0_1:
+  ; CHECK-NEXT: rol r24
+  ; CHECK-NEXT: subi r22, 1
+  ; CHECK-NEXT: brne LBB0_1
+
+; CHECK-NEXT:LBB0_2:
+  ; CHECK-NEXT: ret
+  %mod = urem i8 %amt, 8
+
+  %inv = sub i8 8, %mod
+  %parta = shl i8 %val, %mod
+  %partb = lshr i8 %val, %inv
+
+  %rotl = or i8 %parta, %partb
+
+  ret i8 %rotl
+}
+
+
+; CHECK-LABEL: ror8:
+define i8 @ror8(i8 %val, i8 %amt) {
+  ; CHECK:      andi r22, 7
+
+  ; CHECK-NEXT: cp r22, r0
+  ; CHECK-NEXT: breq LBB1_2
+
+; CHECK-NEXT: LBB1_1:
+  ; CHECK-NEXT: ror r24
+  ; CHECK-NEXT: subi r22, 1
+  ; CHECK-NEXT: brne LBB1_1
+
+; CHECK-NEXT:LBB1_2:
+  ; CHECK-NEXT: ret
+  %mod = urem i8 %amt, 8
+
+  %inv = sub i8 8, %mod
+  %parta = lshr i8 %val, %mod
+  %partb = shl i8 %val, %inv
+
+  %rotr = or i8 %parta, %partb
+
+  ret i8 %rotr
+}
+
diff --git a/test/CodeGen/AVR/varargs.ll b/test/CodeGen/AVR/varargs.ll
index 4959f2d880c..6f727cda582 100644
--- a/test/CodeGen/AVR/varargs.ll
+++ b/test/CodeGen/AVR/varargs.ll
@@ -7,12 +7,12 @@ declare void @llvm.va_end(i8*)
 define i16 @varargs1(i8* nocapture %x, ...) {
 ; CHECK-LABEL: varargs1:
 ; CHECK: movw r20, r28
-; CHECK: subi r20, 215
+; CHECK: subi r20, 217
 ; CHECK: sbci r21, 255
 ; CHECK: movw r24, r28
 ; CHECK: adiw r24, 3
-; CHECK: ldd r22, Y+39
-; CHECK: ldd r23, Y+40
+; CHECK: ldd r22, Y+37
+; CHECK: ldd r23, Y+38
 ; CHECK: call
   %buffer = alloca [32 x i8]
   %ap = alloca i8*
diff --git a/test/CodeGen/BPF/mem_offset_be.ll b/test/CodeGen/BPF/mem_offset_be.ll
new file mode 100644
index 00000000000..e5e352783d7
--- /dev/null
+++ b/test/CodeGen/BPF/mem_offset_be.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=bpfeb -show-mc-encoding < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define i32 @bpf_prog1(i8* nocapture readnone) local_unnamed_addr #0 {
+; CHECK: r1 = 590618314553ll   # encoding: [0x18,0x10,0x00,0x00,0x83,0x98,0x47,0x39,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x89]
+; CHECK: r1 += -1879113726     # encoding: [0x07,0x10,0x00,0x00,0x8f,0xff,0x00,0x02]
+; CHECK: r0 = *(u64 *)(r1 + 0) # encoding: [0x79,0x01,0x00,0x00,0x00,0x00,0x00,0x00]
+  %2 = alloca i64, align 8
+  %3 = bitcast i64* %2 to i8*
+  store volatile i64 590618314553, i64* %2, align 8
+  %4 = load volatile i64, i64* %2, align 8
+  %5 = add i64 %4, -1879113726
+  %6 = inttoptr i64 %5 to i64*
+  %7 = load i64, i64* %6, align 8
+  %8 = trunc i64 %7 to i32
+  ret i32 %8
+}
+
diff --git a/test/CodeGen/Hexagon/cfgopt-fall-through.ll b/test/CodeGen/Hexagon/cfgopt-fall-through.ll
new file mode 100644
index 00000000000..be234aafc0b
--- /dev/null
+++ b/test/CodeGen/Hexagon/cfgopt-fall-through.ll
@@ -0,0 +1,71 @@
+; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
+; REQUIRES: asserts
+
+; Check for some sane output. This test used to crash.
+; CHECK: jumpr r31
+
+
+define i32 @fred(i32 %a0, i8 zeroext %a1) local_unnamed_addr #0 {
+b2:
+  br i1 undef, label %b4, label %b3
+
+b3:                                               ; preds = %b2
+  unreachable
+
+b4:                                               ; preds = %b2
+  br i1 undef, label %b19, label %b5
+
+b5:                                               ; preds = %b4
+  br i1 undef, label %b6, label %b12
+
+b6:                                               ; preds = %b5
+  switch i8 %a1, label %b17 [
+    i8 2, label %b7
+    i8 5, label %b7
+    i8 1, label %b7
+    i8 3, label %b8
+  ]
+
+b7:                                               ; preds = %b6, %b6, %b6
+  unreachable
+
+b8:                                               ; preds = %b6
+  br i1 undef, label %b11, label %b9
+
+b9:                                               ; preds = %b8
+  %v10 = or i32 undef, 0
+  br label %b15
+
+b11:                                              ; preds = %b8
+  unreachable
+
+b12:                                              ; preds = %b5
+  switch i8 %a1, label %b17 [
+    i8 5, label %b13
+    i8 1, label %b13
+    i8 2, label %b14
+    i8 3, label %b15
+  ]
+
+b13:                                              ; preds = %b12, %b12
+  store i32 %a0, i32* undef, align 4
+  br label %b17
+
+b14:                                              ; preds = %b12
+  store i16 undef, i16* undef, align 4
+  br label %b17
+
+b15:                                              ; preds = %b12, %b9
+  %v16 = phi i32 [ 0, %b12 ], [ %v10, %b9 ]
+  store i32 undef, i32* undef, align 4
+  br label %b17
+
+b17:                                              ; preds = %b15, %b14, %b13, %b12, %b6
+  %v18 = phi i32 [ 0, %b13 ], [ 0, %b12 ], [ %v16, %b15 ], [ 0, %b14 ], [ 0, %b6 ]
+  ret i32 %v18
+
+b19:                                              ; preds = %b4
+  unreachable
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" }
diff --git a/test/CodeGen/Hexagon/rdf-def-mask.ll b/test/CodeGen/Hexagon/rdf-def-mask.ll
new file mode 100644
index 00000000000..3d65968911e
--- /dev/null
+++ b/test/CodeGen/Hexagon/rdf-def-mask.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=hexagon -O3 -verify-machineinstrs < %s | FileCheck %s
+; REQUIRES: asserts
+
+; Check for sane output. This testcase used to crash.
+; CHECK: jumpr r31
+
+target triple = "hexagon"
+
+@g0 = external hidden unnamed_addr constant [9 x i16], align 8
+
+; Function Attrs: nounwind readnone
+define i64 @fred(i32 %a0) local_unnamed_addr #0 {
+b1:
+  %v2 = icmp slt i32 %a0, 1
+  br i1 %v2, label %b26, label %b3
+
+b3:                                               ; preds = %b1
+  %v4 = tail call i32 @llvm.hexagon.S2.clb(i32 %a0)
+  %v5 = add nsw i32 %v4, -12
+  %v6 = add nsw i32 %v4, -28
+  %v7 = tail call i32 @llvm.hexagon.S2.asl.r.r(i32 %a0, i32 %v6)
+  %v8 = add nsw i32 %v7, -8
+  %v9 = tail call i32 @llvm.hexagon.S2.asl.r.r(i32 %a0, i32 %v5)
+  %v10 = getelementptr inbounds [9 x i16], [9 x i16]* @g0, i32 0, i32 %v8
+  %v11 = load i16, i16* %v10, align 2
+  %v12 = sext i16 %v11 to i32
+  %v13 = shl nsw i32 %v12, 16
+  %v14 = add nsw i32 %v7, -7
+  %v15 = getelementptr inbounds [9 x i16], [9 x i16]* @g0, i32 0, i32 %v14
+  %v16 = load i16, i16* %v15, align 2
+  %v17 = sub i16 %v11, %v16
+  %v18 = and i32 %v9, 65535
+  %v19 = zext i16 %v17 to i32
+  %v20 = tail call i32 @llvm.hexagon.M2.mpyu.nac.ll.s0(i32 %v13, i32 %v18, i32 %v19) #1
+  %v21 = add nsw i32 %v4, -32
+  %v22 = zext i32 %v21 to i64
+  %v23 = shl nuw i64 %v22, 32
+  %v24 = zext i32 %v20 to i64
+  %v25 = or i64 %v23, %v24
+  br label %b26
+
+b26:                                              ; preds = %b3, %b1
+  %v27 = phi i64 [ %v25, %b3 ], [ 2147483648, %b1 ]
+  ret i64 %v27
+}
+
+declare i32 @llvm.hexagon.S2.clb(i32) #1
+declare i32 @llvm.hexagon.S2.asl.r.r(i32, i32) #1
+declare i32 @llvm.hexagon.M2.mpyu.nac.ll.s0(i32, i32, i32) #1
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/Hexagon/unreachable-mbb-phi-subreg.mir b/test/CodeGen/Hexagon/unreachable-mbb-phi-subreg.mir
new file mode 100644
index 00000000000..6d6549201ab
--- /dev/null
+++ b/test/CodeGen/Hexagon/unreachable-mbb-phi-subreg.mir
@@ -0,0 +1,25 @@
+# RUN: llc -march=hexagon -run-pass unreachable-mbb-elimination %s -o - | FileCheck %s
+
+---
+name: fred
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %d0
+    successors: %bb.2
+
+    %0 : doubleregs = COPY %d0
+    J2_jump %bb.2, implicit-def %pc
+
+  bb.1:
+    successors: %bb.2
+    A2_nop
+
+  bb.2:
+    ; Make sure that the subregister from the PHI operand is preserved.
+    ; CHECK: %[[REG:[0-9]+]] = COPY %0.isub_lo
+    ; CHECK: %r0 = COPY %[[REG]]
+    %1 : intregs = PHI %0.isub_lo, %bb.0, %0.isub_hi, %bb.1
+    %r0 = COPY %1
+...
+
diff --git a/test/CodeGen/MIR/Generic/frame-info.mir b/test/CodeGen/MIR/Generic/frame-info.mir
index 7c6e6ebbfee..157eb99e149 100644
--- a/test/CodeGen/MIR/Generic/frame-info.mir
+++ b/test/CodeGen/MIR/Generic/frame-info.mir
@@ -36,7 +36,6 @@ tracksRegLiveness: true
 # CHECK-NEXT: maxAlignment:
 # CHECK-NEXT: adjustsStack: false
 # CHECK-NEXT: hasCalls: false
-# CHECK-NEXT: maxCallFrameSize: 0
 # CHECK-NEXT: hasOpaqueSPAdjustment: false
 # CHECK-NEXT: hasVAStart: false
 # CHECK-NEXT: hasMustTailInVarArgFunc: false
diff --git a/test/CodeGen/Mips/llvm-ir/add.ll b/test/CodeGen/Mips/llvm-ir/add.ll
index eece0309104..a5ecdda94ce 100644
--- a/test/CodeGen/Mips/llvm-ir/add.ll
+++ b/test/CodeGen/Mips/llvm-ir/add.ll
@@ -24,7 +24,7 @@
 ; RUN:    -check-prefixes=ALL,R2-R6,GP64
 ; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
 ; RUN:    -check-prefixes=ALL,R2-R6,GP64
-; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips -O2 | FileCheck %s \
+; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips -O2 -verify-machineinstrs | FileCheck %s \
 ; RUN:    -check-prefixes=ALL,MMR6,MM32
 ; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips -O2 | FileCheck %s \
 ; RUN:    -check-prefixes=ALL,MMR6,MM32
@@ -117,7 +117,7 @@ entry:
 
   ; GP64:       daddu   $2, $4, $5
 
-  ; MM32:       addu    $3, $5, $7
+  ; MM32:       addu16  $3, $5, $7
   ; MM32:       sltu    $[[T0:[0-9]+]], $3, $7
   ; MM32:       addu    $[[T1:[0-9]+]], $[[T0]], $6
   ; MM32:       addu    $2, $4, $[[T1]]
@@ -158,16 +158,16 @@ entry:
   ; MM32:       addu      $[[T1:[0-9]+]], $7, $[[T0]]
   ; MM32:       sltu      $[[T2:[0-9]+]], $[[T1]], $[[T0]]
   ; MM32:       lw        $[[T3:[0-9]+]], 24($sp)
-  ; MM32:       addu      $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-  ; MM32:       addu      $[[T5:[0-9]+]], $6, $[[T4]]
+  ; MM32:       addu16    $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+  ; MM32:       addu16    $[[T5:[0-9]+]], $6, $[[T4]]
   ; MM32:       sltu      $[[T6:[0-9]+]], $[[T5]], $[[T3]]
   ; MM32:       lw        $[[T7:[0-9]+]], 20($sp)
-  ; MM32:       addu      $[[T8:[0-9]+]], $[[T6]], $[[T7]]
+  ; MM32:       addu16    $[[T8:[0-9]+]], $[[T6]], $[[T7]]
   ; MM32:       lw        $[[T9:[0-9]+]], 16($sp)
-  ; MM32:       addu      $[[T10:[0-9]+]], $5, $[[T8]]
+  ; MM32:       addu16    $[[T10:[0-9]+]], $5, $[[T8]]
   ; MM32:       sltu      $[[T11:[0-9]+]], $[[T10]], $[[T7]]
   ; MM32:       addu      $[[T12:[0-9]+]], $[[T11]], $[[T9]]
-  ; MM32:       addu      $[[T13:[0-9]+]], $4, $[[T12]]
+  ; MM32:       addu16    $[[T13:[0-9]+]], $4, $[[T12]]
   ; MM32:       move      $4, $[[T5]]
   ; MM32:       move      $5, $[[T1]]
 
@@ -289,12 +289,12 @@ define signext i128 @add_i128_4(i128 signext %a) {
   ; MM32:       addiu   $[[T0:[0-9]+]], $7, 4
   ; MM32:       li16    $[[T1:[0-9]+]], 4
   ; MM32:       sltu    $[[T1]], $[[T0]], $[[T1]]
-  ; MM32:       addu    $[[T2:[0-9]+]], $6, $[[T1]]
+  ; MM32:       addu16  $[[T2:[0-9]+]], $6, $[[T1]]
   ; MM32:       li16    $[[T1]], 0
   ; MM32:       sltu    $[[T3:[0-9]+]], $[[T2]], $[[T1]]
-  ; MM32:       addu    $[[T3]], $5, $[[T3]]
+  ; MM32:       addu16  $[[T3]], $5, $[[T3]]
   ; MM32:       sltu    $[[T1]], $[[T3]], $[[T1]]
-  ; MM32:       addu    $[[T1]], $4, $[[T1]]
+  ; MM32:       addu16  $[[T1]], $4, $[[T1]]
   ; MM32:       move    $4, $[[T2]]
   ; MM32:       move    $5, $[[T0]]
 
@@ -419,12 +419,12 @@ define signext i128 @add_i128_3(i128 signext %a) {
   ; MM32:       addiu   $[[T0:[0-9]+]], $7, 3
   ; MM32:       li16    $[[T1:[0-9]+]], 3
   ; MM32:       sltu    $[[T1]], $[[T0]], $[[T1]]
-  ; MM32:       addu    $[[T2:[0-9]+]], $6, $[[T1]]
+  ; MM32:       addu16  $[[T2:[0-9]+]], $6, $[[T1]]
   ; MM32:       li16    $[[T3:[0-9]+]], 0
   ; MM32:       sltu    $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-  ; MM32:       addu    $[[T4]], $5, $[[T4]]
+  ; MM32:       addu16  $[[T4]], $5, $[[T4]]
   ; MM32:       sltu    $[[T5:[0-9]+]], $[[T4]], $[[T3]]
-  ; MM32:       addu    $[[T5]], $4, $[[T5]]
+  ; MM32:       addu16  $[[T5]], $4, $[[T5]]
   ; MM32:       move    $4, $[[T2]]
   ; MM32:       move    $5, $[[T0]]
 
diff --git a/test/CodeGen/Mips/llvm-ir/sub.ll b/test/CodeGen/Mips/llvm-ir/sub.ll
index 617ab3c1a21..a730063c552 100644
--- a/test/CodeGen/Mips/llvm-ir/sub.ll
+++ b/test/CodeGen/Mips/llvm-ir/sub.ll
@@ -10,7 +10,7 @@
 ; RUN:    -check-prefixes=R2-R6,GP32,GP32-NOT-MM,NOT-MM
 ; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
 ; RUN:    -check-prefixes=R2-R6,GP32,GP32-NOT-MM,NOT-MM
-; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips | FileCheck %s \
+; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips -verify-machineinstrs | FileCheck %s \
 ; RUN:    -check-prefixes=GP32-MM,GP32,MM
 ; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips | FileCheck %s \
 ; RUN:    -check-prefixes=GP32-MM,GP32,MM
@@ -100,7 +100,7 @@ define signext i64 @sub_i64(i64 signext %a, i64 signext %b) {
 entry:
 ; ALL-LABEL: sub_i64:
 
-  ; GP32:           subu    $3, $5, $7
+  ; GP32-NOT-MM     subu    $3, $5, $7
   ; GP32:           sltu    $[[T0:[0-9]+]], $5, $7
   ; GP32:           addu    $[[T1:[0-9]+]], $[[T0]], $6
   ; GP32:           subu    $2, $4, $[[T1]]
@@ -138,13 +138,13 @@ entry:
   ; GP32-MM:        lw        $[[T4:[0-9]+]], 24($sp)
   ; GP32-MM:        lw        $[[T5:[0-9]+]], 28($sp)
   ; GP32-MM:        subu      $[[T1]], $7, $[[T5]]
-  ; GP32-MM:        subu      $[[T3]], $[[T6:[0-9]+]], $[[T3]]
+  ; GP32-MM:        subu16    $[[T3]], $[[T6:[0-9]+]], $[[T3]]
   ; GP32-MM:        sltu      $[[T6]], $6, $[[T4]]
-  ; GP32-MM:        addu      $[[T0]], $[[T6]], $[[T0]]
-  ; GP32-MM:        subu      $[[T0]], $5, $[[T0]]
+  ; GP32-MM:        addu16    $[[T0]], $[[T6]], $[[T0]]
+  ; GP32-MM:        subu16    $[[T0]], $5, $[[T0]]
   ; GP32-MM:        sltu      $[[T6]], $7, $[[T5]]
   ; GP32-MM:        addu      $[[T6]], $[[T6]], $[[T4]]
-  ; GP32-MM:        subu      $[[T6]], $6, $[[T6]]
+  ; GP32-MM:        subu16    $[[T6]], $6, $[[T6]]
   ; GP32-MM:        move      $[[T2]], $[[T1]]
 
   ; GP64:           dsubu     $3, $5, $7
diff --git a/test/CodeGen/Mips/micromips-sizereduction/micromips-lwsp-swsp.ll b/test/CodeGen/Mips/micromips-sizereduction/micromips-lwsp-swsp.ll
new file mode 100644
index 00000000000..b92554854c0
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-sizereduction/micromips-lwsp-swsp.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips -asm-show-inst -verify-machineinstrs < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define i32 @function1(i32 (i32)* %f) {
+entry:
+; CHECK-LABEL: function1:
+; CHECK: SWSP_MM
+; CHECK: LWSP_MM
+  %call = call i32 %f(i32 0)
+  ret i32 0
+}
diff --git a/test/CodeGen/NVPTX/f16-instructions.ll b/test/CodeGen/NVPTX/f16-instructions.ll
index 403a67f02f8..3d414082079 100644
--- a/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/test/CodeGen/NVPTX/f16-instructions.ll
@@ -36,6 +36,21 @@ define half @test_fadd(half %a, half %b) #0 {
   ret half %r
 }
 
+; CHECK-LABEL: test_fadd_v1f16(
+; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
+; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
+; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
+; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
+; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
+; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
+; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
+; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
+  %r = fadd <1 x half> %a, %b
+  ret <1 x half> %r
+}
+
 ; Check that we can lower fadd with immediate arguments.
 ; CHECK-LABEL: test_fadd_imm_0(
 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
diff --git a/test/CodeGen/PowerPC/build-vector-tests.ll b/test/CodeGen/PowerPC/build-vector-tests.ll
index fa4d212932f..1bce9d4cb43 100644
--- a/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -875,8 +875,8 @@ entry:
 ; P9LE: blr
 ; P8BE: lxvw4x
 ; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
+; P8LE: lvx
+; P8LE-NOT: xxswapd
 ; P8LE: blr
 }
 
@@ -942,8 +942,7 @@ entry:
 ; P8BE: vperm
 ; P8BE: blr
 ; P8LE: lxvd2x
-; P8LE-DAG: lxvd2x
-; P8LE-DAG: xxswapd
+; P8LE-DAG: lvx
 ; P8LE: xxswapd
 ; P8LE: vperm
 ; P8LE: blr
@@ -1036,7 +1035,6 @@ entry:
 ; P8LE: sldi {{r[0-9]+}}, r4, 2
 ; P8LE-DAG: lxvd2x
 ; P8LE-DAG: lxvd2x
-; P8LE-DAG: xxswapd
 ; P8LE: xxswapd
 ; P8LE: vperm
 ; P8LE: blr
@@ -1289,8 +1287,8 @@ entry:
 ; P9LE: blr
 ; P8BE: lxvw4x
 ; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
+; P8LE: lvx
+; P8LE-NOT: xxswapd
 ; P8LE: blr
 }
 
@@ -1315,7 +1313,7 @@ entry:
 ; P8BE: xvcvspsxws v2, [[REG1]]
 ; P8BE: blr
 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
-; P8LE: xxswapd v2, [[REG1]]
+; P8LE: xxswapd
 ; P8LE: xvcvspsxws v2, v2
 ; P8LE: blr
 }
@@ -1359,8 +1357,7 @@ entry:
 ; P8BE: xvcvspsxws
 ; P8BE: blr
 ; P8LE: lxvd2x
-; P8LE-DAG: lxvd2x
-; P8LE-DAG: xxswapd
+; P8LE-DAG: lvx
 ; P8LE: xxswapd
 ; P8LE: vperm
 ; P8LE: xvcvspsxws
@@ -1566,8 +1563,8 @@ entry:
 ; P9LE: blr
 ; P8BE: lxvw4x
 ; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
+; P8LE: lvx
+; P8LE-NOT: xxswapd
 ; P8LE: blr
 }
 
@@ -2036,8 +2033,8 @@ entry:
 ; P9LE: blr
 ; P8BE: lxvw4x
 ; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
+; P8LE: lvx
+; P8LE-NOT: xxswapd
 ; P8LE: blr
 }
 
@@ -2103,8 +2100,8 @@ entry:
 ; P8BE: vperm
 ; P8BE: blr
 ; P8LE: lxvd2x
-; P8LE-DAG: lxvd2x
-; P8LE-DAG: xxswapd
+; P8LE-DAG: lvx
+; P8LE-NOT: xxswapd
 ; P8LE: xxswapd
 ; P8LE: vperm
 ; P8LE: blr
@@ -2195,10 +2192,8 @@ entry:
 ; P8BE: vperm
 ; P8BE: blr
 ; P8LE-DAG: sldi {{r[0-9]+}}, r4, 2
-; P8LE-DAG: lxvd2x
-; P8LE-DAG: lxvd2x
-; P8LE-DAG: xxswapd
-; P8LE: xxswapd
+; P8LE-DAG: lvx
+; P8LE-DAG: lvx
 ; P8LE: vperm
 ; P8LE: blr
 }
@@ -2450,8 +2445,8 @@ entry:
 ; P9LE: blr
 ; P8BE: lxvw4x
 ; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
+; P8LE: lvx
+; P8LE-NOT: xxswapd
 ; P8LE: blr
 }
 
@@ -2519,9 +2514,8 @@ entry:
 ; P8BE: vperm
 ; P8BE: xvcvspuxws
 ; P8BE: blr
-; P8LE: lxvd2x
 ; P8LE-DAG: lxvd2x
-; P8LE-DAG: xxswapd
+; P8LE-DAG: lvx
 ; P8LE: xxswapd
 ; P8LE: vperm
 ; P8LE: xvcvspuxws
@@ -2727,8 +2721,8 @@ entry:
 ; P9LE: blr
 ; P8BE: lxvw4x
 ; P8BE: blr
-; P8LE: lxvd2x
-; P8LE: xxswapd
+; P8LE: lvx
+; P8LE-NOT: xxswapd
 ; P8LE: blr
 }
 
diff --git a/test/CodeGen/PowerPC/ppc64-anyregcc.ll b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
index 4af118b567b..06ec561a45d 100644
--- a/test/CodeGen/PowerPC/ppc64-anyregcc.ll
+++ b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
@@ -31,7 +31,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 ; CHECK-LABEL: .section	.llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -75,18 +75,24 @@ target triple = "powerpc64-unknown-linux-gnu"
 ; CHECK-NEXT:   .short  3
 ; Loc 0: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 4
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 4
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Constant 3
 ; CHECK-NEXT:   .byte 4
-; CHECK-NEXT:   .byte 8
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 3
 define i64 @test() nounwind ssp uwtable {
 entry:
@@ -96,18 +102,22 @@ entry:
 
 ; property access 1 - %obj is an anyreg call argument and should therefore be in a register
 ; CHECK:  .long   .L{{.*}}-.L[[property_access1_BEGIN]]
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 2 locations
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
 entry:
@@ -118,18 +128,22 @@ entry:
 
 ; property access 2 - %obj is an anyreg call argument and should therefore be in a register
 ; CHECK:  .long   .L{{.*}}-.L[[property_access2_BEGIN]]
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 2 locations
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @property_access2() nounwind ssp uwtable {
 entry:
@@ -141,18 +155,22 @@ entry:
 
 ; property access 3 - %obj is a frame index
 ; CHECK:  .long   .L{{.*}}-.L[[property_access3_BEGIN]]
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 2 locations
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Direct FP - 8
 ; CHECK-NEXT:   .byte 2
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short 31
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 112
 define i64 @property_access3() nounwind ssp uwtable {
 entry:
@@ -164,78 +182,106 @@ entry:
 
 ; anyreg_test1
 ; CHECK:  .long   .L{{.*}}-.L[[anyreg_test1_BEGIN]]
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 14 locations
 ; CHECK-NEXT:   .short  14
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 3: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 4: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 5: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 6: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 7: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 8: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 9: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 10: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 11: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 12: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 13: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
@@ -246,78 +292,106 @@ entry:
 
 ; anyreg_test2
 ; CHECK:  .long   .L{{.*}}-.L[[anyreg_test2_BEGIN]]
-; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; 14 locations
 ; CHECK-NEXT:   .short  14
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 3: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 4: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 5: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 6: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 7: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 8: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 9: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 10: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 11: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 12: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 13: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
@@ -335,18 +409,24 @@ entry:
 ; CHECK-NEXT: .short 3
 ; Loc 0: Register (some register that will be spilled to the stack)
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
@@ -365,28 +445,38 @@ entry:
 ; CHECK-NEXT: .short 5
 ; Loc 0: Return a register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Arg0 in a Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 2: Arg1 in a Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 3: Arg2 spilled to FP -96
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short 31
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long 128
 ; Loc 4: Arg3 spilled to FP - 88
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 8
 ; CHECK-NEXT: .short 31
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long 136
 define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
diff --git a/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/test/CodeGen/PowerPC/ppc64-i128-abi.ll
index 924e04a34a9..4a8fd90db3e 100644
--- a/test/CodeGen/PowerPC/ppc64-i128-abi.ll
+++ b/test/CodeGen/PowerPC/ppc64-i128-abi.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE \
+; RUN:   --implicit-check-not xxswapd
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
@@ -8,13 +9,15 @@
 ; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
+; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
+; RUN:   --implicit-check-not xxswapd
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
+; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | \
+; RUN:   FileCheck %s -check-prefix=CHECK-LE-NOVSX --implicit-check-not xxswapd
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \
@@ -26,7 +29,7 @@
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
-; RUN:   FileCheck %s -check-prefix=CHECK-LE
+; RUN:   FileCheck %s -check-prefix=CHECK-LE --implicit-check-not xxswapd
 
 @x = common global <1 x i128> zeroinitializer, align 16
 @y = common global <1 x i128> zeroinitializer, align 16
@@ -199,8 +202,7 @@ define <1 x i128> @call_v1i128_increment_by_one() nounwind {
        ret <1 x i128> %ret
 
 ; CHECK-LE-LABEL: @call_v1i128_increment_by_one
-; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE: xxswapd 34, [[PARAM]]
+; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK-LE: bl v1i128_increment_by_one
 ; CHECK-LE: blr
 
@@ -229,10 +231,8 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind {
        ret <1 x i128> %ret
 
 ; CHECK-LE-LABEL: @call_v1i128_increment_by_val
-; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE-DAG: xxswapd 34, [[PARAM1]]
-; CHECK-LE-DAG: xxswapd 35, [[PARAM2]]
+; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: lvx 3, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK-LE: bl v1i128_increment_by_val
 ; CHECK-LE: blr
 
diff --git a/test/CodeGen/PowerPC/ppc64-stackmap.ll b/test/CodeGen/PowerPC/ppc64-stackmap.ll
index 854cee22c34..5abc2a2a217 100644
--- a/test/CodeGen/PowerPC/ppc64-stackmap.ll
+++ b/test/CodeGen/PowerPC/ppc64-stackmap.ll
@@ -44,7 +44,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 ; CHECK-LABEL:  .section  .llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -101,22 +101,30 @@ target triple = "powerpc64-unknown-linux-gnu"
 ; CHECK-NEXT:   .short  4
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   65535
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   65536
 ; SmallConstant
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; LargeConstant at index 0
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   1
 
@@ -133,12 +141,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long  0
 define void @osrinline(i64 %a, i64 %b) {
 entry:
@@ -157,12 +169,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long  0
 define void @osrcold(i64 %a, i64 %b) {
 entry:
@@ -197,12 +213,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
 entry:
@@ -219,12 +239,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
 entry:
@@ -241,12 +265,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
 entry:
@@ -267,8 +295,11 @@ entry:
 ; Check that at least one is a spilled entry from r31.
 ; Location: Indirect FP + ...
 ; CHECK:        .byte 3
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 
 ; CHECK-NEXT:   .short 31
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long
 define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
 entry:
   call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 40, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
@@ -286,8 +317,11 @@ entry:
 ; Check that at least one is a spilled entry from r31.
 ; Location: Indirect FP + ...
 ; CHECK:        .byte 3
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short 
 ; CHECK-NEXT:   .short 31
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long
 define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
 entry:
   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
@@ -303,7 +337,9 @@ entry:
 ; CHECK-NEXT:   .short 1
 ; Loc 0: SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   33
 
@@ -320,8 +356,10 @@ define void @liveConstant() {
 ; CHECK-NEXT:   .short 1
 ; Loc 0: Indirect FP (r31) - offset
 ; CHECK-NEXT:   .byte   3
-; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  4
 ; CHECK-NEXT:   .short  31
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   {{[0-9]+}}
 define void @clobberLR(i32 %a) {
   tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
diff --git a/test/CodeGen/PowerPC/swaps-le-1.ll b/test/CodeGen/PowerPC/swaps-le-1.ll
index f3db4f5c482..76264055270 100644
--- a/test/CodeGen/PowerPC/swaps-le-1.ll
+++ b/test/CodeGen/PowerPC/swaps-le-1.ll
@@ -13,6 +13,12 @@
 ; RUN:  -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
 ; RUN:  | FileCheck -check-prefix=NOOPTSWAP %s
 
+; LH: 2016-11-17
+;   Updated align attritue from 16 to 8 to keep swap instructions tests.
+;   Changes have been made on little-endian to use lvx and stvx
+;   instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
+;   aligned vectors with elements up to 4 bytes
+
 ; This test was generated from the following source:
 ;
 ; #define N 4096
@@ -29,10 +35,10 @@
 ;   }
 ; }
 
-@cb = common global [4096 x i32] zeroinitializer, align 16
-@cc = common global [4096 x i32] zeroinitializer, align 16
-@cd = common global [4096 x i32] zeroinitializer, align 16
-@ca = common global [4096 x i32] zeroinitializer, align 16
+@cb = common global [4096 x i32] zeroinitializer, align 8
+@cc = common global [4096 x i32] zeroinitializer, align 8
+@cd = common global [4096 x i32] zeroinitializer, align 8
+@ca = common global [4096 x i32] zeroinitializer, align 8
 
 define void @foo() {
 entry:
@@ -42,63 +48,63 @@ vector.body:
   %index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ]
   %0 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index
   %1 = bitcast i32* %0 to <4 x i32>*
-  %wide.load = load <4 x i32>, <4 x i32>* %1, align 16
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 8
   %2 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index
   %3 = bitcast i32* %2 to <4 x i32>*
-  %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 16
+  %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 8
   %4 = add nsw <4 x i32> %wide.load13, %wide.load
   %5 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index
   %6 = bitcast i32* %5 to <4 x i32>*
-  %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 16
+  %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 8
   %7 = mul nsw <4 x i32> %4, %wide.load14
   %8 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index
   %9 = bitcast i32* %8 to <4 x i32>*
-  store <4 x i32> %7, <4 x i32>* %9, align 16
+  store <4 x i32> %7, <4 x i32>* %9, align 8
   %index.next = add nuw nsw i64 %index, 4
   %10 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next
   %11 = bitcast i32* %10 to <4 x i32>*
-  %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 16
+  %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 8
   %12 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next
   %13 = bitcast i32* %12 to <4 x i32>*
-  %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 16
+  %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 8
   %14 = add nsw <4 x i32> %wide.load13.1, %wide.load.1
   %15 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next
   %16 = bitcast i32* %15 to <4 x i32>*
-  %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 16
+  %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 8
   %17 = mul nsw <4 x i32> %14, %wide.load14.1
   %18 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next
   %19 = bitcast i32* %18 to <4 x i32>*
-  store <4 x i32> %17, <4 x i32>* %19, align 16
+  store <4 x i32> %17, <4 x i32>* %19, align 8
   %index.next.1 = add nuw nsw i64 %index.next, 4
   %20 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.1
   %21 = bitcast i32* %20 to <4 x i32>*
-  %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 16
+  %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 8
   %22 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.1
   %23 = bitcast i32* %22 to <4 x i32>*
-  %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 16
+  %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 8
   %24 = add nsw <4 x i32> %wide.load13.2, %wide.load.2
   %25 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.1
   %26 = bitcast i32* %25 to <4 x i32>*
-  %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 16
+  %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 8
   %27 = mul nsw <4 x i32> %24, %wide.load14.2
   %28 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.1
   %29 = bitcast i32* %28 to <4 x i32>*
-  store <4 x i32> %27, <4 x i32>* %29, align 16
+  store <4 x i32> %27, <4 x i32>* %29, align 8
   %index.next.2 = add nuw nsw i64 %index.next.1, 4
   %30 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.2
   %31 = bitcast i32* %30 to <4 x i32>*
-  %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 16
+  %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 8
   %32 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.2
   %33 = bitcast i32* %32 to <4 x i32>*
-  %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 16
+  %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 8
   %34 = add nsw <4 x i32> %wide.load13.3, %wide.load.3
   %35 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.2
   %36 = bitcast i32* %35 to <4 x i32>*
-  %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 16
+  %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 8
   %37 = mul nsw <4 x i32> %34, %wide.load14.3
   %38 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.2
   %39 = bitcast i32* %38 to <4 x i32>*
-  store <4 x i32> %37, <4 x i32>* %39, align 16
+  store <4 x i32> %37, <4 x i32>* %39, align 8
   %index.next.3 = add nuw nsw i64 %index.next.2, 4
   %40 = icmp eq i64 %index.next.3, 4096
   br i1 %40, label %for.end, label %vector.body
diff --git a/test/CodeGen/PowerPC/swaps-le-2.ll b/test/CodeGen/PowerPC/swaps-le-2.ll
index 0963b92609f..e7751a194f7 100644
--- a/test/CodeGen/PowerPC/swaps-le-2.ll
+++ b/test/CodeGen/PowerPC/swaps-le-2.ll
@@ -2,6 +2,13 @@
 
 ; Test swap removal when a vector splat must be adjusted to make it legal.
 ;
+
+; LH: 2016-11-17
+;   Updated align attritue from 16 to 8 to keep swap instructions tests.
+;   Changes have been made on little-endian to use lvx and stvx
+;   instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
+;   aligned vectors with elements up to 4 bytes
+
 ; Test generated from following C code:
 ;
 ; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
@@ -28,37 +35,37 @@
 ;   vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
 ; }
 
-@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
-@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
-@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
-@vcr = common global <16 x i8> zeroinitializer, align 16
-@vsr = common global <8 x i16> zeroinitializer, align 16
-@vir = common global <4 x i32> zeroinitializer, align 16
+@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 8
+@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 8
+@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 8
+@vcr = common global <16 x i8> zeroinitializer, align 8
+@vsr = common global <8 x i16> zeroinitializer, align 8
+@vir = common global <4 x i32> zeroinitializer, align 8
 
 ; Function Attrs: nounwind
 define void @cfoo() {
 entry:
-  %0 = load <16 x i8>, <16 x i8>* @vc, align 16
+  %0 = load <16 x i8>, <16 x i8>* @vc, align 8
   %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 16
+  store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 8
   ret void
 }
 
 ; Function Attrs: nounwind
 define void @sfoo() {
 entry:
-  %0 = load <8 x i16>, <8 x i16>* @vs, align 16
+  %0 = load <8 x i16>, <8 x i16>* @vs, align 8
   %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
-  store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 16
+  store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 8
   ret void
 }
 
 ; Function Attrs: nounwind
 define void @ifoo() {
 entry:
-  %0 = load <4 x i32>, <4 x i32>* @vi, align 16
+  %0 = load <4 x i32>, <4 x i32>* @vi, align 8
   %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  store <4 x i32> %vecinit6, <4 x i32>* @vir, align 16
+  store <4 x i32> %vecinit6, <4 x i32>* @vir, align 8
   ret void
 }
 
diff --git a/test/CodeGen/PowerPC/vsx-ldst.ll b/test/CodeGen/PowerPC/vsx-ldst.ll
index a146182de99..d8dd635aab5 100644
--- a/test/CodeGen/PowerPC/vsx-ldst.ll
+++ b/test/CodeGen/PowerPC/vsx-ldst.ll
@@ -14,8 +14,10 @@
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
-; RUN: grep lxvd2x < %t | count 6
-; RUN: grep stxvd2x < %t | count 6
+; RUN: grep lxvd2x < %t | count 3
+; RUN: grep lvx < %t | count 3
+; RUN: grep stxvd2x < %t | count 3
+; RUN: grep stvx < %t | count 3
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index a5dd494a7cc..cfea3e5696d 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -645,8 +645,8 @@ define <4 x float> @test32(<4 x float>* %a) {
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test32
-; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
-; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: lvx 2, 0, 3
+; CHECK-LE-NOT: xxswapd
 ; CHECK-LE: blr
 }
 
@@ -663,8 +663,8 @@ define void @test33(<4 x float>* %a, <4 x float> %b) {
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test33
-; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
-; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE-NOT: xxswapd
+; CHECK-LE: stvx 2, 0, 3
 ; CHECK-LE: blr
 }
 
@@ -716,8 +716,8 @@ define <4 x i32> @test34(<4 x i32>* %a) {
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test34
-; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
-; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: lvx 2, 0, 3
+; CHECK-LE-NOT: xxswapd
 ; CHECK-LE: blr
 }
 
@@ -734,8 +734,8 @@ define void @test35(<4 x i32>* %a, <4 x i32> %b) {
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test35
-; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
-; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE-NOT: xxswapd
+; CHECK-LE: stvx 2, 0, 3
 ; CHECK-LE: blr
 }
 
@@ -1150,9 +1150,9 @@ define <2 x i32> @test80(i32 %v) {
 ; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
 ; CHECK-LE-DAG: xxswapd  [[V1:[0-9]+]], [[R1]]
 ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
+; CHECK-LE-DAG: lvx 3, 0, [[R2]]
 ; CHECK-LE-DAG: xxspltw 34, [[V1]]
-; CHECK-LE-DAG: xxswapd 35, [[V2]]
+; CHECK-LE-NOT: xxswapd 35, [[V2]]
 ; CHECK-LE: vadduwm 2, 2, 3
 ; CHECK-LE: blr
 }
diff --git a/test/CodeGen/X86/GlobalISel/binop.ll b/test/CodeGen/X86/GlobalISel/binop.ll
index 8499dd95844..bf4c42cb429 100644
--- a/test/CodeGen/X86/GlobalISel/binop.ll
+++ b/test/CodeGen/X86/GlobalISel/binop.ll
@@ -24,6 +24,28 @@ define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
   ret i32 %ret
 }
 
+define i16 @test_add_i16(i16 %arg1, i16 %arg2) {
+; ALL-LABEL: test_add_i16:
+; ALL:       # BB#0:
+; ALL-NEXT:    # kill: %DI<def> %DI<kill> %RDI<def>
+; ALL-NEXT:    # kill: %SI<def> %SI<kill> %RSI<def>
+; ALL-NEXT:    leal (%rsi,%rdi), %eax
+; ALL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; ALL-NEXT:    retq
+  %ret = add i16 %arg1, %arg2
+  ret i16 %ret
+}
+
+define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
+; ALL-LABEL: test_add_i8:
+; ALL:       # BB#0:
+; ALL-NEXT:    addb %dil, %sil
+; ALL-NEXT:    movl %esi, %eax
+; ALL-NEXT:    retq
+  %ret = add i8 %arg1, %arg2
+  ret i8 %ret
+}
+
 define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
 ; ALL-LABEL: test_sub_i64:
 ; ALL:       # BB#0:
diff --git a/test/CodeGen/X86/GlobalISel/callingconv.ll b/test/CodeGen/X86/GlobalISel/callingconv.ll
index ec62ece6d40..c7e4d91ac3c 100644
--- a/test/CodeGen/X86/GlobalISel/callingconv.ll
+++ b/test/CodeGen/X86/GlobalISel/callingconv.ll
@@ -37,6 +37,44 @@ define i64 @test_ret_i64() {
   ret i64 68719476735
 }
 
+define i8 @test_arg_i8(i8 %a) {
+; X32_GISEL-LABEL: test_arg_i8:
+; X32_GISEL:       # BB#0:
+; X32_GISEL-NEXT:    leal 4(%esp), %eax
+; X32_GISEL-NEXT:    movb (%eax), %al
+; X32_GISEL-NEXT:    retl
+;
+; X32_ISEL-LABEL: test_arg_i8:
+; X32_ISEL:       # BB#0:
+; X32_ISEL-NEXT:    movb 4(%esp), %al
+; X32_ISEL-NEXT:    retl
+;
+; X64-LABEL: test_arg_i8:
+; X64:       # BB#0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+  ret i8 %a
+}
+
+define i16 @test_arg_i16(i16 %a) {
+; X32_GISEL-LABEL: test_arg_i16:
+; X32_GISEL:       # BB#0:
+; X32_GISEL-NEXT:    leal 4(%esp), %eax
+; X32_GISEL-NEXT:    movzwl (%eax), %eax
+; X32_GISEL-NEXT:    retl
+;
+; X32_ISEL-LABEL: test_arg_i16:
+; X32_ISEL:       # BB#0:
+; X32_ISEL-NEXT:    movzwl 4(%esp), %eax
+; X32_ISEL-NEXT:    retl
+;
+; X64-LABEL: test_arg_i16:
+; X64:       # BB#0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    retq
+  ret i16 %a
+}
+
 define i32 @test_arg_i32(i32 %a) {
 ; X32_GISEL-LABEL: test_arg_i32:
 ; X32_GISEL:       # BB#0:
diff --git a/test/CodeGen/X86/GlobalISel/ext-x86-64.ll b/test/CodeGen/X86/GlobalISel/ext-x86-64.ll
new file mode 100644
index 00000000000..c4d3566008b
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/ext-x86-64.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu    -global-isel < %s -o - | FileCheck %s --check-prefix=X64
+
+; TODO merge with ext.ll after i64 sext suported on 32bit platform 
+
+define i64 @test_sext_i8(i8 %val) {
+; X64-LABEL: test_sext_i8:
+; X64:       # BB#0:
+; X64-NEXT:    movsbq %dil, %rax
+; X64-NEXT:    retq
+  %r = sext i8 %val to i64
+  ret i64 %r
+}
+
+define i64 @test_sext_i16(i16 %val) {
+; X64-LABEL: test_sext_i16:
+; X64:       # BB#0:
+; X64-NEXT:    movswq %di, %rax
+; X64-NEXT:    retq
+  %r = sext i16 %val to i64
+  ret i64 %r
+}
+
+; TODO enable after selection supported
+;define i64 @test_sext_i32(i32 %val) {
+;  %r = sext i32 %val to i64
+;  ret i64 %r
+;}
+
diff --git a/test/CodeGen/X86/GlobalISel/ext.ll b/test/CodeGen/X86/GlobalISel/ext.ll
new file mode 100644
index 00000000000..3c032686130
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/ext.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu    -global-isel < %s -o - | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i386-linux-gnu      -global-isel < %s -o - | FileCheck %s --check-prefix=X32
+
+define i32 @test_zext_i8(i8 %val) {
+; X64-LABEL: test_zext_i8:
+; X64:       # BB#0:
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_zext_i8:
+; X32:       # BB#0:
+; X32-NEXT:    leal 4(%esp), %eax
+; X32-NEXT:    movzbl (%eax), %eax
+; X32-NEXT:    retl
+  %r = zext i8 %val to i32
+  ret i32 %r
+}
+
+define i32 @test_zext_i16(i16 %val) {
+; X64-LABEL: test_zext_i16:
+; X64:       # BB#0:
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_zext_i16:
+; X32:       # BB#0:
+; X32-NEXT:    leal 4(%esp), %eax
+; X32-NEXT:    movzwl (%eax), %eax
+; X32-NEXT:    retl
+  %r = zext i16 %val to i32
+  ret i32 %r
+}
+
+define i32 @test_sext_i8(i8 %val) {
+; X64-LABEL: test_sext_i8:
+; X64:       # BB#0:
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_sext_i8:
+; X32:       # BB#0:
+; X32-NEXT:    leal 4(%esp), %eax
+; X32-NEXT:    movsbl (%eax), %eax
+; X32-NEXT:    retl
+  %r = sext i8 %val to i32
+  ret i32 %r
+}
+
+define i32 @test_sext_i16(i16 %val) {
+; X64-LABEL: test_sext_i16:
+; X64:       # BB#0:
+; X64-NEXT:    movswl %di, %eax
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_sext_i16:
+; X32:       # BB#0:
+; X32-NEXT:    leal 4(%esp), %eax
+; X32-NEXT:    movswl (%eax), %eax
+; X32-NEXT:    retl
+  %r = sext i16 %val to i32
+  ret i32 %r
+}
+
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
index c1bf4441766..bc394f6e156 100644
--- a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
+++ b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
@@ -20,7 +20,6 @@ define void @test_void_return() {
 ; CHECK-NEXT:   maxAlignment:    0
 ; CHECK-NEXT:   adjustsStack:    false
 ; CHECK-NEXT:   hasCalls:        false
-; CHECK-NEXT:   maxCallFrameSize: 0
 ; CHECK-NEXT:   hasOpaqueSPAdjustment: false
 ; CHECK-NEXT:   hasVAStart:      false
 ; CHECK-NEXT:   hasMustTailInVarArgFunc: false
diff --git a/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir b/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir
new file mode 100644
index 00000000000..25af600f229
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-ext-x86-64.mir
@@ -0,0 +1,172 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
+
+--- |
+  define i64 @test_sext_i8(i8 %val) {
+    %r = sext i8 %val to i64
+    ret i64 %r
+  }
+
+  define i64 @test_sext_i16(i16 %val) {
+    %r = sext i16 %val to i64
+    ret i64 %r
+  }
+
+  define i64 @test_sext_i32(i32 %val) {
+    %r = sext i32 %val to i64
+    ret i64 %r
+  }
+
+  define i64 @test_zext_i8(i8 %val) {
+    %r = zext i8 %val to i64
+    ret i64 %r
+  }
+
+  define i64 @test_zext_i16(i16 %val) {
+    %r = zext i16 %val to i64
+    ret i64 %r
+  }
+
+  define i64 @test_zext_i32(i32 %val) {
+    %r = zext i32 %val to i64
+    ret i64 %r
+  }
+
+...
+---
+name:            test_sext_i8
+# CHECK-LABEL: name:  test_sext_i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# CHECK:          %0(s8) = COPY %edi
+# CHECK-NEXT:     %1(s64) = G_SEXT %0(s8)
+# CHECK-NEXT:     %rax = COPY %1(s64)
+# CHECK-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s8) = COPY %edi
+    %1(s64) = G_SEXT %0(s8)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
+---
+name:            test_sext_i16
+# CHECK-LABEL: name:  test_sext_i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# CHECK:          %0(s16) = COPY %edi
+# CHECK-NEXT:     %1(s64) = G_SEXT %0(s16)
+# CHECK-NEXT:     %rax = COPY %1(s64)
+# CHECK-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s16) = COPY %edi
+    %1(s64) = G_SEXT %0(s16)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
+---
+name:            test_sext_i32
+# CHECK-LABEL: name:  test_sext_i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# CHECK:          %0(s32) = COPY %edi
+# CHECK-NEXT:     %1(s64) = G_SEXT %0(s32)
+# CHECK-NEXT:     %rax = COPY %1(s64)
+# CHECK-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s32) = COPY %edi
+    %1(s64) = G_SEXT %0(s32)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
+---
+name:            test_zext_i8
+# CHECK-LABEL: name:  test_zext_i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# CHECK:          %0(s8) = COPY %edi
+# CHECK-NEXT:     %1(s64) = G_ZEXT %0(s8)
+# CHECK-NEXT:     %rax = COPY %1(s64)
+# CHECK-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s8) = COPY %edi
+    %1(s64) = G_ZEXT %0(s8)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
+---
+name:            test_zext_i16
+# CHECK-LABEL: name:  test_zext_i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# CHECK:          %0(s16) = COPY %edi
+# CHECK-NEXT:     %1(s64) = G_ZEXT %0(s16)
+# CHECK-NEXT:     %rax = COPY %1(s64)
+# CHECK-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s16) = COPY %edi
+    %1(s64) = G_ZEXT %0(s16)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
+---
+name:            test_zext_i32
+# CHECK-LABEL: name:  test_zext_i32
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# CHECK:          %0(s32) = COPY %edi
+# CHECK-NEXT:     %1(s64) = G_ZEXT %0(s32)
+# CHECK-NEXT:     %rax = COPY %1(s64)
+# CHECK-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s32) = COPY %edi
+    %1(s64) = G_ZEXT %0(s32)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/legalize-ext.mir b/test/CodeGen/X86/GlobalISel/legalize-ext.mir
new file mode 100644
index 00000000000..46457e0fff5
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/legalize-ext.mir
@@ -0,0 +1,116 @@
+# RUN: llc -mtriple=i386-linux-gnu   -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+--- |
+  define i32 @test_zext_i8(i8 %val) {
+    %r = zext i8 %val to i32
+    ret i32 %r
+  }
+
+  define i32 @test_zext_i16(i16 %val) {
+    %r = zext i16 %val to i32
+    ret i32 %r
+  }
+
+  define i32 @test_sext_i8(i8 %val) {
+    %r = sext i8 %val to i32
+    ret i32 %r
+  }
+
+  define i32 @test_sext_i16(i16 %val) {
+    %r = sext i16 %val to i32
+    ret i32 %r
+  }
+
+...
+---
+name:            test_zext_i8
+# ALL-LABEL: name:  test_zext_i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# ALL:          %0(s8) = COPY %edi
+# ALL-NEXT:     %1(s32) = G_ZEXT %0(s8)
+# ALL-NEXT:     %eax = COPY %1(s32)
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s8) = COPY %edi
+    %1(s32) = G_ZEXT %0(s8)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_zext_i16
+# ALL-LABEL: name:  test_zext_i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# ALL:          %0(s16) = COPY %edi
+# ALL-NEXT:     %1(s32) = G_ZEXT %0(s16)
+# ALL-NEXT:     %eax = COPY %1(s32)
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s16) = COPY %edi
+    %1(s32) = G_ZEXT %0(s16)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_sext_i8
+# ALL-LABEL: name:  test_sext_i8
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# ALL:          %0(s8) = COPY %edi
+# ALL-NEXT:     %1(s32) = G_SEXT %0(s8)
+# ALL-NEXT:     %eax = COPY %1(s32)
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s8) = COPY %edi
+    %1(s32) = G_SEXT %0(s8)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_sext_i16
+# ALL-LABEL: name:  test_sext_i16
+alignment:       4
+legalized:       false
+regBankSelected: false
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+# ALL:          %0(s16) = COPY %edi
+# ALL-NEXT:     %1(s32) = G_SEXT %0(s16)
+# ALL-NEXT:     %eax = COPY %1(s32)
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s16) = COPY %edi
+    %1(s32) = G_SEXT %0(s16)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/memop-x32.ll b/test/CodeGen/X86/GlobalISel/memop-x32.ll
new file mode 100644
index 00000000000..49a7fd79f8b
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/memop-x32.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i386-linux-gnu                       -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE_FAST
+; RUN: llc -mtriple=i386-linux-gnu -regbankselect-greedy -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE_GREEDY
+
+;TODO merge with x86-64 tests (many operations not suppored yet)
+
+define i8 @test_load_i8(i8 * %p1) {
+; ALL-LABEL: test_load_i8:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    movb (%eax), %al
+; ALL-NEXT:    retl
+  %r = load i8, i8* %p1
+  ret i8 %r
+}
+
+define i16 @test_load_i16(i16 * %p1) {
+; ALL-LABEL: test_load_i16:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    movzwl (%eax), %eax
+; ALL-NEXT:    retl
+  %r = load i16, i16* %p1
+  ret i16 %r
+}
+
+define i32 @test_load_i32(i32 * %p1) {
+; ALL-LABEL: test_load_i32:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    retl
+  %r = load i32, i32* %p1
+  ret i32 %r
+}
+
+define i8 * @test_store_i8(i8 %val, i8 * %p1) {
+; ALL-LABEL: test_store_i8:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movb (%eax), %cl
+; ALL-NEXT:    leal 8(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    movb %cl, (%eax)
+; ALL-NEXT:    retl
+  store i8 %val, i8* %p1
+  ret i8 * %p1;
+}
+
+define i16 * @test_store_i16(i16 %val, i16 * %p1) {
+; ALL-LABEL: test_store_i16:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movzwl (%eax), %ecx
+; ALL-NEXT:    leal 8(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    movw %cx, (%eax)
+; ALL-NEXT:    retl
+  store i16 %val, i16* %p1
+  ret i16 * %p1;
+}
+
+define i32 * @test_store_i32(i32 %val, i32 * %p1) {
+; ALL-LABEL: test_store_i32:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movl (%eax), %ecx
+; ALL-NEXT:    leal 8(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    movl %ecx, (%eax)
+; ALL-NEXT:    retl
+  store i32 %val, i32* %p1
+  ret i32 * %p1;
+}
+
+define i32* @test_load_ptr(i32** %ptr1) {
+; ALL-LABEL: test_load_ptr:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    retl
+  %p = load i32*, i32** %ptr1
+  ret i32* %p
+}
+
+define void @test_store_ptr(i32** %ptr1, i32* %a) {
+; ALL-LABEL: test_store_ptr:
+; ALL:       # BB#0:
+; ALL-NEXT:    leal 4(%esp), %eax
+; ALL-NEXT:    movl (%eax), %eax
+; ALL-NEXT:    leal 8(%esp), %ecx
+; ALL-NEXT:    movl (%ecx), %ecx
+; ALL-NEXT:    movl %ecx, (%eax)
+; ALL-NEXT:    retl
+  store i32* %a, i32** %ptr1
+  ret void
+}
diff --git a/test/CodeGen/X86/GlobalISel/memop.ll b/test/CodeGen/X86/GlobalISel/memop.ll
index f793e36026b..a7407c0e6b7 100644
--- a/test/CodeGen/X86/GlobalISel/memop.ll
+++ b/test/CodeGen/X86/GlobalISel/memop.ll
@@ -187,3 +187,20 @@ define double * @test_store_double(double %val, double * %p1) {
   ret double * %p1;
 }
 
+define i32* @test_load_ptr(i32** %ptr1) {
+; ALL-LABEL: test_load_ptr:
+; ALL:       # BB#0:
+; ALL-NEXT:    movq (%rdi), %rax
+; ALL-NEXT:    retq
+  %p = load i32*, i32** %ptr1
+  ret i32* %p
+}
+
+define void @test_store_ptr(i32** %ptr1, i32* %a) {
+; ALL-LABEL: test_store_ptr:
+; ALL:       # BB#0:
+; ALL-NEXT:    movq %rsi, (%rdi)
+; ALL-NEXT:    retq
+  store i32* %a, i32** %ptr1
+  ret void
+}
diff --git a/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
similarity index 100%
rename from test/CodeGen/X86/GlobalISel/X86-regbankselect.mir
rename to test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir
index 27fcc223d2b..7337ce12c39 100644
--- a/test/CodeGen/X86/GlobalISel/select-add.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add.mir
@@ -14,6 +14,16 @@
     ret i32 %ret
   }
 
+  define i16 @test_add_i16(i16 %arg1, i16 %arg2) {
+    %ret = add i16 %arg1, %arg2
+    ret i16 %ret
+  }
+
+  define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
+    %ret = add i8 %arg1, %arg2
+    ret i8 %ret
+  }
+
   define float @test_add_float(float %arg1, float %arg2) {
     %ret = fadd float %arg1, %arg2
     ret float %ret
@@ -37,6 +47,7 @@
 
 ---
 name:            test_add_i64
+# ALL-LABEL: name:            test_add_i64
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
@@ -63,6 +74,7 @@ body:             |
 
 ---
 name:            test_add_i32
+# ALL-LABEL: name:            test_add_i32
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
@@ -85,9 +97,68 @@ body:             |
     %2(s32) = G_ADD %0, %1
     %rax = COPY %2(s32)
 
+...
+---
+name:            test_add_i16
+# ALL-LABEL: name:            test_add_i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+# ALL:      registers:
+# ALL-NEXT:  - { id: 0, class: gr16 }
+# ALL-NEXT:  - { id: 1, class: gr16 }
+# ALL-NEXT:  - { id: 2, class: gr16 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+# ALL: %0 = COPY %di
+# ALL: %1 = COPY %si
+# ALL: %2 = ADD16rr %0, %1, implicit-def %eflags
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi, %esi
+
+    %0(s16) = COPY %edi
+    %1(s16) = COPY %esi
+    %2(s16) = G_ADD %0, %1
+    %ax = COPY %2(s16)
+    RET 0, implicit %ax
+
+...
+---
+name:            test_add_i8
+# ALL-LABEL: name:            test_add_i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+# ALL:      registers:
+# ALL-NEXT:  - { id: 0, class: gr8 }
+# ALL-NEXT:  - { id: 1, class: gr8 }
+# ALL-NEXT:  - { id: 2, class: gr8 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+# ALL: %0 = COPY %dil
+# ALL: %1 = COPY %sil
+# ALL: %2 = ADD8rr %0, %1, implicit-def %eflags
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi, %esi
+
+    %0(s8) = COPY %edi
+    %1(s8) = COPY %esi
+    %2(s8) = G_ADD %0, %1
+    %al = COPY %2(s8)
+    RET 0, implicit %al
+
 ...
 ---
 name:            test_add_float
+# ALL-LABEL: name:            test_add_float
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -122,6 +193,7 @@ body:             |
 ...
 ---
 name:            test_add_double
+# ALL-LABEL: name:            test_add_double
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -156,6 +228,7 @@ body:             |
 ...
 ---
 name:            test_add_v4i32
+# ALL-LABEL: name:            test_add_v4i32
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -191,6 +264,7 @@ body:             |
 ...
 ---
 name:            test_add_v4f32
+# ALL-LABEL: name:            test_add_v4f32
 alignment:       4
 legalized:       true
 regBankSelected: true
diff --git a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
new file mode 100644
index 00000000000..85b3f61a9e4
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
@@ -0,0 +1,66 @@
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+--- |
+  define i64 @test_sext_i8(i8 %val) {
+    %r = sext i8 %val to i64
+    ret i64 %r
+  }
+
+  define i64 @test_sext_i16(i16 %val) {
+    %r = sext i16 %val to i64
+    ret i64 %r
+  }
+
+...
+---
+name:            test_sext_i8
+# ALL-LABEL: name:  test_sext_i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr8 }
+# ALL-NEXT:   - { id: 1, class: gr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL:          %0 = COPY %dil
+# ALL-NEXT:     %1 = MOVSX64rr8 %0
+# ALL-NEXT:     %rax = COPY %1
+# ALL-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s8) = COPY %edi
+    %1(s64) = G_SEXT %0(s8)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
+---
+name:            test_sext_i16
+# ALL-LABEL: name:  test_sext_i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr16 }
+# ALL-NEXT:   - { id: 1, class: gr64 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL:          %0 = COPY %di
+# ALL-NEXT:     %1 = MOVSX64rr16 %0
+# ALL-NEXT:     %rax = COPY %1
+# ALL-NEXT:     RET 0, implicit %rax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s16) = COPY %edi
+    %1(s64) = G_SEXT %0(s16)
+    %rax = COPY %1(s64)
+    RET 0, implicit %rax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-ext.mir b/test/CodeGen/X86/GlobalISel/select-ext.mir
new file mode 100644
index 00000000000..63aeae89bd1
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-ext.mir
@@ -0,0 +1,129 @@
+# RUN: llc -mtriple=i386-linux-gnu   -global-isel -run-pass=instruction-select %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
+# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+--- |
+  define i32 @test_zext_i8(i8 %val) {
+    %r = zext i8 %val to i32
+    ret i32 %r
+  }
+
+  define i32 @test_zext_i16(i16 %val) {
+    %r = zext i16 %val to i32
+    ret i32 %r
+  }
+
+  define i32 @test_sext_i8(i8 %val) {
+    %r = sext i8 %val to i32
+    ret i32 %r
+  }
+
+  define i32 @test_sext_i16(i16 %val) {
+    %r = sext i16 %val to i32
+    ret i32 %r
+  }
+
+...
+---
+name:            test_zext_i8
+# ALL-LABEL: name:  test_zext_i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr8 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL:          %0 = COPY %dil
+# ALL-NEXT:     %1 = MOVZX32rr8 %0
+# ALL-NEXT:     %eax = COPY %1
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s8) = COPY %edi
+    %1(s32) = G_ZEXT %0(s8)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_zext_i16
+# ALL-LABEL: name:  test_zext_i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr16 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL:          %0 = COPY %di
+# ALL-NEXT:     %1 = MOVZX32rr16 %0
+# ALL-NEXT:     %eax = COPY %1
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s16) = COPY %edi
+    %1(s32) = G_ZEXT %0(s16)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_sext_i8
+# ALL-LABEL: name:  test_sext_i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr8 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL:          %0 = COPY %dil
+# ALL-NEXT:     %1 = MOVSX32rr8 %0
+# ALL-NEXT:     %eax = COPY %1
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s8) = COPY %edi
+    %1(s32) = G_SEXT %0(s8)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_sext_i16
+# ALL-LABEL: name:  test_sext_i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr16 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL:          %0 = COPY %di
+# ALL-NEXT:     %1 = MOVSX32rr16 %0
+# ALL-NEXT:     %eax = COPY %1
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %edi
+
+    %0(s16) = COPY %edi
+    %1(s32) = G_SEXT %0(s16)
+    %eax = COPY %1(s32)
+    RET 0, implicit %eax
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-inc.mir b/test/CodeGen/X86/GlobalISel/select-inc.mir
new file mode 100644
index 00000000000..7a77864091d
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-inc.mir
@@ -0,0 +1,37 @@
+# RUN: llc -mtriple=x86_64-linux-gnu                     -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=ALL,INC
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+slow-incdec -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=ALL,ADD
+
+--- |
+  define i8 @test_add_i8(i8 %arg1) {
+    %ret = add i8 %arg1, 1
+    ret i8 %ret
+  }
+...
+
+---
+name:            test_add_i8
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:  - { id: 0, class: gr8 }
+# INC-NEXT:  - { id: 1, class: gpr }
+# ADD-NEXT:  - { id: 1, class: gr8 }
+# ALL-NEXT:  - { id: 2, class: gr8 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+# ALL:      %0 = COPY %al
+# INC-NEXT: %2 = INC8r %0
+# ADD-NEXT: %1 = MOV8ri 1
+# ADD-NEXT: %2 = ADD8rr %0, %1
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %al
+
+    %0(s8) = COPY %al
+    %1(s8) = G_CONSTANT i8 1
+    %2(s8) = G_ADD %0, %1
+    %al = COPY %2(s8)
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-x32.mir b/test/CodeGen/X86/GlobalISel/select-memop-x32.mir
new file mode 100644
index 00000000000..8e6a2771db6
--- /dev/null
+++ b/test/CodeGen/X86/GlobalISel/select-memop-x32.mir
@@ -0,0 +1,310 @@
+# RUN: llc -mtriple=i386-linux-gnu  -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL
+
+--- |
+  define i8 @test_load_i8(i8* %p1) {
+    %r = load i8, i8* %p1
+    ret i8 %r
+  }
+
+  define i16 @test_load_i16(i16* %p1) {
+    %r = load i16, i16* %p1
+    ret i16 %r
+  }
+
+  define i32 @test_load_i32(i32* %p1) {
+    %r = load i32, i32* %p1
+    ret i32 %r
+  }
+
+  define i8* @test_store_i8(i8 %val, i8* %p1) {
+    store i8 %val, i8* %p1
+    ret i8* %p1
+  }
+
+  define i16* @test_store_i16(i16 %val, i16* %p1) {
+    store i16 %val, i16* %p1
+    ret i16* %p1
+  }
+
+  define i32* @test_store_i32(i32 %val, i32* %p1) {
+    store i32 %val, i32* %p1
+    ret i32* %p1
+  }
+
+  define i32* @test_load_ptr(i32** %ptr1) {
+    %p = load i32*, i32** %ptr1
+    ret i32* %p
+  }
+
+  define void @test_store_ptr(i32** %ptr1, i32* %a) {
+    store i32* %a, i32** %ptr1
+    ret void
+  }
+
+...
+---
+name:            test_load_i8
+# ALL-LABEL: name:  test_load_i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr32 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr8 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+fixedStack:
+  - { id: 0, offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %1 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV32rm %1, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %2 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.p1)
+# ALL-NEXT:     %al = COPY %2
+# ALL-NEXT:     RET 0, implicit %al
+body:             |
+  bb.1 (%ir-block.0):
+    %1(p0) = G_FRAME_INDEX %fixed-stack.0
+    %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    %2(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1)
+    %al = COPY %2(s8)
+    RET 0, implicit %al
+
+...
+---
+name:            test_load_i16
+# ALL-LABEL: name:  test_load_i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr32 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr16 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+fixedStack:
+  - { id: 0, offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %1 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV32rm %1, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %2 = MOV16rm %0, 1, _, 0, _ :: (load 2 from %ir.p1)
+# ALL-NEXT:     %ax = COPY %2
+# ALL-NEXT:     RET 0, implicit %ax
+body:             |
+  bb.1 (%ir-block.0):
+    %1(p0) = G_FRAME_INDEX %fixed-stack.0
+    %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    %2(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1)
+    %ax = COPY %2(s16)
+    RET 0, implicit %ax
+
+...
+---
+name:            test_load_i32
+# ALL-LABEL: name:  test_load_i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr32 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+fixedStack:
+  - { id: 0, offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %1 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV32rm %1, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %2 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
+# ALL-NEXT:     %eax = COPY %2
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    %1(p0) = G_FRAME_INDEX %fixed-stack.0
+    %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    %2(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
+    %eax = COPY %2(s32)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_store_i8
+# ALL-LABEL: name:  test_store_i8
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr8 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 3, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+fixedStack:
+  - { id: 0, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+  - { id: 1, offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %2 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV8rm %2, 1, _, 0, _ :: (invariant load 1 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %3 = LEA32r %fixed-stack.1, 1, _, 0, _
+# ALL-NEXT:     %1 = MOV32rm %3, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.1, align 0)
+# ALL-NEXT:     MOV8mr %1, 1, _, 0, _, %0 :: (store 1 into %ir.p1)
+# ALL-NEXT:     %eax = COPY %1
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    %2(p0) = G_FRAME_INDEX %fixed-stack.1
+    %0(s8) = G_LOAD %2(p0) :: (invariant load 1 from %fixed-stack.1, align 0)
+    %3(p0) = G_FRAME_INDEX %fixed-stack.0
+    %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    G_STORE %0(s8), %1(p0) :: (store 1 into %ir.p1)
+    %eax = COPY %1(p0)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_store_i16
+# ALL-LABEL: name:  test_store_i16
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr16 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 3, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+fixedStack:
+  - { id: 0, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+  - { id: 1, offset: 0, size: 2, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %2 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV16rm %2, 1, _, 0, _ :: (invariant load 2 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %3 = LEA32r %fixed-stack.1, 1, _, 0, _
+# ALL-NEXT:     %1 = MOV32rm %3, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.1, align 0)
+# ALL-NEXT:     MOV16mr %1, 1, _, 0, _, %0 :: (store 2 into %ir.p1)
+# ALL-NEXT:     %eax = COPY %1
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    %2(p0) = G_FRAME_INDEX %fixed-stack.1
+    %0(s16) = G_LOAD %2(p0) :: (invariant load 2 from %fixed-stack.1, align 0)
+    %3(p0) = G_FRAME_INDEX %fixed-stack.0
+    %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    G_STORE %0(s16), %1(p0) :: (store 2 into %ir.p1)
+    %eax = COPY %1(p0)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_store_i32
+# ALL-LABEL: name:  test_store_i32
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr32 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 3, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+fixedStack:
+  - { id: 0, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+  - { id: 1, offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %2 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV32rm %2, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %3 = LEA32r %fixed-stack.1, 1, _, 0, _
+# ALL-NEXT:     %1 = MOV32rm %3, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.1, align 0)
+# ALL-NEXT:     MOV32mr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
+# ALL-NEXT:     %eax = COPY %1
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    %2(p0) = G_FRAME_INDEX %fixed-stack.1
+    %0(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 0)
+    %3(p0) = G_FRAME_INDEX %fixed-stack.0
+    %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1)
+    %eax = COPY %1(p0)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_load_ptr
+# ALL-LABEL: name:  test_load_ptr
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr32 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+fixedStack:
+  - { id: 0, offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %1 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV32rm %1, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %2 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.ptr1)
+# ALL-NEXT:     %eax = COPY %2
+# ALL-NEXT:     RET 0, implicit %eax
+body:             |
+  bb.1 (%ir-block.0):
+    %1(p0) = G_FRAME_INDEX %fixed-stack.0
+    %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    %2(p0) = G_LOAD %0(p0) :: (load 4 from %ir.ptr1)
+    %eax = COPY %2(p0)
+    RET 0, implicit %eax
+
+...
+---
+name:            test_store_ptr
+# ALL-LABEL: name:  test_store_ptr
+alignment:       4
+legalized:       true
+regBankSelected: true
+# ALL:      registers:
+# ALL-NEXT:   - { id: 0, class: gr32 }
+# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 3, class: gr32 }
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+fixedStack:
+  - { id: 0, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+  - { id: 1, offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+# ALL:          %2 = LEA32r %fixed-stack.0, 1, _, 0, _
+# ALL-NEXT:     %0 = MOV32rm %2, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.0, align 0)
+# ALL-NEXT:     %3 = LEA32r %fixed-stack.1, 1, _, 0, _
+# ALL-NEXT:     %1 = MOV32rm %3, 1, _, 0, _ :: (invariant load 4 from %fixed-stack.1, align 0)
+# ALL-NEXT:     MOV32mr %0, 1, _, 0, _, %1 :: (store 4 into %ir.ptr1)
+# ALL-NEXT:     RET 0
+body:             |
+  bb.1 (%ir-block.0):
+    %2(p0) = G_FRAME_INDEX %fixed-stack.1
+    %0(p0) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 0)
+    %3(p0) = G_FRAME_INDEX %fixed-stack.0
+    %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+    G_STORE %1(p0), %0(p0) :: (store 4 into %ir.ptr1)
+    RET 0
+
+...
diff --git a/test/CodeGen/X86/GlobalISel/select-memop.mir b/test/CodeGen/X86/GlobalISel/select-memop.mir
index 943c9aceb4d..817dc3cc976 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop.mir
@@ -95,6 +95,15 @@
     ret <4 x i32>* %p1
   }
 
+  define i32* @test_load_ptr(i32** %ptr1) {
+    %p = load i32*, i32** %ptr1
+    ret i32* %p
+  }
+
+  define void @test_store_ptr(i32** %ptr1, i32* %a) {
+    store i32* %a, i32** %ptr1
+    ret void
+  }
 ...
 ---
 # ALL-LABEL: name:            test_load_i8
@@ -580,3 +589,49 @@ body:             |
     RET 0, implicit %rax
 
 ...
+---
+# ALL-LABEL: name:            test_load_ptr
+name:            test_load_ptr
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+registers:
+# ALL:   - { id: 0, class: gr64 }
+# ALL:   - { id: 1, class: gr64 }
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.ptr1)
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %rdi
+
+    %0(p0) = COPY %rdi
+    %1(p0) = G_LOAD %0(p0) :: (load 8 from %ir.ptr1)
+    %rax = COPY %1(p0)
+    RET 0, implicit %rax
+
+...
+---
+# ALL-LABEL: name:            test_store_ptr
+name:            test_store_ptr
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+registers:
+# ALL:   - { id: 0, class: gr64 }
+# ALL:   - { id: 1, class: gr64 }
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+# ALL: MOV64mr %0, 1, _, 0, _, %1 :: (store 8 into %ir.ptr1)
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: %rdi, %rsi
+
+    %0(p0) = COPY %rdi
+    %1(p0) = COPY %rsi
+    G_STORE %1(p0), %0(p0) :: (store 8 into %ir.ptr1)
+    RET 0
+
+...
diff --git a/test/CodeGen/X86/adde-carry.ll b/test/CodeGen/X86/addcarry.ll
similarity index 88%
rename from test/CodeGen/X86/adde-carry.ll
rename to test/CodeGen/X86/addcarry.ll
index 9483a6b492c..6fc07cd84de 100644
--- a/test/CodeGen/X86/adde-carry.ll
+++ b/test/CodeGen/X86/addcarry.ll
@@ -47,7 +47,7 @@ define void @c(i16* nocapture %r, i64 %a, i64 %b, i16 %c) nounwind {
 ; CHECK-LABEL: c:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    addq %rdx, %rsi
-; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    adcw $0, %cx
 ; CHECK-NEXT:    movw %cx, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
@@ -66,7 +66,7 @@ define void @d(i8* nocapture %r, i64 %a, i64 %b, i8 %c) nounwind {
 ; CHECK-LABEL: d:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    addq %rdx, %rsi
-; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    adcb $0, %cl
 ; CHECK-NEXT:    movb %cl, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
@@ -86,25 +86,21 @@ entry:
 define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) {
 ; CHECK-LABEL: pr31719:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    addq (%rsi), %rdx
+; CHECK-NEXT:    addq 8(%rsi), %rcx
 ; CHECK-NEXT:    sbbq %r10, %r10
 ; CHECK-NEXT:    andl $1, %r10d
-; CHECK-NEXT:    addq 8(%rsi), %rcx
-; CHECK-NEXT:    sbbq %r11, %r11
-; CHECK-NEXT:    andl $1, %r11d
-; CHECK-NEXT:    addq %r10, %rcx
-; CHECK-NEXT:    adcq $0, %r11
 ; CHECK-NEXT:    addq 16(%rsi), %r8
 ; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    addq %r11, %r8
-; CHECK-NEXT:    adcq $0, %rax
 ; CHECK-NEXT:    addq 24(%rsi), %r9
-; CHECK-NEXT:    addq %rax, %r9
+; CHECK-NEXT:    addq (%rsi), %rdx
+; CHECK-NEXT:    adcq $0, %rcx
+; CHECK-NEXT:    adcq %r8, %r10
+; CHECK-NEXT:    adcq %r9, %rax
 ; CHECK-NEXT:    movq %rdx, (%rdi)
 ; CHECK-NEXT:    movq %rcx, 8(%rdi)
-; CHECK-NEXT:    movq %r8, 16(%rdi)
-; CHECK-NEXT:    movq %r9, 24(%rdi)
+; CHECK-NEXT:    movq %r10, 16(%rdi)
+; CHECK-NEXT:    movq %rax, 24(%rdi)
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
@@ -159,12 +155,10 @@ define void @muladd(%accumulator* nocapture %this, i64 %arg.a, i64 %arg.b) {
 ; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    mulq %rsi
 ; CHECK-NEXT:    addq (%rdi), %rax
-; CHECK-NEXT:    adcq $0, %rdx
 ; CHECK-NEXT:    movq %rax, (%rdi)
-; CHECK-NEXT:    addq 8(%rdi), %rdx
+; CHECK-NEXT:    adcq 8(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, 8(%rdi)
-; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    subl %eax, 16(%rdi)
+; CHECK-NEXT:    adcl $0, 16(%rdi)
 ; CHECK-NEXT:    retq
 entry:
   %0 = zext i64 %arg.a to i128
@@ -192,3 +186,21 @@ entry:
   store i32 %19, i32* %15, align 4
   ret void
 }
+
+define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: shiftadd:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    leaq (%rdx,%rcx), %rax
+; CHECK-NEXT:    addq %rsi, %rdi
+; CHECK-NEXT:    adcq $0, %rax
+; CHECK-NEXT:    retq
+entry:
+  %0 = zext i64 %a to i128
+  %1 = zext i64 %b to i128
+  %2 = add i128 %0, %1
+  %3 = lshr i128 %2, 64
+  %4 = trunc i128 %3 to i64
+  %5 = add i64 %c, %d
+  %6 = add i64 %4, %5
+  ret i64 %6
+}
diff --git a/test/CodeGen/X86/all-ones-vector.ll b/test/CodeGen/X86/all-ones-vector.ll
index 8e050ee2404..35f488ea448 100644
--- a/test/CodeGen/X86/all-ones-vector.ll
+++ b/test/CodeGen/X86/all-ones-vector.ll
@@ -10,368 +10,442 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=knl | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-AVX256 --check-prefix=X64-AVX512 --check-prefix=X64-KNL
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefix=X64-AVX --check-prefix=X64-AVX256 --check-prefix=X64-AVX512 --check-prefix=X64-SKX
 
-define <16 x i8> @coo() nounwind {
-; X32-SSE-LABEL: coo:
+define <16 x i8> @allones_v16i8() nounwind {
+; X32-SSE-LABEL: allones_v16i8:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX-LABEL: coo:
+; X32-AVX-LABEL: allones_v16i8:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: coo:
+; X64-SSE-LABEL: allones_v16i8:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: coo:
+; X64-AVX-LABEL: allones_v16i8:
 ; X64-AVX:       # BB#0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 }
 
-define <8 x i16> @soo() nounwind {
-; X32-SSE-LABEL: soo:
+define <8 x i16> @allones_v8i16() nounwind {
+; X32-SSE-LABEL: allones_v8i16:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX-LABEL: soo:
+; X32-AVX-LABEL: allones_v8i16:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: soo:
+; X64-SSE-LABEL: allones_v8i16:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: soo:
+; X64-AVX-LABEL: allones_v8i16:
 ; X64-AVX:       # BB#0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 }
 
-define <4 x i32> @ioo() nounwind {
-; X32-SSE-LABEL: ioo:
+define <4 x i32> @allones_v4i32() nounwind {
+; X32-SSE-LABEL: allones_v4i32:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX-LABEL: ioo:
+; X32-AVX-LABEL: allones_v4i32:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: ioo:
+; X64-SSE-LABEL: allones_v4i32:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: ioo:
+; X64-AVX-LABEL: allones_v4i32:
 ; X64-AVX:       # BB#0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
 }
 
-define <2 x i64> @loo() nounwind {
-; X32-SSE-LABEL: loo:
+define <2 x i64> @allones_v2i64() nounwind {
+; X32-SSE-LABEL: allones_v2i64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX-LABEL: loo:
+; X32-AVX-LABEL: allones_v2i64:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: loo:
+; X64-SSE-LABEL: allones_v2i64:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: loo:
+; X64-AVX-LABEL: allones_v2i64:
 ; X64-AVX:       # BB#0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   ret <2 x i64> <i64 -1, i64 -1>
 }
 
-define <2 x double> @doo() nounwind {
-; X32-SSE-LABEL: doo:
+define <2 x double> @allones_v2f64() nounwind {
+; X32-SSE-LABEL: allones_v2f64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX-LABEL: doo:
+; X32-AVX-LABEL: allones_v2f64:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: doo:
+; X64-SSE-LABEL: allones_v2f64:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: doo:
+; X64-AVX-LABEL: allones_v2f64:
 ; X64-AVX:       # BB#0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff>
 }
 
-define <4 x float> @foo() nounwind {
-; X32-SSE-LABEL: foo:
+define <4 x float> @allones_v4f32() nounwind {
+; X32-SSE-LABEL: allones_v4f32:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX-LABEL: foo:
+; X32-AVX-LABEL: allones_v4f32:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: foo:
+; X64-SSE-LABEL: allones_v4f32:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: foo:
+; X64-AVX-LABEL: allones_v4f32:
 ; X64-AVX:       # BB#0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    retq
   ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000>
 }
 
-define <32 x i8> @coo256() nounwind {
-; X32-SSE-LABEL: coo256:
+define <32 x i8> @allones_v32i8() nounwind {
+; X32-SSE-LABEL: allones_v32i8:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: coo256:
+; X32-AVX1-LABEL: allones_v32i8:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX256-LABEL: coo256:
+; X32-AVX256-LABEL: allones_v32i8:
 ; X32-AVX256:       # BB#0:
 ; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX256-NEXT:    retl
 ;
-; X64-SSE-LABEL: coo256:
+; X64-SSE-LABEL: allones_v32i8:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: coo256:
+; X64-AVX1-LABEL: allones_v32i8:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX256-LABEL: coo256:
+; X64-AVX256-LABEL: allones_v32i8:
 ; X64-AVX256:       # BB#0:
 ; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT:    retq
   ret <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 }
 
-define <16 x i16> @soo256() nounwind {
-; X32-SSE-LABEL: soo256:
+define <16 x i16> @allones_v16i16() nounwind {
+; X32-SSE-LABEL: allones_v16i16:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: soo256:
+; X32-AVX1-LABEL: allones_v16i16:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX256-LABEL: soo256:
+; X32-AVX256-LABEL: allones_v16i16:
 ; X32-AVX256:       # BB#0:
 ; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX256-NEXT:    retl
 ;
-; X64-SSE-LABEL: soo256:
+; X64-SSE-LABEL: allones_v16i16:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: soo256:
+; X64-AVX1-LABEL: allones_v16i16:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX256-LABEL: soo256:
+; X64-AVX256-LABEL: allones_v16i16:
 ; X64-AVX256:       # BB#0:
 ; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT:    retq
   ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 }
 
-define <8 x i32> @ioo256() nounwind {
-; X32-SSE-LABEL: ioo256:
+define <8 x i32> @allones_v8i32() nounwind {
+; X32-SSE-LABEL: allones_v8i32:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: ioo256:
+; X32-AVX1-LABEL: allones_v8i32:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX256-LABEL: ioo256:
+; X32-AVX256-LABEL: allones_v8i32:
 ; X32-AVX256:       # BB#0:
 ; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX256-NEXT:    retl
 ;
-; X64-SSE-LABEL: ioo256:
+; X64-SSE-LABEL: allones_v8i32:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: ioo256:
+; X64-AVX1-LABEL: allones_v8i32:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX256-LABEL: ioo256:
+; X64-AVX256-LABEL: allones_v8i32:
 ; X64-AVX256:       # BB#0:
 ; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT:    retq
   ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
 }
 
-define <4 x i64> @loo256() nounwind {
-; X32-SSE-LABEL: loo256:
+define <4 x i64> @allones_v4i64() nounwind {
+; X32-SSE-LABEL: allones_v4i64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: loo256:
+; X32-AVX1-LABEL: allones_v4i64:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX256-LABEL: loo256:
+; X32-AVX256-LABEL: allones_v4i64:
 ; X32-AVX256:       # BB#0:
 ; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX256-NEXT:    retl
 ;
-; X64-SSE-LABEL: loo256:
+; X64-SSE-LABEL: allones_v4i64:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: loo256:
+; X64-AVX1-LABEL: allones_v4i64:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX256-LABEL: loo256:
+; X64-AVX256-LABEL: allones_v4i64:
 ; X64-AVX256:       # BB#0:
 ; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT:    retq
   ret <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
 }
 
-define <4 x double> @doo256() nounwind {
-; X32-SSE-LABEL: doo256:
+define <4 x double> @allones_v4f64() nounwind {
+; X32-SSE-LABEL: allones_v4f64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: doo256:
+; X32-AVX1-LABEL: allones_v4f64:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX256-LABEL: doo256:
+; X32-AVX256-LABEL: allones_v4f64:
 ; X32-AVX256:       # BB#0:
 ; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX256-NEXT:    retl
 ;
-; X64-SSE-LABEL: doo256:
+; X64-SSE-LABEL: allones_v4f64:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: doo256:
+; X64-AVX1-LABEL: allones_v4f64:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX256-LABEL: doo256:
+; X64-AVX256-LABEL: allones_v4f64:
 ; X64-AVX256:       # BB#0:
 ; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT:    retq
   ret <4 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff>
 }
 
-define <8 x float> @foo256() nounwind {
-; X32-SSE-LABEL: foo256:
+define <4 x double> @allones_v4f64_optsize() nounwind optsize {
+; X32-SSE-LABEL: allones_v4f64_optsize:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: foo256:
+; X32-AVX1-LABEL: allones_v4f64_optsize:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX256-LABEL: foo256:
+; X32-AVX256-LABEL: allones_v4f64_optsize:
 ; X32-AVX256:       # BB#0:
 ; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX256-NEXT:    retl
 ;
-; X64-SSE-LABEL: foo256:
+; X64-SSE-LABEL: allones_v4f64_optsize:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: foo256:
+; X64-AVX1-LABEL: allones_v4f64_optsize:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX256-LABEL: foo256:
+; X64-AVX256-LABEL: allones_v4f64_optsize:
+; X64-AVX256:       # BB#0:
+; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX256-NEXT:    retq
+  ret <4 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff>
+}
+
+define <8 x float> @allones_v8f32() nounwind {
+; X32-SSE-LABEL: allones_v8f32:
+; X32-SSE:       # BB#0:
+; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-SSE-NEXT:    retl
+;
+; X32-AVX1-LABEL: allones_v8f32:
+; X32-AVX1:       # BB#0:
+; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT:    retl
+;
+; X32-AVX256-LABEL: allones_v8f32:
+; X32-AVX256:       # BB#0:
+; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X32-AVX256-NEXT:    retl
+;
+; X64-SSE-LABEL: allones_v8f32:
+; X64-SSE:       # BB#0:
+; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: allones_v8f32:
+; X64-AVX1:       # BB#0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX256-LABEL: allones_v8f32:
 ; X64-AVX256:       # BB#0:
 ; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX256-NEXT:    retq
   ret <8 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000>
 }
 
-define <64 x i8> @coo512() nounwind {
-; X32-SSE-LABEL: coo512:
+define <8 x float> @allones_v8f32_optsize() nounwind optsize {
+; X32-SSE-LABEL: allones_v8f32_optsize:
+; X32-SSE:       # BB#0:
+; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-SSE-NEXT:    retl
+;
+; X32-AVX1-LABEL: allones_v8f32_optsize:
+; X32-AVX1:       # BB#0:
+; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT:    retl
+;
+; X32-AVX256-LABEL: allones_v8f32_optsize:
+; X32-AVX256:       # BB#0:
+; X32-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X32-AVX256-NEXT:    retl
+;
+; X64-SSE-LABEL: allones_v8f32_optsize:
+; X64-SSE:       # BB#0:
+; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX1-LABEL: allones_v8f32_optsize:
+; X64-AVX1:       # BB#0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX256-LABEL: allones_v8f32_optsize:
+; X64-AVX256:       # BB#0:
+; X64-AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
+; X64-AVX256-NEXT:    retq
+  ret <8 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000>
+}
+
+define <64 x i8> @allones_v64i8() nounwind {
+; X32-SSE-LABEL: allones_v64i8:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -379,31 +453,31 @@ define <64 x i8> @coo512() nounwind {
 ; X32-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: coo512:
+; X32-AVX1-LABEL: allones_v64i8:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX2-LABEL: coo512:
+; X32-AVX2-LABEL: allones_v64i8:
 ; X32-AVX2:       # BB#0:
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-AVX2-NEXT:    retl
 ;
-; X32-KNL-LABEL: coo512:
+; X32-KNL-LABEL: allones_v64i8:
 ; X32-KNL:       # BB#0:
 ; X32-KNL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-KNL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-KNL-NEXT:    retl
 ;
-; X32-SKX-LABEL: coo512:
+; X32-SKX-LABEL: allones_v64i8:
 ; X32-SKX:       # BB#0:
 ; X32-SKX-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X32-SKX-NEXT:    retl
 ;
-; X64-SSE-LABEL: coo512:
+; X64-SSE-LABEL: allones_v64i8:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -411,34 +485,34 @@ define <64 x i8> @coo512() nounwind {
 ; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: coo512:
+; X64-AVX1-LABEL: allones_v64i8:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX2-LABEL: coo512:
+; X64-AVX2-LABEL: allones_v64i8:
 ; X64-AVX2:       # BB#0:
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-KNL-LABEL: coo512:
+; X64-KNL-LABEL: allones_v64i8:
 ; X64-KNL:       # BB#0:
 ; X64-KNL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-KNL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-KNL-NEXT:    retq
 ;
-; X64-SKX-LABEL: coo512:
+; X64-SKX-LABEL: allones_v64i8:
 ; X64-SKX:       # BB#0:
 ; X64-SKX-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-SKX-NEXT:    retq
   ret <64 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 }
 
-define <32 x i16> @soo512() nounwind {
-; X32-SSE-LABEL: soo512:
+define <32 x i16> @allones_v32i16() nounwind {
+; X32-SSE-LABEL: allones_v32i16:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -446,31 +520,31 @@ define <32 x i16> @soo512() nounwind {
 ; X32-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: soo512:
+; X32-AVX1-LABEL: allones_v32i16:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX2-LABEL: soo512:
+; X32-AVX2-LABEL: allones_v32i16:
 ; X32-AVX2:       # BB#0:
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-AVX2-NEXT:    retl
 ;
-; X32-KNL-LABEL: soo512:
+; X32-KNL-LABEL: allones_v32i16:
 ; X32-KNL:       # BB#0:
 ; X32-KNL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-KNL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-KNL-NEXT:    retl
 ;
-; X32-SKX-LABEL: soo512:
+; X32-SKX-LABEL: allones_v32i16:
 ; X32-SKX:       # BB#0:
 ; X32-SKX-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X32-SKX-NEXT:    retl
 ;
-; X64-SSE-LABEL: soo512:
+; X64-SSE-LABEL: allones_v32i16:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -478,34 +552,34 @@ define <32 x i16> @soo512() nounwind {
 ; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: soo512:
+; X64-AVX1-LABEL: allones_v32i16:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX2-LABEL: soo512:
+; X64-AVX2-LABEL: allones_v32i16:
 ; X64-AVX2:       # BB#0:
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-KNL-LABEL: soo512:
+; X64-KNL-LABEL: allones_v32i16:
 ; X64-KNL:       # BB#0:
 ; X64-KNL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-KNL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-KNL-NEXT:    retq
 ;
-; X64-SKX-LABEL: soo512:
+; X64-SKX-LABEL: allones_v32i16:
 ; X64-SKX:       # BB#0:
 ; X64-SKX-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-SKX-NEXT:    retq
   ret <32 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 }
 
-define <16 x i32> @ioo512() nounwind {
-; X32-SSE-LABEL: ioo512:
+define <16 x i32> @allones_v16i32() nounwind {
+; X32-SSE-LABEL: allones_v16i32:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -513,25 +587,25 @@ define <16 x i32> @ioo512() nounwind {
 ; X32-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: ioo512:
+; X32-AVX1-LABEL: allones_v16i32:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX2-LABEL: ioo512:
+; X32-AVX2-LABEL: allones_v16i32:
 ; X32-AVX2:       # BB#0:
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-AVX2-NEXT:    retl
 ;
-; X32-AVX512-LABEL: ioo512:
+; X32-AVX512-LABEL: allones_v16i32:
 ; X32-AVX512:       # BB#0:
 ; X32-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X32-AVX512-NEXT:    retl
 ;
-; X64-SSE-LABEL: ioo512:
+; X64-SSE-LABEL: allones_v16i32:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -539,28 +613,28 @@ define <16 x i32> @ioo512() nounwind {
 ; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: ioo512:
+; X64-AVX1-LABEL: allones_v16i32:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX2-LABEL: ioo512:
+; X64-AVX2-LABEL: allones_v16i32:
 ; X64-AVX2:       # BB#0:
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-AVX512-LABEL: ioo512:
+; X64-AVX512-LABEL: allones_v16i32:
 ; X64-AVX512:       # BB#0:
 ; X64-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    retq
   ret <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
 }
 
-define <8 x i64> @loo512() nounwind {
-; X32-SSE-LABEL: loo512:
+define <8 x i64> @allones_v8i64() nounwind {
+; X32-SSE-LABEL: allones_v8i64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -568,25 +642,25 @@ define <8 x i64> @loo512() nounwind {
 ; X32-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: loo512:
+; X32-AVX1-LABEL: allones_v8i64:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX2-LABEL: loo512:
+; X32-AVX2-LABEL: allones_v8i64:
 ; X32-AVX2:       # BB#0:
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-AVX2-NEXT:    retl
 ;
-; X32-AVX512-LABEL: loo512:
+; X32-AVX512-LABEL: allones_v8i64:
 ; X32-AVX512:       # BB#0:
 ; X32-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X32-AVX512-NEXT:    retl
 ;
-; X64-SSE-LABEL: loo512:
+; X64-SSE-LABEL: allones_v8i64:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -594,28 +668,28 @@ define <8 x i64> @loo512() nounwind {
 ; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: loo512:
+; X64-AVX1-LABEL: allones_v8i64:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX2-LABEL: loo512:
+; X64-AVX2-LABEL: allones_v8i64:
 ; X64-AVX2:       # BB#0:
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-AVX512-LABEL: loo512:
+; X64-AVX512-LABEL: allones_v8i64:
 ; X64-AVX512:       # BB#0:
 ; X64-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    retq
   ret <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
 }
 
-define <8 x double> @doo512() nounwind {
-; X32-SSE-LABEL: doo512:
+define <8 x double> @allones_v8f64() nounwind {
+; X32-SSE-LABEL: allones_v8f64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -623,25 +697,25 @@ define <8 x double> @doo512() nounwind {
 ; X32-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: doo512:
+; X32-AVX1-LABEL: allones_v8f64:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX2-LABEL: doo512:
+; X32-AVX2-LABEL: allones_v8f64:
 ; X32-AVX2:       # BB#0:
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-AVX2-NEXT:    retl
 ;
-; X32-AVX512-LABEL: doo512:
+; X32-AVX512-LABEL: allones_v8f64:
 ; X32-AVX512:       # BB#0:
 ; X32-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X32-AVX512-NEXT:    retl
 ;
-; X64-SSE-LABEL: doo512:
+; X64-SSE-LABEL: allones_v8f64:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -649,28 +723,28 @@ define <8 x double> @doo512() nounwind {
 ; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: doo512:
+; X64-AVX1-LABEL: allones_v8f64:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX2-LABEL: doo512:
+; X64-AVX2-LABEL: allones_v8f64:
 ; X64-AVX2:       # BB#0:
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-AVX512-LABEL: doo512:
+; X64-AVX512-LABEL: allones_v8f64:
 ; X64-AVX512:       # BB#0:
 ; X64-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    retq
   ret <8 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff, double 0xffffffffffffffff>
 }
 
-define <16 x float> @foo512() nounwind {
-; X32-SSE-LABEL: foo512:
+define <16 x float> @allones_v16f32() nounwind {
+; X32-SSE-LABEL: allones_v16f32:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X32-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -678,25 +752,25 @@ define <16 x float> @foo512() nounwind {
 ; X32-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X32-SSE-NEXT:    retl
 ;
-; X32-AVX1-LABEL: foo512:
+; X32-AVX1-LABEL: allones_v16f32:
 ; X32-AVX1:       # BB#0:
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX1-NEXT:    retl
 ;
-; X32-AVX2-LABEL: foo512:
+; X32-AVX2-LABEL: allones_v16f32:
 ; X32-AVX2:       # BB#0:
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X32-AVX2-NEXT:    retl
 ;
-; X32-AVX512-LABEL: foo512:
+; X32-AVX512-LABEL: allones_v16f32:
 ; X32-AVX512:       # BB#0:
 ; X32-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X32-AVX512-NEXT:    retl
 ;
-; X64-SSE-LABEL: foo512:
+; X64-SSE-LABEL: allones_v16f32:
 ; X64-SSE:       # BB#0:
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1
@@ -704,20 +778,20 @@ define <16 x float> @foo512() nounwind {
 ; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX1-LABEL: foo512:
+; X64-AVX1-LABEL: allones_v16f32:
 ; X64-AVX1:       # BB#0:
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX1-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX1-NEXT:    retq
 ;
-; X64-AVX2-LABEL: foo512:
+; X64-AVX2-LABEL: allones_v16f32:
 ; X64-AVX2:       # BB#0:
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-AVX512-LABEL: foo512:
+; X64-AVX512-LABEL: allones_v16f32:
 ; X64-AVX512:       # BB#0:
 ; X64-AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    retq
diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll
index 1b51b53bd22..b75774ab12c 100644
--- a/test/CodeGen/X86/anyregcc.ll
+++ b/test/CodeGen/X86/anyregcc.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL:  .section __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:   __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -53,18 +53,24 @@
 ; CHECK-NEXT:   .short  3
 ; Loc 0: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 4
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 4
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Constant 3
 ; CHECK-NEXT:   .byte 4
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 3
 define i64 @test() nounwind ssp uwtable {
 entry:
@@ -79,13 +85,17 @@ entry:
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
 entry:
@@ -101,13 +111,17 @@ entry:
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @property_access2() nounwind ssp uwtable {
 entry:
@@ -124,13 +138,17 @@ entry:
 ; CHECK-NEXT:   .short  2
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Direct RBP - ofs
 ; CHECK-NEXT:   .byte 2
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short 6
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long
 define i64 @property_access3() nounwind ssp uwtable {
 entry:
@@ -147,73 +165,101 @@ entry:
 ; CHECK-NEXT:   .short  14
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 3: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 4: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 5: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 6: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 7: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 8: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 9: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 10: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 11: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 12: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 13: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
@@ -229,73 +275,101 @@ entry:
 ; CHECK-NEXT:   .short  14
 ; Loc 0: Register <-- this is the return register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 1: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 2: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 3: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 4: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 5: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 6: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 7: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 8: Register
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 ; Loc 9: Argument, still on stack
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 6
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long
 ; Loc 10: Argument, still on stack
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 6
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long
 ; Loc 11: Argument, still on stack
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 6
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long
 ; Loc 12: Argument, still on stack
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 6
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long
 ; Loc 13: Argument, still on stack
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 6
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long
 define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
@@ -313,18 +387,24 @@ entry:
 ; CHECK-NEXT: .short 3
 ; Loc 0: Register (some register that will be spilled to the stack)
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Register RDI
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 5
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Register RSI
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 4
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
@@ -342,28 +422,38 @@ entry:
 ; CHECK-NEXT: .short 5
 ; Loc 0: Return a register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 1: Arg0 in a Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 2: Arg1 in a Register
 ; CHECK-NEXT: .byte  1
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long  0
 ; Loc 3: Arg2 spilled to RBP +
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 6
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long
 ; Loc 4: Arg3 spilled to RBP +
 ; CHECK-NEXT: .byte  3
-; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .byte  0
+; CHECK-NEXT: .short  8
 ; CHECK-NEXT: .short 6
+; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .long
 define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
diff --git a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index 4a86fa22f08..1d925ff8e9b 100644
--- a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -3774,4 +3774,58 @@ define void @test_mm256_zeroupper() nounwind {
 }
 declare void @llvm.x86.avx.vzeroupper() nounwind readnone
 
+define <4 x double> @test_mm256_zextpd128_pd256(<2 x double> %a0) nounwind {
+; X32-LABEL: test_mm256_zextpd128_pd256:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_zextpd128_pd256:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT:    retq
+  %res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %res
+}
+
+define <8 x float> @test_mm256_zextps128_ps256(<4 x float> %a0) nounwind {
+; X32-LABEL: test_mm256_zextps128_ps256:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_zextps128_ps256:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT:    retq
+  %res = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %res
+}
+
+define <4 x i64> @test_mm256_zextsi128_si256(<2 x i64> %a0) nounwind {
+; X32-LABEL: test_mm256_zextsi128_si256:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_zextsi128_si256:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT:    retq
+  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i64> %res
+}
+
 !0 = !{i32 1}
diff --git a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 2d4bf6ebb25..652f85d8833 100644
--- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -1130,5 +1130,125 @@ define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16
   ret <16 x float> %res1
 }
 
+define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind {
+; X32-LABEL: test_mm512_zextpd128_pd512:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm2
+; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_zextpd128_pd512:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm2
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  ret <8 x double> %res
+}
+
+define <8 x double> @test_mm512_zextpd256_pd512(<4 x double> %a0) nounwind {
+; X32-LABEL: test_mm512_zextpd256_pd512:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X32-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_zextpd256_pd512:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X64-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %res = shufflevector <4 x double> %a0, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %res
+}
+
+define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind {
+; X32-LABEL: test_mm512_zextps128_ps512:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm2
+; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_zextps128_ps512:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm2
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %res = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_zextps256_ps512(<8 x float> %a0) nounwind {
+; X32-LABEL: test_mm512_zextps256_ps512:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X32-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; X32-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_zextps256_ps512:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X64-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %res = shufflevector <8 x float> %a0, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x float> %res
+}
+
+define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind {
+; X32-LABEL: test_mm512_zextsi128_si512:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X32-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm2
+; X32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_zextsi128_si512:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm2
+; X64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mm512_zextsi256_si512(<4 x i64> %a0) nounwind {
+; X32-LABEL: test_mm512_zextsi256_si512:
+; X32:       # BB#0:
+; X32-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X32-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; X32-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_zextsi256_si512:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X64-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-NEXT:    retq
+  %res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i64> %res
+}
+
 !0 = !{i32 1}
 
diff --git a/test/CodeGen/X86/bool-ext-inc.ll b/test/CodeGen/X86/bool-ext-inc.ll
index 1b69b554255..e292ccd0be1 100644
--- a/test/CodeGen/X86/bool-ext-inc.ll
+++ b/test/CodeGen/X86/bool-ext-inc.ll
@@ -1,29 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s
 
-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)
 
 define i32 @sext_inc(i1 zeroext %x) nounwind {
 ; CHECK-LABEL: sext_inc:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movzbl %dil, %ecx
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    subl %ecx, %eax
+; CHECK-NEXT:    xorb $1, %dil
+; CHECK-NEXT:    movzbl %dil, %eax
 ; CHECK-NEXT:    retq
   %ext = sext i1 %x to i32
   %add = add i32 %ext, 1
   ret i32 %add
 }
 
-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)
 
 define <4 x i32> @sext_inc_vec(<4 x i1> %x) nounwind {
 ; CHECK-LABEL: sext_inc_vec:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrad $31, %xmm0, %xmm0
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %ext = sext <4 x i1> %x to <4 x i32>
   %add = add <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
@@ -35,7 +32,7 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> %x, %y
   %ext = sext <4 x i1> %cmp to <4 x i32>
@@ -47,10 +44,7 @@ define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK-LABEL: cmpne_sext_inc_vec:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrld $31, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %cmp = icmp ne <4 x i32> %x, %y
   %ext = sext <4 x i1> %cmp to <4 x i32>
@@ -63,7 +57,7 @@ define <4 x i64> @cmpgt_sext_inc_vec256(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1
-; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpandn %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i64> %x, %y
   %ext = sext <4 x i1> %cmp to <4 x i64>
@@ -75,13 +69,11 @@ define i32 @bool_logic_and_math(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; CHECK-LABEL: bool_logic_and_math:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    cmpl %ecx, %edx
-; CHECK-NEXT:    setne %cl
-; CHECK-NEXT:    andb %al, %cl
-; CHECK-NEXT:    movzbl %cl, %ecx
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    subl %ecx, %eax
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    movzbl %cl, %eax
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne i32 %a, %b
   %cmp2 = icmp ne i32 %c, %d
@@ -95,12 +87,12 @@ define <4 x i32> @bool_logic_and_math_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 ; CHECK-LABEL: bool_logic_and_math_vec:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm1, %xmm2, %xmm1
+; CHECK-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %cmp1 = icmp ne <4 x i32> %a, %b
   %cmp2 = icmp ne <4 x i32> %c, %d
diff --git a/test/CodeGen/X86/bswap_tree.ll b/test/CodeGen/X86/bswap_tree.ll
index 35a28af8557..c217879d438 100644
--- a/test/CodeGen/X86/bswap_tree.ll
+++ b/test/CodeGen/X86/bswap_tree.ll
@@ -13,32 +13,16 @@
 define i32 @test1(i32 %x) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $16711680, %edx # imm = 0xFF0000
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    andl $-16777216, %eax # imm = 0xFF000000
-; CHECK-NEXT:    shll $8, %edx
-; CHECK-NEXT:    shrl $8, %eax
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    shrl $16, %ecx
-; CHECK-NEXT:    orl %edx, %eax
-; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    roll $16, %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: test1:
 ; CHECK64:       # BB#0:
-; CHECK64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    andl $16711680, %eax # imm = 0xFF0000
-; CHECK64-NEXT:    movl %edi, %ecx
-; CHECK64-NEXT:    andl $-16777216, %ecx # imm = 0xFF000000
-; CHECK64-NEXT:    shll $8, %eax
-; CHECK64-NEXT:    shrl $8, %ecx
 ; CHECK64-NEXT:    bswapl %edi
-; CHECK64-NEXT:    shrl $16, %edi
-; CHECK64-NEXT:    orl %eax, %ecx
-; CHECK64-NEXT:    leal (%rcx,%rdi), %eax
+; CHECK64-NEXT:    roll $16, %edi
+; CHECK64-NEXT:    movl %edi, %eax
 ; CHECK64-NEXT:    retq
   %byte0 = and i32 %x, 255        ; 0x000000ff
   %byte1 = and i32 %x, 65280      ; 0x0000ff00
@@ -62,33 +46,16 @@ define i32 @test1(i32 %x) nounwind {
 define i32 @test2(i32 %x) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    shll $8, %ecx
-; CHECK-NEXT:    shrl $8, %eax
-; CHECK-NEXT:    movzwl %cx, %edx
-; CHECK-NEXT:    movzbl %al, %esi
-; CHECK-NEXT:    andl $-16777216, %ecx # imm = 0xFF000000
-; CHECK-NEXT:    andl $16711680, %eax # imm = 0xFF0000
-; CHECK-NEXT:    orl %edx, %esi
-; CHECK-NEXT:    orl %ecx, %eax
-; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    roll $16, %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: test2:
 ; CHECK64:       # BB#0:
-; CHECK64-NEXT:    movl %edi, %ecx
-; CHECK64-NEXT:    shll $8, %ecx
-; CHECK64-NEXT:    shrl $8, %edi
-; CHECK64-NEXT:    movzwl %cx, %edx
-; CHECK64-NEXT:    movzbl %dil, %eax
-; CHECK64-NEXT:    andl $-16777216, %ecx # imm = 0xFF000000
-; CHECK64-NEXT:    andl $16711680, %edi # imm = 0xFF0000
-; CHECK64-NEXT:    orl %edx, %eax
-; CHECK64-NEXT:    orl %ecx, %edi
-; CHECK64-NEXT:    orl %edi, %eax
+; CHECK64-NEXT:    bswapl %edi
+; CHECK64-NEXT:    roll $16, %edi
+; CHECK64-NEXT:    movl %edi, %eax
 ; CHECK64-NEXT:    retq
   %byte1 = shl  i32 %x, 8
   %byte0 = lshr i32 %x, 8
diff --git a/test/CodeGen/X86/cast-vsel.ll b/test/CodeGen/X86/cast-vsel.ll
new file mode 100644
index 00000000000..1e44aec99fc
--- /dev/null
+++ b/test/CodeGen/X86/cast-vsel.ll
@@ -0,0 +1,611 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+; If we have a cmp and a sel with different-sized operands followed by a size-changing cast,
+; we may want to pull the cast ahead of the select operands to create a select with matching op sizes:
+; ext (sel (cmp a, b), c, d) --> sel (cmp a, b), (ext c), (ext d)
+
+define <8 x i32> @sext(<8 x float> %a, <8 x float> %b, <8 x i16> %c, <8 x i16> %d) {
+; SSE2-LABEL: sext:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    cmpltps %xmm3, %xmm1
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    cmpltps %xmm2, %xmm0
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT:    pand %xmm2, %xmm4
+; SSE2-NEXT:    pandn %xmm5, %xmm2
+; SSE2-NEXT:    por %xmm4, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: sext:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    cmpltps %xmm3, %xmm1
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    pshufb %xmm3, %xmm1
+; SSE41-NEXT:    cmpltps %xmm2, %xmm0
+; SSE41-NEXT:    pshufb %xmm3, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    pand %xmm0, %xmm4
+; SSE41-NEXT:    pandn %xmm5, %xmm0
+; SSE41-NEXT:    por %xmm4, %xmm0
+; SSE41-NEXT:    pmovsxwd %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmovsxwd %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: sext:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vcmpltps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vpmovsxwd %xmm3, %xmm2
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; AVX1-NEXT:    vpmovsxwd %xmm3, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT:    vblendvps %ymm0, %ymm1, %ymm2, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: sext:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vcmpltps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovsxwd %xmm2, %ymm1
+; AVX2-NEXT:    vpmovsxwd %xmm3, %ymm2
+; AVX2-NEXT:    vblendvps %ymm0, %ymm1, %ymm2, %ymm0
+; AVX2-NEXT:    retq
+  %cmp = fcmp olt <8 x float> %a, %b
+  %sel = select <8 x i1> %cmp, <8 x i16> %c, <8 x i16> %d
+  %ext = sext <8 x i16> %sel to <8 x i32>
+  ret <8 x i32> %ext
+}
+
+define <8 x i32> @zext(<8 x float> %a, <8 x float> %b, <8 x i16> %c, <8 x i16> %d) {
+; SSE2-LABEL: zext:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    cmpltps %xmm3, %xmm1
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
+; SSE2-NEXT:    cmpltps %xmm2, %xmm0
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE2-NEXT:    pand %xmm1, %xmm4
+; SSE2-NEXT:    pandn %xmm5, %xmm1
+; SSE2-NEXT:    por %xmm4, %xmm1
+; SSE2-NEXT:    xorps %xmm2, %xmm2
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: zext:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    cmpltps %xmm3, %xmm1
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    pshufb %xmm3, %xmm1
+; SSE41-NEXT:    cmpltps %xmm2, %xmm0
+; SSE41-NEXT:    pshufb %xmm3, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    pand %xmm0, %xmm4
+; SSE41-NEXT:    pandn %xmm5, %xmm0
+; SSE41-NEXT:    por %xmm4, %xmm0
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: zext:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vcmpltps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT:    vblendvps %ymm0, %ymm1, %ymm2, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: zext:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vcmpltps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; AVX2-NEXT:    vblendvps %ymm0, %ymm1, %ymm2, %ymm0
+; AVX2-NEXT:    retq
+  %cmp = fcmp olt <8 x float> %a, %b
+  %sel = select <8 x i1> %cmp, <8 x i16> %c, <8 x i16> %d
+  %ext = zext <8 x i16> %sel to <8 x i32>
+  ret <8 x i32> %ext
+}
+
+define <4 x double> @fpext(<4 x double> %a, <4 x double> %b, <4 x float> %c, <4 x float> %d) {
+; SSE2-LABEL: fpext:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    cmpltpd %xmm3, %xmm1
+; SSE2-NEXT:    cmpltpd %xmm2, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT:    andps %xmm0, %xmm4
+; SSE2-NEXT:    andnps %xmm5, %xmm0
+; SSE2-NEXT:    orps %xmm4, %xmm0
+; SSE2-NEXT:    cvtps2pd %xmm0, %xmm2
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    cvtps2pd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: fpext:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    cmpltpd %xmm3, %xmm1
+; SSE41-NEXT:    cmpltpd %xmm2, %xmm0
+; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT:    blendvps %xmm0, %xmm4, %xmm5
+; SSE41-NEXT:    cvtps2pd %xmm5, %xmm0
+; SSE41-NEXT:    movhlps {{.*#+}} xmm5 = xmm5[1,1]
+; SSE41-NEXT:    cvtps2pd %xmm5, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: fpext:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vcvtps2pd %xmm2, %ymm1
+; AVX-NEXT:    vcvtps2pd %xmm3, %ymm2
+; AVX-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
+; AVX-NEXT:    retq
+  %cmp = fcmp olt <4 x double> %a, %b
+  %sel = select <4 x i1> %cmp, <4 x float> %c, <4 x float> %d
+  %ext = fpext <4 x float> %sel to <4 x double>
+  ret <4 x double> %ext
+}
+
+define <8 x i16> @trunc(<8 x i16> %a, <8 x i16> %b, <8 x i32> %c, <8 x i32> %d) {
+; SSE2-LABEL: trunc:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT:    pslld $16, %xmm5
+; SSE2-NEXT:    psrad $16, %xmm5
+; SSE2-NEXT:    pslld $16, %xmm4
+; SSE2-NEXT:    psrad $16, %xmm4
+; SSE2-NEXT:    packssdw %xmm5, %xmm4
+; SSE2-NEXT:    pslld $16, %xmm3
+; SSE2-NEXT:    psrad $16, %xmm3
+; SSE2-NEXT:    pslld $16, %xmm2
+; SSE2-NEXT:    psrad $16, %xmm2
+; SSE2-NEXT:    packssdw %xmm3, %xmm2
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    pandn %xmm4, %xmm0
+; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: trunc:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    pshufb %xmm1, %xmm5
+; SSE41-NEXT:    pshufb %xmm1, %xmm4
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; SSE41-NEXT:    pshufb %xmm1, %xmm3
+; SSE41-NEXT:    pshufb %xmm1, %xmm2
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE41-NEXT:    pand %xmm0, %xmm2
+; SSE41-NEXT:    pandn %xmm4, %xmm0
+; SSE41-NEXT:    por %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: trunc:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX1-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
+; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm3
+; AVX2-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
+; AVX2-NEXT:    vpandn %xmm3, %xmm0, %xmm3
+; AVX2-NEXT:    vpshufb %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpor %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %cmp = icmp eq <8 x i16> %a, %b
+  %sel = select <8 x i1> %cmp, <8 x i32> %c, <8 x i32> %d
+  %tr = trunc <8 x i32> %sel to <8 x i16>
+  ret <8 x i16> %tr
+}
+
+define <4 x float> @fptrunc(<4 x float> %a, <4 x float> %b, <4 x double> %c, <4 x double> %d) {
+; SSE2-LABEL: fptrunc:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    cmpltps %xmm1, %xmm0
+; SSE2-NEXT:    cvtpd2ps %xmm5, %xmm1
+; SSE2-NEXT:    cvtpd2ps %xmm4, %xmm4
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm4 = xmm4[0],xmm1[0]
+; SSE2-NEXT:    cvtpd2ps %xmm3, %xmm1
+; SSE2-NEXT:    cvtpd2ps %xmm2, %xmm2
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT:    andpd %xmm0, %xmm2
+; SSE2-NEXT:    andnpd %xmm4, %xmm0
+; SSE2-NEXT:    orpd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: fptrunc:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    cmpltps %xmm1, %xmm0
+; SSE41-NEXT:    cvtpd2ps %xmm3, %xmm1
+; SSE41-NEXT:    cvtpd2ps %xmm2, %xmm2
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE41-NEXT:    cvtpd2ps %xmm5, %xmm3
+; SSE41-NEXT:    cvtpd2ps %xmm4, %xmm1
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm1
+; SSE41-NEXT:    movaps %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: fptrunc:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcmpltps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vcvtpd2ps %ymm2, %xmm1
+; AVX-NEXT:    vcvtpd2ps %ymm3, %xmm2
+; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %cmp = fcmp olt <4 x float> %a, %b
+  %sel = select <4 x i1> %cmp, <4 x double> %c, <4 x double> %d
+  %tr = fptrunc <4 x double> %sel to <4 x float>
+  ret <4 x float> %tr
+}
+
+; PR14657 - avoid truncation/extension of comparison results
+; These tests demonstrate the same issue as the simpler cases above,
+; but also include multi-BB to show potentially larger transforms/codegen issues.
+
+@da = common global [1024 x float] zeroinitializer, align 32
+@db = common global [1024 x float] zeroinitializer, align 32
+@dc = common global [1024 x float] zeroinitializer, align 32
+@dd = common global [1024 x float] zeroinitializer, align 32
+@dj = common global [1024 x i32] zeroinitializer, align 32
+
+define void @example25() nounwind {
+; SSE2-LABEL: example25:
+; SSE2:       # BB#0: # %vector.ph
+; SSE2-NEXT:    movq $-4096, %rax # imm = 0xF000
+; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,1,1]
+; SSE2-NEXT:    .p2align 4, 0x90
+; SSE2-NEXT:  .LBB5_1: # %vector.body
+; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE2-NEXT:    movaps da+4096(%rax), %xmm1
+; SSE2-NEXT:    movaps da+4112(%rax), %xmm2
+; SSE2-NEXT:    cmpltps db+4112(%rax), %xmm2
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT:    cmpltps db+4096(%rax), %xmm1
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT:    psllw $15, %xmm1
+; SSE2-NEXT:    psraw $15, %xmm1
+; SSE2-NEXT:    movaps dc+4096(%rax), %xmm2
+; SSE2-NEXT:    movaps dc+4112(%rax), %xmm3
+; SSE2-NEXT:    cmpltps dd+4112(%rax), %xmm3
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT:    cmpltps dd+4096(%rax), %xmm2
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT:    psllw $15, %xmm2
+; SSE2-NEXT:    psraw $15, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, dj+4112(%rax)
+; SSE2-NEXT:    movdqa %xmm1, dj+4096(%rax)
+; SSE2-NEXT:    addq $32, %rax
+; SSE2-NEXT:    jne .LBB5_1
+; SSE2-NEXT:  # BB#2: # %for.end
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: example25:
+; SSE41:       # BB#0: # %vector.ph
+; SSE41-NEXT:    movq $-4096, %rax # imm = 0xF000
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT:    .p2align 4, 0x90
+; SSE41-NEXT:  .LBB5_1: # %vector.body
+; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE41-NEXT:    movaps da+4096(%rax), %xmm2
+; SSE41-NEXT:    movaps da+4112(%rax), %xmm3
+; SSE41-NEXT:    cmpltps db+4112(%rax), %xmm3
+; SSE41-NEXT:    pshufb %xmm0, %xmm3
+; SSE41-NEXT:    cmpltps db+4096(%rax), %xmm2
+; SSE41-NEXT:    pshufb %xmm0, %xmm2
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE41-NEXT:    psllw $15, %xmm2
+; SSE41-NEXT:    psraw $15, %xmm2
+; SSE41-NEXT:    movaps dc+4096(%rax), %xmm3
+; SSE41-NEXT:    movaps dc+4112(%rax), %xmm4
+; SSE41-NEXT:    cmpltps dd+4112(%rax), %xmm4
+; SSE41-NEXT:    pshufb %xmm0, %xmm4
+; SSE41-NEXT:    cmpltps dd+4096(%rax), %xmm3
+; SSE41-NEXT:    pshufb %xmm0, %xmm3
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; SSE41-NEXT:    psllw $15, %xmm3
+; SSE41-NEXT:    psraw $15, %xmm3
+; SSE41-NEXT:    pand %xmm2, %xmm3
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE41-NEXT:    pand %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm3, dj+4112(%rax)
+; SSE41-NEXT:    movdqa %xmm2, dj+4096(%rax)
+; SSE41-NEXT:    addq $32, %rax
+; SSE41-NEXT:    jne .LBB5_1
+; SSE41-NEXT:  # BB#2: # %for.end
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: example25:
+; AVX1:       # BB#0: # %vector.ph
+; AVX1-NEXT:    movq $-4096, %rax # imm = 0xF000
+; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT:    .p2align 4, 0x90
+; AVX1-NEXT:  .LBB5_1: # %vector.body
+; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX1-NEXT:    vmovups da+4096(%rax), %ymm1
+; AVX1-NEXT:    vcmpltps db+4096(%rax), %ymm1, %ymm1
+; AVX1-NEXT:    vmovups dc+4096(%rax), %ymm2
+; AVX1-NEXT:    vcmpltps dd+4096(%rax), %ymm2, %ymm2
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm1
+; AVX1-NEXT:    vmovups %ymm1, dj+4096(%rax)
+; AVX1-NEXT:    addq $32, %rax
+; AVX1-NEXT:    jne .LBB5_1
+; AVX1-NEXT:  # BB#2: # %for.end
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: example25:
+; AVX2:       # BB#0: # %vector.ph
+; AVX2-NEXT:    movq $-4096, %rax # imm = 0xF000
+; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm0
+; AVX2-NEXT:    .p2align 4, 0x90
+; AVX2-NEXT:  .LBB5_1: # %vector.body
+; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX2-NEXT:    vmovups da+4096(%rax), %ymm1
+; AVX2-NEXT:    vcmpltps db+4096(%rax), %ymm1, %ymm1
+; AVX2-NEXT:    vmovups dc+4096(%rax), %ymm2
+; AVX2-NEXT:    vcmpltps dd+4096(%rax), %ymm2, %ymm2
+; AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm1
+; AVX2-NEXT:    vmovups %ymm1, dj+4096(%rax)
+; AVX2-NEXT:    addq $32, %rax
+; AVX2-NEXT:    jne .LBB5_1
+; AVX2-NEXT:  # BB#2: # %for.end
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+vector.ph:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds [1024 x float], [1024 x float]* @da, i64 0, i64 %index
+  %1 = bitcast float* %0 to <8 x float>*
+  %2 = load <8 x float>, <8 x float>* %1, align 16
+  %3 = getelementptr inbounds [1024 x float], [1024 x float]* @db, i64 0, i64 %index
+  %4 = bitcast float* %3 to <8 x float>*
+  %5 = load <8 x float>, <8 x float>* %4, align 16
+  %6 = fcmp olt <8 x float> %2, %5
+  %7 = getelementptr inbounds [1024 x float], [1024 x float]* @dc, i64 0, i64 %index
+  %8 = bitcast float* %7 to <8 x float>*
+  %9 = load <8 x float>, <8 x float>* %8, align 16
+  %10 = getelementptr inbounds [1024 x float], [1024 x float]* @dd, i64 0, i64 %index
+  %11 = bitcast float* %10 to <8 x float>*
+  %12 = load <8 x float>, <8 x float>* %11, align 16
+  %13 = fcmp olt <8 x float> %9, %12
+  %14 = and <8 x i1> %6, %13
+  %15 = zext <8 x i1> %14 to <8 x i32>
+  %16 = getelementptr inbounds [1024 x i32], [1024 x i32]* @dj, i64 0, i64 %index
+  %17 = bitcast i32* %16 to <8 x i32>*
+  store <8 x i32> %15, <8 x i32>* %17, align 16
+  %index.next = add i64 %index, 8
+  %18 = icmp eq i64 %index.next, 1024
+  br i1 %18, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+define void @example24(i16 signext %x, i16 signext %y) nounwind {
+; SSE2-LABEL: example24:
+; SSE2:       # BB#0: # %vector.ph
+; SSE2-NEXT:    movd %edi, %xmm0
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT:    movd %esi, %xmm1
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT:    movq $-4096, %rax # imm = 0xF000
+; SSE2-NEXT:    .p2align 4, 0x90
+; SSE2-NEXT:  .LBB6_1: # %vector.body
+; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE2-NEXT:    movaps da+4096(%rax), %xmm2
+; SSE2-NEXT:    movaps da+4112(%rax), %xmm3
+; SSE2-NEXT:    cmpltps db+4112(%rax), %xmm3
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT:    cmpltps db+4096(%rax), %xmm2
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pand %xmm2, %xmm3
+; SSE2-NEXT:    pandn %xmm1, %xmm2
+; SSE2-NEXT:    por %xmm3, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT:    psrad $16, %xmm3
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    psrad $16, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, dj+4112(%rax)
+; SSE2-NEXT:    movdqa %xmm3, dj+4096(%rax)
+; SSE2-NEXT:    addq $32, %rax
+; SSE2-NEXT:    jne .LBB6_1
+; SSE2-NEXT:  # BB#2: # %for.end
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: example24:
+; SSE41:       # BB#0: # %vector.ph
+; SSE41-NEXT:    movd %edi, %xmm0
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE41-NEXT:    movd %esi, %xmm1
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT:    movq $-4096, %rax # imm = 0xF000
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    .p2align 4, 0x90
+; SSE41-NEXT:  .LBB6_1: # %vector.body
+; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE41-NEXT:    movaps da+4096(%rax), %xmm3
+; SSE41-NEXT:    movaps da+4112(%rax), %xmm4
+; SSE41-NEXT:    cmpltps db+4112(%rax), %xmm4
+; SSE41-NEXT:    pshufb %xmm2, %xmm4
+; SSE41-NEXT:    cmpltps db+4096(%rax), %xmm3
+; SSE41-NEXT:    pshufb %xmm2, %xmm3
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; SSE41-NEXT:    movdqa %xmm0, %xmm4
+; SSE41-NEXT:    pand %xmm3, %xmm4
+; SSE41-NEXT:    pandn %xmm1, %xmm3
+; SSE41-NEXT:    por %xmm4, %xmm3
+; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
+; SSE41-NEXT:    pmovsxwd %xmm4, %xmm4
+; SSE41-NEXT:    pmovsxwd %xmm3, %xmm3
+; SSE41-NEXT:    movdqa %xmm3, dj+4096(%rax)
+; SSE41-NEXT:    movdqa %xmm4, dj+4112(%rax)
+; SSE41-NEXT:    addq $32, %rax
+; SSE41-NEXT:    jne .LBB6_1
+; SSE41-NEXT:  # BB#2: # %for.end
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: example24:
+; AVX1:       # BB#0: # %vector.ph
+; AVX1-NEXT:    vmovd %edi, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX1-NEXT:    vmovd %esi, %xmm1
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT:    movq $-4096, %rax # imm = 0xF000
+; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm2
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT:    vpmovsxwd %xmm1, %xmm2
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxwd %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    .p2align 4, 0x90
+; AVX1-NEXT:  .LBB6_1: # %vector.body
+; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX1-NEXT:    vmovups da+4096(%rax), %ymm2
+; AVX1-NEXT:    vcmpltps db+4096(%rax), %ymm2, %ymm2
+; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm2
+; AVX1-NEXT:    vmovups %ymm2, dj+4096(%rax)
+; AVX1-NEXT:    addq $32, %rax
+; AVX1-NEXT:    jne .LBB6_1
+; AVX1-NEXT:  # BB#2: # %for.end
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: example24:
+; AVX2:       # BB#0: # %vector.ph
+; AVX2-NEXT:    vmovd %edi, %xmm0
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %esi, %xmm1
+; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
+; AVX2-NEXT:    movq $-4096, %rax # imm = 0xF000
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxwd %xmm1, %ymm1
+; AVX2-NEXT:    .p2align 4, 0x90
+; AVX2-NEXT:  .LBB6_1: # %vector.body
+; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX2-NEXT:    vmovups da+4096(%rax), %ymm2
+; AVX2-NEXT:    vcmpltps db+4096(%rax), %ymm2, %ymm2
+; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm2
+; AVX2-NEXT:    vmovups %ymm2, dj+4096(%rax)
+; AVX2-NEXT:    addq $32, %rax
+; AVX2-NEXT:    jne .LBB6_1
+; AVX2-NEXT:  # BB#2: # %for.end
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+vector.ph:
+  %0 = insertelement <8 x i16> undef, i16 %x, i32 0
+  %broadcast11 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
+  %1 = insertelement <8 x i16> undef, i16 %y, i32 0
+  %broadcast12 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %2 = getelementptr inbounds [1024 x float], [1024 x float]* @da, i64 0, i64 %index
+  %3 = bitcast float* %2 to <8 x float>*
+  %4 = load <8 x float>, <8 x float>* %3, align 16
+  %5 = getelementptr inbounds [1024 x float], [1024 x float]* @db, i64 0, i64 %index
+  %6 = bitcast float* %5 to <8 x float>*
+  %7 = load <8 x float>, <8 x float>* %6, align 16
+  %8 = fcmp olt <8 x float> %4, %7
+  %9 = select <8 x i1> %8, <8 x i16> %broadcast11, <8 x i16> %broadcast12
+  %10 = sext <8 x i16> %9 to <8 x i32>
+  %11 = getelementptr inbounds [1024 x i32], [1024 x i32]* @dj, i64 0, i64 %index
+  %12 = bitcast i32* %11 to <8 x i32>*
+  store <8 x i32> %10, <8 x i32>* %12, align 16
+  %index.next = add i64 %index, 8
+  %13 = icmp eq i64 %index.next, 1024
+  br i1 %13, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index cffc6732728..9d827fc88b3 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -778,3 +778,88 @@ define i32 @ctlz_bsr_zero_test(i32 %n) {
   %bsr = xor i32 %ctlz, 31
   ret i32 %bsr
 }
+
+define i8 @cttz_i8_knownbits(i8 %x)  {
+; X32-LABEL: cttz_i8_knownbits:
+; X32:       # BB#0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    orb $2, %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    bsfl %eax, %eax
+; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT:    retl
+;
+; X64-LABEL: cttz_i8_knownbits:
+; X64:       # BB#0:
+; X64-NEXT:    orb $2, %dil
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    bsfl %eax, %eax
+; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT:    retq
+;
+; X32-CLZ-LABEL: cttz_i8_knownbits:
+; X32-CLZ:       # BB#0:
+; X32-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-CLZ-NEXT:    orb $2, %al
+; X32-CLZ-NEXT:    movzbl %al, %eax
+; X32-CLZ-NEXT:    tzcntl %eax, %eax
+; X32-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-CLZ-NEXT:    retl
+;
+; X64-CLZ-LABEL: cttz_i8_knownbits:
+; X64-CLZ:       # BB#0:
+; X64-CLZ-NEXT:    orb $2, %dil
+; X64-CLZ-NEXT:    movzbl %dil, %eax
+; X64-CLZ-NEXT:    tzcntl %eax, %eax
+; X64-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-CLZ-NEXT:    retq
+  %x2 = or i8 %x, 2
+  %tmp = call i8 @llvm.cttz.i8(i8 %x2, i1 true )
+  %tmp2 = and i8 %tmp, 1
+  ret i8 %tmp2
+}
+
+define i8 @ctlz_i8_knownbits(i8 %x)  {
+; X32-LABEL: ctlz_i8_knownbits:
+; X32:       # BB#0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    orb $64, %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    bsrl %eax, %eax
+; X32-NEXT:    xorl $7, %eax
+; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT:    retl
+;
+; X64-LABEL: ctlz_i8_knownbits:
+; X64:       # BB#0:
+; X64-NEXT:    orb $64, %dil
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    bsrl %eax, %eax
+; X64-NEXT:    xorl $7, %eax
+; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT:    retq
+;
+; X32-CLZ-LABEL: ctlz_i8_knownbits:
+; X32-CLZ:       # BB#0:
+; X32-CLZ-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-CLZ-NEXT:    orb $64, %al
+; X32-CLZ-NEXT:    movzbl %al, %eax
+; X32-CLZ-NEXT:    lzcntl %eax, %eax
+; X32-CLZ-NEXT:    addl $-24, %eax
+; X32-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-CLZ-NEXT:    retl
+;
+; X64-CLZ-LABEL: ctlz_i8_knownbits:
+; X64-CLZ:       # BB#0:
+; X64-CLZ-NEXT:    orb $64, %dil
+; X64-CLZ-NEXT:    movzbl %dil, %eax
+; X64-CLZ-NEXT:    lzcntl %eax, %eax
+; X64-CLZ-NEXT:    addl $-24, %eax
+; X64-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-CLZ-NEXT:    retq
+
+  %x2 = or i8 %x, 64
+  %tmp = call i8 @llvm.ctlz.i8(i8 %x2, i1 true )
+  %tmp2 = and i8 %tmp, 1
+  ret i8 %tmp2
+}
diff --git a/test/CodeGen/X86/deopt-bundles.ll b/test/CodeGen/X86/deopt-bundles.ll
index 1fb73ea252e..9745330c9ac 100644
--- a/test/CodeGen/X86/deopt-bundles.ll
+++ b/test/CodeGen/X86/deopt-bundles.ll
@@ -9,48 +9,47 @@ target triple = "x86_64-apple-macosx10.11.0"
 
 ; STACKMAPS: Stack Maps: callsite 2882400015
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
 ; STACKMAPS-NEXT: Stack Maps: callsite 4242
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
 ; STACKMAPS-NEXT: Stack Maps: callsite 4243
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 16	[encoding: .byte 4, .byte 8, .short 0, .int 16]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 16	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 16]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
 ; STACKMAPS-NEXT: Stack Maps: callsite 2882400015
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 2	[encoding: .byte 4, .byte 8, .short 0, .int 2]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 2	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 2]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
 ; STACKMAPS-NEXT: Stack Maps: callsite 2882400015
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 3	[encoding: .byte 4, .byte 8, .short 0, .int 3]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 3	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 3]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
 ; STACKMAPS-NEXT: Stack Maps: callsite 4243
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 55	[encoding: .byte 4, .byte 8, .short 0, .int 55]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 55	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 55]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
 
-
 declare i32 @callee_0()
 declare i32 @callee_1(i32)
 declare i32 @callee_vararg(...)
@@ -159,3 +158,42 @@ define void @f_0(i64 %n) {
 }
 
 declare void @g_0(i64* %vl)
+
+define void @vector_deopt_bundle(<32 x i64 addrspace(1)*> %val) {
+; CHECK-LABEL: _vector_deopt_bundle:
+; CHECK: movaps  16(%rbp), %xmm8
+; CHECK-NEXT: movaps  32(%rbp), %xmm9
+; CHECK-NEXT: movaps  48(%rbp), %xmm10
+; CHECK-NEXT: movaps  64(%rbp), %xmm11
+; CHECK-NEXT: movaps  80(%rbp), %xmm12
+; CHECK-NEXT: movaps  96(%rbp), %xmm13
+; CHECK-NEXT: movaps  112(%rbp), %xmm14
+; CHECK-NEXT: movaps  128(%rbp), %xmm15
+; CHECK-NEXT: movaps  %xmm15, 240(%rsp)
+; CHECK-NEXT: movaps  %xmm14, 224(%rsp)
+; CHECK-NEXT: movaps  %xmm13, 208(%rsp)
+; CHECK-NEXT: movaps  %xmm12, 192(%rsp)
+; CHECK-NEXT: movaps  %xmm11, 176(%rsp)
+; CHECK-NEXT: movaps  %xmm10, 160(%rsp)
+; CHECK-NEXT: movaps  %xmm9, 144(%rsp)
+; CHECK-NEXT: movaps  %xmm8, 128(%rsp)
+; CHECK-NEXT: movaps  %xmm7, 112(%rsp)
+; CHECK-NEXT: movaps  %xmm6, 96(%rsp)
+; CHECK-NEXT: movaps  %xmm5, 80(%rsp)
+; CHECK-NEXT: movaps  %xmm4, 64(%rsp)
+; CHECK-NEXT: movaps  %xmm3, 48(%rsp)
+; CHECK-NEXT: movaps  %xmm2, 32(%rsp)
+; CHECK-NEXT: movaps  %xmm1, 16(%rsp)
+; CHECK-NEXT: movaps  %xmm0, (%rsp)
+  call void @unknown() [ "deopt"(<32 x i64 addrspace(1)*> %val) ]
+  ret void
+; STACKMAPS: Stack Maps: callsite 2882400015
+; STACKMAPS-NEXT: Stack Maps:   has 4 locations
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 256, .short 7, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
+}
+
+declare void @unknown()
diff --git a/test/CodeGen/X86/deopt-intrinsic-cconv.ll b/test/CodeGen/X86/deopt-intrinsic-cconv.ll
index c382d66cee6..97bca1f69db 100644
--- a/test/CodeGen/X86/deopt-intrinsic-cconv.ll
+++ b/test/CodeGen/X86/deopt-intrinsic-cconv.ll
@@ -27,8 +27,8 @@ entry:
 ; STACKMAPS: Stack Maps: callsites:
 ; STACKMAPS-NEXT: Stack Maps: callsite 2882400015
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 12	[encoding: .byte 4, .byte 8, .short 0, .int 12]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 3	[encoding: .byte 4, .byte 8, .short 0, .int 3]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 12	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 12]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 3	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 3]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
diff --git a/test/CodeGen/X86/deopt-intrinsic.ll b/test/CodeGen/X86/deopt-intrinsic.ll
index 1254e116029..0e894516ffa 100644
--- a/test/CodeGen/X86/deopt-intrinsic.ll
+++ b/test/CodeGen/X86/deopt-intrinsic.ll
@@ -42,15 +42,15 @@ entry:
 ; STACKMAPS: Stack Maps: callsites:
 ; STACKMAPS-NEXT: Stack Maps: callsite 2882400015
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
 ; STACKMAPS-NEXT: Stack Maps: callsite 2882400015
 ; STACKMAPS-NEXT: Stack Maps:   has 4 locations
-; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 8, .short 0, .int 0]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
-; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 1	[encoding: .byte 4, .byte 8, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 0: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 1: Constant 0	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 0]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 2: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
+; STACKMAPS-NEXT: Stack Maps: 		Loc 3: Constant 1	[encoding: .byte 4, .byte 0, .short 8, .short 0, .short 0, .int 1]
 ; STACKMAPS-NEXT: Stack Maps: 	has 0 live-out registers
diff --git a/test/CodeGen/X86/inline-0bh.ll b/test/CodeGen/X86/inline-0bh.ll
new file mode 100644
index 00000000000..ceef395aa14
--- /dev/null
+++ b/test/CodeGen/X86/inline-0bh.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; Function Attrs: noinline nounwind
+define i32 @PR31007() {
+; CHECK-LABEL: PR31007:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:  #APP
+; CHECK   :    addb $11, %al
+; CHECK:       #NO_APP
+; CHECK-NEXT:  xorl %eax, %eax
+; CHECK-NEXT:  retq
+entry:
+  call void asm sideeffect inteldialect "add al,$$0bH", "~{al},~{flags},~{dirflag},~{fpsr},~{flags}"()
+  ret i32 0
+}
+
diff --git a/test/CodeGen/X86/known-bits.ll b/test/CodeGen/X86/known-bits.ll
index 81a60cdee3a..90f6e930138 100644
--- a/test/CodeGen/X86/known-bits.ll
+++ b/test/CodeGen/X86/known-bits.ll
@@ -173,8 +173,8 @@ define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
 ; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    leal (%ecx,%eax), %edx
-; X32-NEXT:    cmpl %ecx, %edx
+; X32-NEXT:    movl %ecx, %edx
+; X32-NEXT:    addl %eax, %edx
 ; X32-NEXT:    setb %bl
 ; X32-NEXT:    testl %eax, %eax
 ; X32-NEXT:    setns %al
@@ -226,19 +226,19 @@ define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    subl %eax, %edx
-; X32-NEXT:    setns %bl
-; X32-NEXT:    cmpl %edx, %ecx
-; X32-NEXT:    setb %dh
-; X32-NEXT:    testl %ecx, %ecx
-; X32-NEXT:    setns %cl
-; X32-NEXT:    cmpb %bl, %cl
-; X32-NEXT:    setne %ch
+; X32-NEXT:    setb %bl
 ; X32-NEXT:    testl %eax, %eax
 ; X32-NEXT:    setns %al
+; X32-NEXT:    testl %ecx, %ecx
+; X32-NEXT:    setns %cl
 ; X32-NEXT:    cmpb %al, %cl
+; X32-NEXT:    setne %al
+; X32-NEXT:    testl %edx, %edx
+; X32-NEXT:    setns %dl
+; X32-NEXT:    cmpb %dl, %cl
 ; X32-NEXT:    setne %dl
-; X32-NEXT:    andb %ch, %dl
-; X32-NEXT:    orb %dh, %dl
+; X32-NEXT:    andb %al, %dl
+; X32-NEXT:    orb %bl, %dl
 ; X32-NEXT:    xorl %eax, %eax
 ; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll
index 4c3c8bbd793..cea9ac26edb 100644
--- a/test/CodeGen/X86/known-signbits-vector.ll
+++ b/test/CodeGen/X86/known-signbits-vector.ll
@@ -100,27 +100,21 @@ define float @signbits_ashr_extract_sitofp(<2 x i64> %a0) nounwind {
 define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
 ; X32-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
 ; X32:       # BB#0:
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $16, %esp
-; X32-NEXT:    movl 8(%ebp), %eax
-; X32-NEXT:    movl 12(%ebp), %ecx
+; X32-NEXT:    pushl %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    shrdl $30, %ecx, %eax
 ; X32-NEXT:    sarl $30, %ecx
 ; X32-NEXT:    vmovd %eax, %xmm0
 ; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32-NEXT:    vpinsrd $2, 16(%ebp), %xmm0, %xmm0
-; X32-NEXT:    vpinsrd $3, 20(%ebp), %xmm0, %xmm0
-; X32-NEXT:    vpsrad $3, %xmm0, %xmm1
+; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X32-NEXT:    vpsrlq $3, %xmm0, %xmm0
-; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-NEXT:    flds {{[0-9]+}}(%esp)
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
+; X32-NEXT:    vmovd %xmm0, %eax
+; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
+; X32-NEXT:    vmovss %xmm0, (%esp)
+; X32-NEXT:    flds (%esp)
+; X32-NEXT:    popl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
@@ -133,7 +127,7 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
 ; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
 ; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
+; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr i64 %a0, 30
   %2 = insertelement <2 x i64> undef, i64 %1, i32 0
diff --git a/test/CodeGen/X86/lea-opt-with-debug.mir b/test/CodeGen/X86/lea-opt-with-debug.mir
index ebf86ff718d..0a477706df1 100644
--- a/test/CodeGen/X86/lea-opt-with-debug.mir
+++ b/test/CodeGen/X86/lea-opt-with-debug.mir
@@ -1,7 +1,8 @@
-# RUN: llc -mtriple=x86_64-unknown-unknown -start-after peephole-opt -stop-before detect-dead-lanes -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-unknown-unknown -start-after=peephole-opt -stop-before=detect-dead-lanes -o - %s | FileCheck %s
 
-# Test that pass optimize LEA can remove a redundant LEA even when it is also
-# used by a DBG_VALUE.
+# Test that the optimize LEA pass can remove a redundant LEA even when it is
+# also used by a DBG_VALUE. Check that the uses of the replaced LEA are updated
+# correctly.
 
 --- |
   target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -13,22 +14,22 @@
   @d = common local_unnamed_addr global i32 0, align 4
   @b = common local_unnamed_addr global i32 0, align 4
 
-  define i32 @fn1() local_unnamed_addr !dbg !8 {
-    %1 = load %struct.A*, %struct.A** @c, align 8, !dbg !13
-    %2 = load i32, i32* @a, align 4, !dbg !13
-    %3 = sext i32 %2 to i64, !dbg !13
-    %4 = getelementptr inbounds %struct.A, %struct.A* %1, i64 %3, !dbg !13
-    %5 = ptrtoint %struct.A* %4 to i64, !dbg !13
-    %6 = trunc i64 %5 to i32, !dbg !13
-    store i32 %6, i32* @d, align 4, !dbg !13
-    %7 = getelementptr inbounds %struct.A, %struct.A* %1, i64 %3, i32 2, !dbg !14
-    tail call void @llvm.dbg.value(metadata i32* %7, i64 0, metadata !11, metadata !15), !dbg !16
-    br label %8, !dbg !17
+  define i32 @fn1() local_unnamed_addr !dbg !9 {
+    %1 = load %struct.A*, %struct.A** @c, align 8, !dbg !14
+    %2 = load i32, i32* @a, align 4, !dbg !14
+    %3 = sext i32 %2 to i64, !dbg !14
+    %4 = getelementptr inbounds %struct.A, %struct.A* %1, i64 %3, !dbg !14
+    %5 = ptrtoint %struct.A* %4 to i64, !dbg !14
+    %6 = trunc i64 %5 to i32, !dbg !14
+    store i32 %6, i32* @d, align 4, !dbg !14
+    %7 = getelementptr inbounds %struct.A, %struct.A* %1, i64 %3, i32 2, !dbg !15
+    tail call void @llvm.dbg.value(metadata i32* %7, i64 0, metadata !12, metadata !16), !dbg !17
+    br label %8, !dbg !18
 
   ; <label>:8:                                      ; preds = %8, %0
-    %9 = load i32, i32* %7, align 4, !dbg !18
-    store i32 %9, i32* @d, align 4, !dbg !18
-    br label %8, !dbg !19
+    %9 = load i32, i32* %7, align 4, !dbg !19
+    store i32 %9, i32* @d, align 4, !dbg !19
+    br label %8, !dbg !20
   }
 
   ; Function Attrs: nounwind readnone
@@ -38,6 +39,7 @@
 
   !llvm.dbg.cu = !{!0}
   !llvm.module.flags = !{!5, !6, !7}
+  !misc = !{!8}
 
   !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, globals: !2)
   !1 = !DIFile(filename: "test.c", directory: "")
@@ -47,18 +49,19 @@
   !5 = !{i32 2, !"Dwarf Version", i32 4}
   !6 = !{i32 2, !"Debug Info Version", i32 3}
   !7 = !{i32 1, !"PIC Level", i32 2}
-  !8 = distinct !DISubprogram(name: "fn1", scope: !1, file: !1, line: 7, type: !9, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: true, unit: !0, variables: !10)
-  !9 = !DISubroutineType(types: !3)
-  !10 = !{!11}
-  !11 = !DILocalVariable(name: "e", scope: !8, file: !1, line: 8, type: !12)
-  !12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !4, size: 64)
-  !13 = !DILocation(line: 9, scope: !8)
-  !14 = !DILocation(line: 10, scope: !8)
-  !15 = !DIExpression()
-  !16 = !DILocation(line: 8, scope: !8)
-  !17 = !DILocation(line: 11, scope: !8)
-  !18 = !DILocation(line: 13, scope: !8)
-  !19 = !DILocation(line: 14, scope: !8)
+  !8 = !DIExpression(DW_OP_plus, 8, DW_OP_stack_value)
+  !9 = distinct !DISubprogram(name: "fn1", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: true, unit: !0, variables: !11)
+  !10 = !DISubroutineType(types: !3)
+  !11 = !{!12}
+  !12 = !DILocalVariable(name: "e", scope: !9, file: !1, line: 8, type: !13)
+  !13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !4, size: 64)
+  !14 = !DILocation(line: 9, scope: !9)
+  !15 = !DILocation(line: 10, scope: !9)
+  !16 = !DIExpression()
+  !17 = !DILocation(line: 8, scope: !9)
+  !18 = !DILocation(line: 11, scope: !9)
+  !19 = !DILocation(line: 13, scope: !9)
+  !20 = !DILocation(line: 14, scope: !9)
 
 ...
 ---
@@ -95,28 +98,28 @@ body:             |
   bb.0 (%ir-block.0):
     successors: %bb.1(0x80000000)
 
-    ; CHECK: %3 = LEA64r %2, 2, %2, 0, _, debug-location !13
-    ; CHECK-NEXT: %4 = LEA64r %1, 4, %3, 0, _, debug-location !13
-    ; CHECK-NOT: %0 = LEA64r %1, 4, %3, 8, _, debug-location !14
-    ; CHECK: DBG_VALUE debug-use _, debug-use _, !11, !15, debug-location !16
+    ; CHECK: %3 = LEA64r %2, 2, %2, 0, _, debug-location !14
+    ; CHECK-NEXT: %4 = LEA64r %1, 4, %3, 0, _, debug-location !14
+    ; CHECK-NOT: %0 = LEA64r %1, 4, %3, 8, _, debug-location !15
+    ; CHECK: DBG_VALUE debug-use %4, debug-use _, !12, !8, debug-location !17
 
-    %1 = MOV64rm %rip, 1, _, @c, _, debug-location !13 :: (dereferenceable load 8 from @c)
-    %2 = MOVSX64rm32 %rip, 1, _, @a, _, debug-location !13 :: (dereferenceable load 4 from @a)
-    %3 = LEA64r %2, 2, %2, 0, _, debug-location !13
-    %4 = LEA64r %1, 4, %3, 0, _, debug-location !13
-    %5 = COPY %4.sub_32bit, debug-location !13
-    MOV32mr %rip, 1, _, @d, _, killed %5, debug-location !13 :: (store 4 into @d)
-    %0 = LEA64r %1, 4, %3, 8, _, debug-location !14
-    DBG_VALUE debug-use %0, debug-use _, !11, !15, debug-location !16
+    %1 = MOV64rm %rip, 1, _, @c, _, debug-location !14 :: (dereferenceable load 8 from @c)
+    %2 = MOVSX64rm32 %rip, 1, _, @a, _, debug-location !14 :: (dereferenceable load 4 from @a)
+    %3 = LEA64r %2, 2, %2, 0, _, debug-location !14
+    %4 = LEA64r %1, 4, %3, 0, _, debug-location !14
+    %5 = COPY %4.sub_32bit, debug-location !14
+    MOV32mr %rip, 1, _, @d, _, killed %5, debug-location !14 :: (store 4 into @d)
+    %0 = LEA64r %1, 4, %3, 8, _, debug-location !15
+    DBG_VALUE debug-use %0, debug-use _, !12, !16, debug-location !17
 
     ; CHECK-LABEL: bb.1 (%ir-block.8):
-    ; CHECK: %6 = MOV32rm %4, 1, _, 8, _, debug-location !18 :: (load 4 from %ir.7)
+    ; CHECK: %6 = MOV32rm %4, 1, _, 8, _, debug-location !19 :: (load 4 from %ir.7)
 
   bb.1 (%ir-block.8):
     successors: %bb.1(0x80000000)
 
-    %6 = MOV32rm %0, 1, _, 0, _, debug-location !18 :: (load 4 from %ir.7)
-    MOV32mr %rip, 1, _, @d, _, killed %6, debug-location !18 :: (store 4 into @d)
-    JMP_1 %bb.1, debug-location !19
+    %6 = MOV32rm %0, 1, _, 0, _, debug-location !19 :: (load 4 from %ir.7)
+    MOV32mr %rip, 1, _, @d, _, killed %6, debug-location !19 :: (store 4 into @d)
+    JMP_1 %bb.1, debug-location !20
 
 ...
diff --git a/test/CodeGen/X86/mul-i1024.ll b/test/CodeGen/X86/mul-i1024.ll
index 93d55a00128..340aa047c02 100644
--- a/test/CodeGen/X86/mul-i1024.ll
+++ b/test/CodeGen/X86/mul-i1024.ll
@@ -4388,1537 +4388,1526 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {
 ; X64-NEXT:    pushq %r13
 ; X64-NEXT:    pushq %r12
 ; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $360, %rsp # imm = 0x168
+; X64-NEXT:    subq $352, %rsp # imm = 0x160
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq 48(%rdi), %r9
-; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq 48(%rdi), %r8
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq 40(%rdi), %rcx
 ; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq 32(%rdi), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdi, %r10
-; X64-NEXT:    xorl %r8d, %r8d
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rdi, %r13
+; X64-NEXT:    xorl %r9d, %r9d
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq %rax, %r11
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %rdi, %rcx
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    addq %rbx, %rcx
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    addq %r11, %rcx
 ; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdi, %rbp
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %rbx, %rbp
+; X64-NEXT:    adcq %rbx, %rbp
+; X64-NEXT:    movq %rbx, %rcx
 ; X64-NEXT:    sbbq %rbx, %rbx
 ; X64-NEXT:    andl $1, %ebx
 ; X64-NEXT:    addq %rax, %rbp
 ; X64-NEXT:    adcq %rdx, %rbx
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r11, %r13
-; X64-NEXT:    addq %rax, %r13
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r15
-; X64-NEXT:    movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r11, %r12
+; X64-NEXT:    addq %rax, %r12
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    adcq %rdx, %rax
-; X64-NEXT:    addq %rbp, %r13
-; X64-NEXT:    movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    addq %rbp, %r12
+; X64-NEXT:    movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    adcq %rbx, %rax
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rsi, %r8
-; X64-NEXT:    movq (%r8), %rax
+; X64-NEXT:    movq (%rsi), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    xorl %ebp, %ebp
 ; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    movq %rax, %r10
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq 8(%r8), %rax
+; X64-NEXT:    movq 8(%rsi), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    xorl %r9d, %r9d
-; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    addq %rcx, %r12
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    addq %rcx, %r15
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %r14, %r12
-; X64-NEXT:    movq %rcx, %rbx
-; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    addq %rbp, %rbx
-; X64-NEXT:    sbbq %rbp, %rbp
-; X64-NEXT:    andl $1, %ebp
-; X64-NEXT:    addq %rax, %rbx
-; X64-NEXT:    adcq %rdx, %rbp
-; X64-NEXT:    movq 16(%r8), %rax
-; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    addq %r10, %r15
+; X64-NEXT:    adcq %rcx, %rbp
+; X64-NEXT:    movq %rcx, %rdi
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    addq %rax, %rbp
+; X64-NEXT:    adcq %rdx, %rbx
+; X64-NEXT:    movq 16(%rsi), %rax
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r14, %r9
-; X64-NEXT:    addq %rax, %r9
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    adcq %rdx, %rax
-; X64-NEXT:    addq %rbx, %r9
-; X64-NEXT:    adcq %rbp, %rax
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r10, %rcx
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    movq %rdi, %r9
+; X64-NEXT:    adcq %rdx, %r9
+; X64-NEXT:    addq %rbp, %rsi
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    adcq %rbx, %r9
 ; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    addq %r14, %rax
-; X64-NEXT:    adcq %rcx, %r15
-; X64-NEXT:    movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq (%r10), %rax
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rdi, %r8
+; X64-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r13, %rbp
+; X64-NEXT:    movq (%rbp), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    xorl %r15d, %r15d
-; X64-NEXT:    mulq %r15
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %r14, %rax
-; X64-NEXT:    movq %r14, %rdi
+; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    adcq %rcx, %rax
+; X64-NEXT:    adcq %rdi, %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq 32(%r8), %rax
+; X64-NEXT:    movq 32(%r14), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    xorl %r8d, %r8d
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    movq %rdx, (%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %rax
 ; X64-NEXT:    movq %rbx, %r14
-; X64-NEXT:    addq %rax, %rcx
+; X64-NEXT:    addq %r13, %rax
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    adcq %rdx, %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    addq %rdi, %r11
+; X64-NEXT:    addq %rcx, %r11
 ; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdi, %r11
-; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    adcq %r12, %rax
+; X64-NEXT:    adcq %r15, %rax
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r9, %r13
-; X64-NEXT:    movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r9, %rdi
+; X64-NEXT:    adcq %r10, %r12
+; X64-NEXT:    movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r10, %rcx
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    adcq %rbp, %rax
+; X64-NEXT:    adcq %r9, %rax
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rbp, %r9
-; X64-NEXT:    movq 8(%r10), %rax
+; X64-NEXT:    movq %r9, %r10
+; X64-NEXT:    movq 8(%rbp), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    addq %rsi, %r15
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    addq %r14, %r15
-; X64-NEXT:    movq %rsi, %rbp
-; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %rbx, %rbp
-; X64-NEXT:    sbbq %r8, %r8
-; X64-NEXT:    andl $1, %r8d
-; X64-NEXT:    addq %rax, %rbp
-; X64-NEXT:    adcq %rdx, %r8
-; X64-NEXT:    movq 16(%r10), %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r14, %rbx
-; X64-NEXT:    addq %rax, %rbx
-; X64-NEXT:    movq %rsi, %r10
-; X64-NEXT:    adcq %rdx, %r10
-; X64-NEXT:    addq %rbp, %rbx
-; X64-NEXT:    adcq %r8, %r10
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    movq %r14, (%rsp) # 8-byte Spill
-; X64-NEXT:    addq %r11, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r15, %rcx
-; X64-NEXT:    adcq %rcx, %r12
-; X64-NEXT:    movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %rbx, %rdi
+; X64-NEXT:    movq %rbp, %rdi
 ; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rbx, %r8
-; X64-NEXT:    adcq %r10, %r9
-; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload
-; X64-NEXT:    movq 40(%r13), %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    mulq %rdx
-; X64-NEXT:    xorl %r11d, %r11d
-; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload
-; X64-NEXT:    addq %r9, %rsi
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    addq %rdi, %rsi
-; X64-NEXT:    movq %r9, %rbp
+; X64-NEXT:    xorl %r9d, %r9d
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    addq %rsi, %r12
+; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %rbx, %rbp
+; X64-NEXT:    addq %r14, %r12
+; X64-NEXT:    adcq %rsi, %rbp
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    sbbq %rbx, %rbx
 ; X64-NEXT:    andl $1, %ebx
 ; X64-NEXT:    addq %rax, %rbp
 ; X64-NEXT:    adcq %rdx, %rbx
-; X64-NEXT:    movq 48(%r13), %rax
+; X64-NEXT:    movq 16(%rdi), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r14, %r9
+; X64-NEXT:    addq %rax, %r9
+; X64-NEXT:    adcq %rdx, %rsi
+; X64-NEXT:    addq %rbp, %r9
+; X64-NEXT:    movq %r9, %rdx
+; X64-NEXT:    adcq %rbx, %rsi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    addq %r8, %rsi
+; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r12, %r15
+; X64-NEXT:    movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rdx, %rcx
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    adcq %rax, %r10
+; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq 40(%rsi), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    xorl %r8d, %r8d
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq (%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    addq %rdi, %rcx
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    addq %r13, %rcx
+; X64-NEXT:    adcq %rdi, %rbp
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    addq %rax, %rbp
+; X64-NEXT:    adcq %rdx, %rbx
+; X64-NEXT:    movq 48(%rsi), %rax
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdi, %r15
-; X64-NEXT:    movq %rdi, %r11
-; X64-NEXT:    addq %rax, %r15
-; X64-NEXT:    movq %r9, %rdi
-; X64-NEXT:    adcq %rdx, %rdi
-; X64-NEXT:    addq %rbp, %r15
-; X64-NEXT:    adcq %rbx, %rdi
-; X64-NEXT:    addq %r11, %r14
+; X64-NEXT:    movq %r13, %r8
+; X64-NEXT:    addq %rax, %r8
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    adcq %rdx, %rax
+; X64-NEXT:    addq %rbp, %r8
+; X64-NEXT:    adcq %rbx, %rax
+; X64-NEXT:    movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    addq %r13, %r14
 ; X64-NEXT:    movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %rsi, %rcx
-; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r15, %r8
-; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %rdi, %r10
-; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    addq %r11, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    adcq %r9, %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    addq %r11, %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
+; X64-NEXT:    adcq %rcx, %r12
+; X64-NEXT:    movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r8, %r15
 ; X64-NEXT:    movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rax, %r10
+; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rax, %rdx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    addq %r13, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    adcq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    addq %r13, %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    movq 56(%rax), %rsi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r10
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rbx, %rbp
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    addq %rbp, %r8
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    addq %rdi, %rbx
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %r11
-; X64-NEXT:    addq %rbx, %rax
-; X64-NEXT:    adcq %rdi, %rdx
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq 56(%rax), %r11
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r9
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rsi, %rbx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    addq %rbx, %r10
+; X64-NEXT:    adcq %rbp, %rsi
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload
 ; X64-NEXT:    addq %rax, %r15
 ; X64-NEXT:    adcq %rdx, %r12
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %r10, %rsi
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rdx, %r10
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %r9, %rcx
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %r10, %rbx
+; X64-NEXT:    addq %r9, %rbx
 ; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r10
-; X64-NEXT:    mulq %r11
-; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    addq %rbp, %rdi
+; X64-NEXT:    adcq %rbp, %rcx
 ; X64-NEXT:    sbbq %rbp, %rbp
 ; X64-NEXT:    andl $1, %ebp
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %rsi
-; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    mulq %r14
 ; X64-NEXT:    movq %rdx, %r13
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rdi, %rbx
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %rcx, %rsi
 ; X64-NEXT:    adcq %rbp, %r13
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
-; X64-NEXT:    addq %r9, %rbx
-; X64-NEXT:    adcq %r8, %r13
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
+; X64-NEXT:    addq %r8, %rsi
+; X64-NEXT:    adcq %r10, %r13
 ; X64-NEXT:    adcq $0, %r15
 ; X64-NEXT:    adcq $0, %r12
-; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    movq %rdi, %rbp
 ; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq %rsi, %r8
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    addq %rcx, %rdi
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    movq 24(%rax), %r14
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    sbbq %rsi, %rsi
-; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rsi, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload
-; X64-NEXT:    addq %r11, %rbp
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %rbp
-; X64-NEXT:    adcq %rdx, %rsi
-; X64-NEXT:    addq %rbx, %r9
-; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r13, %rdi
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %r15, %rbp
-; X64-NEXT:    adcq %r12, %rsi
-; X64-NEXT:    movl $0, %r10d
-; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    sbbq %r15, %r15
-; X64-NEXT:    andl $1, %r15d
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %r12
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    addq %r12, %rdi
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    addq %rdi, %r9
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rbx, %rcx
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %r14, %r12
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rdi, %rdx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    addq %r11, %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %rcx
-; X64-NEXT:    adcq %rdx, %rdi
-; X64-NEXT:    addq %rbp, %r13
-; X64-NEXT:    adcq %rsi, %r9
-; X64-NEXT:    adcq %r10, %rcx
-; X64-NEXT:    adcq %r15, %rdi
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
-; X64-NEXT:    movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload
-; X64-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    movq 24(%rax), %r8
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r13
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %rbx, %r14
+; X64-NEXT:    mulq %r9
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rcx, %rbx
 ; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq 24(%rax), %rcx
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rbx, %r8
+; X64-NEXT:    adcq %rdi, %rcx
+; X64-NEXT:    sbbq %rdi, %rdi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    movq %r14, %rax
 ; X64-NEXT:    mulq %rbp
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rdi, %rdx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload
+; X64-NEXT:    adcq %r14, %rbp
+; X64-NEXT:    addq %rax, %rbx
+; X64-NEXT:    adcq %rdx, %rbp
+; X64-NEXT:    addq %rsi, %r10
+; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r13, %r8
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    addq %r15, %rbx
+; X64-NEXT:    adcq %r12, %rbp
+; X64-NEXT:    movl $0, %r8d
+; X64-NEXT:    adcq $0, %r8
+; X64-NEXT:    sbbq %r10, %r10
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r9, %rsi
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %r9, %rdi
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    addq %rdi, %r13
+; X64-NEXT:    adcq %rsi, %r9
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    addq %r9, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    adcq %r14, %rsi
+; X64-NEXT:    addq %rax, %rcx
+; X64-NEXT:    adcq %rdx, %rsi
+; X64-NEXT:    addq %rbx, %r15
+; X64-NEXT:    adcq %rbp, %r13
+; X64-NEXT:    adcq %r8, %rcx
+; X64-NEXT:    adcq %r10, %rsi
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
+; X64-NEXT:    movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
+; X64-NEXT:    movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq 24(%rax), %rbp
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %r15
 ; X64-NEXT:    addq %rbx, %r15
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rdi, %rcx
+; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    sbbq %rsi, %rsi
 ; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rbp, %r14
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rsi, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %r8
-; X64-NEXT:    adcq %rdx, %r10
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rcx, %rbx
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq %rsi, %r9
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    addq %rbx, %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %rdi, %rsi
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rsi, %rbx
-; X64-NEXT:    adcq %rdi, %rcx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    addq %r11, %rbx
-; X64-NEXT:    adcq %r15, %rcx
-; X64-NEXT:    adcq $0, %r8
-; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    movq %r9, %rsi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    movq %rbp, %r14
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r13
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %r9, %rbp
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %r12
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    addq %rbp, %rax
-; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %rdi, %rsi
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    mulq %r12
-; X64-NEXT:    addq %rsi, %rax
-; X64-NEXT:    adcq %rdi, %rdx
-; X64-NEXT:    movq (%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload
-; X64-NEXT:    addq %r14, %rsi
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %rsi
-; X64-NEXT:    adcq %rdx, %rbp
-; X64-NEXT:    addq %rbx, %r11
-; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %rcx, %r15
-; X64-NEXT:    movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %r8, %rsi
-; X64-NEXT:    adcq %r10, %rbp
-; X64-NEXT:    movl $0, %r10d
-; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    sbbq %r15, %r15
-; X64-NEXT:    andl $1, %r15d
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %r9, %rbx
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %r12
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    addq %rbx, %r9
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rdi, %rcx
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r12
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rdi, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    addq %r14, %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload
-; X64-NEXT:    movq %r12, %r13
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %rcx
-; X64-NEXT:    adcq %rdx, %r13
-; X64-NEXT:    addq %rsi, %r11
-; X64-NEXT:    adcq %rbp, %r9
-; X64-NEXT:    adcq %r10, %rcx
-; X64-NEXT:    adcq %r15, %r13
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload
-; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
-; X64-NEXT:    adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq $0, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %r11
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rdi, %rbp
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    addq %rbp, %r10
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rbx, %rcx
-; X64-NEXT:    sbbq %rsi, %rsi
-; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %r14
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rsi, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload
-; X64-NEXT:    movq %r12, %r15
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %r9
-; X64-NEXT:    adcq %rdx, %r15
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %r11, %rdi
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %r8, %rbp
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %r8
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    addq %rbp, %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rbx, %rcx
-; X64-NEXT:    sbbq %rbp, %rbp
-; X64-NEXT:    andl $1, %ebp
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rcx, %rbx
-; X64-NEXT:    adcq %rbp, %rsi
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq %r10, %rsi
-; X64-NEXT:    adcq $0, %r9
-; X64-NEXT:    adcq $0, %r15
-; X64-NEXT:    movq %r8, %rbp
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %r14
-; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %r12
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %r14, %rcx
-; X64-NEXT:    adcq $0, %r12
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    movq 56(%rax), %rdi
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdx, %rbp
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %r12, %rbp
-; X64-NEXT:    sbbq %rcx, %rcx
-; X64-NEXT:    andl $1, %ecx
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    addq %rbp, %rax
-; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
-; X64-NEXT:    addq %r11, %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload
-; X64-NEXT:    adcq %r12, %rdi
-; X64-NEXT:    addq %rax, %rcx
-; X64-NEXT:    adcq %rdx, %rdi
-; X64-NEXT:    addq %rbx, %r8
-; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %rsi, %r10
-; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    addq %r9, %rcx
-; X64-NEXT:    adcq %r15, %rdi
-; X64-NEXT:    movl $0, %r8d
-; X64-NEXT:    adcq $0, %r8
-; X64-NEXT:    sbbq %r9, %r9
-; X64-NEXT:    andl $1, %r9d
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload
-; X64-NEXT:    mulq %rbx
-; X64-NEXT:    movq %rdx, %r10
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    mulq %rbx
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %r10, %rbp
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    mulq %r10
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    addq %rbp, %r15
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %rbx, %rsi
-; X64-NEXT:    sbbq %rbp, %rbp
-; X64-NEXT:    andl $1, %ebp
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    mulq %r10
-; X64-NEXT:    addq %rsi, %rax
-; X64-NEXT:    adcq %rbp, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    addq %r11, %r10
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload
-; X64-NEXT:    adcq %r12, %r14
-; X64-NEXT:    addq %rax, %r10
-; X64-NEXT:    adcq %rdx, %r14
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rdi, %r15
-; X64-NEXT:    adcq %r8, %r10
-; X64-NEXT:    adcq %r9, %r14
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    addq %rcx, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    adcq %rcx, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    adcq %rcx, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq %r13, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq $0, %rax
-; X64-NEXT:    adcq $0, %r15
-; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    adcq $0, %r14
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload
-; X64-NEXT:    movl $0, %eax
-; X64-NEXT:    adcq $0, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movl $0, %eax
-; X64-NEXT:    adcq $0, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movl $0, %eax
-; X64-NEXT:    adcq $0, %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    sbbq %rax, %rax
-; X64-NEXT:    andl $1, %eax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %r9
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    addq %rcx, %r13
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %rbx, %rsi
-; X64-NEXT:    sbbq %rcx, %rcx
-; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq %rbp, %rax
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdi, %r11
-; X64-NEXT:    addq %rsi, %rax
-; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %r12
-; X64-NEXT:    adcq %rdx, %r8
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
+; X64-NEXT:    addq %rax, %r8
+; X64-NEXT:    adcq %rdx, %r10
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rcx, %rbx
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r9
 ; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %r14, %rax
 ; X64-NEXT:    mulq %r11
 ; X64-NEXT:    movq %rdx, %r11
 ; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    addq %rcx, %rsi
-; X64-NEXT:    adcq %rdi, %r11
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
+; X64-NEXT:    adcq %rbx, %r11
 ; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    adcq %r13, %r11
-; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
+; X64-NEXT:    addq %r9, %rsi
+; X64-NEXT:    adcq %r15, %r11
 ; X64-NEXT:    adcq $0, %r8
-; X64-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r9, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq %rbp, %r9
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %rbp
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %r8, %rcx
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    movq %rdi, %rbx
+; X64-NEXT:    movq %rbx, %rax
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    addq %rbp, %rbx
-; X64-NEXT:    sbbq %rcx, %rcx
-; X64-NEXT:    andl $1, %ecx
-; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq %r14, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    addq %rbx, %rax
-; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %rdi
-; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    addq %rsi, %r13
-; X64-NEXT:    adcq %r11, %r8
-; X64-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %r15
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rcx, %rbp
 ; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %r12, %rdi
-; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %r12, %r13
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    addq %rbp, %rax
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    adcq %rdi, %rcx
+; X64-NEXT:    sbbq %rdi, %rdi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rdi, %rdx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r14 # 8-byte Reload
+; X64-NEXT:    addq %r14, %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    addq %rax, %rbx
+; X64-NEXT:    adcq %rdx, %rcx
+; X64-NEXT:    addq %rsi, %r9
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r11, %rbp
+; X64-NEXT:    movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    adcq $0, %rcx
+; X64-NEXT:    addq %r8, %rbx
+; X64-NEXT:    adcq %r10, %rcx
 ; X64-NEXT:    movl $0, %r12d
 ; X64-NEXT:    adcq $0, %r12
 ; X64-NEXT:    sbbq %r9, %r9
 ; X64-NEXT:    andl $1, %r9d
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %r8
 ; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rcx, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %r8, %rdi
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rbp, %rbx
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    addq %rdi, %r15
+; X64-NEXT:    adcq %rsi, %r8
 ; X64-NEXT:    sbbq %rsi, %rsi
 ; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    mulq %r13
+; X64-NEXT:    addq %r8, %rax
 ; X64-NEXT:    adcq %rsi, %rdx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rsi
+; X64-NEXT:    addq %r14, %rsi
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rdi
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
 ; X64-NEXT:    addq %rax, %rsi
-; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    adcq %rdx, %rdi
+; X64-NEXT:    addq %rbx, %r11
+; X64-NEXT:    adcq %rcx, %r15
 ; X64-NEXT:    adcq %r12, %rsi
-; X64-NEXT:    adcq %r9, %rcx
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    adcq %r9, %rdi
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    addq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq %r15, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq %r10, %r13
-; X64-NEXT:    movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r14, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
+; X64-NEXT:    movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq 64(%rsi), %r14
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    addq %rcx, %rbx
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq 72(%rsi), %rcx
-; X64-NEXT:    movq %rsi, %r13
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rcx, %rsi
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    addq %rbx, %r10
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rdi, %rcx
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rsi, %r8
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rcx, %rbp
-; X64-NEXT:    adcq %rdi, %rsi
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    movq %rdx, %r11
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload
-; X64-NEXT:    addq %rbx, %r15
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload
-; X64-NEXT:    adcq %r11, %r12
-; X64-NEXT:    addq %rbp, %r15
-; X64-NEXT:    adcq %rsi, %r12
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %r14
-; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r14
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %r9, %rbp
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %r14
-; X64-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    addq %rbp, %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    sbbq %rsi, %rsi
-; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rdi, %r9
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rsi, %rdx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %rbx
-; X64-NEXT:    adcq %rdx, %r11
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq %r10, %r11
-; X64-NEXT:    adcq $0, %r15
-; X64-NEXT:    adcq $0, %r12
-; X64-NEXT:    movq 80(%r13), %rbp
-; X64-NEXT:    movq %r14, %rsi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rdx, %r8
 ; X64-NEXT:    movq %rax, %r14
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rdx, %r10
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %r8, %rcx
-; X64-NEXT:    adcq $0, %r10
-; X64-NEXT:    movq 88(%r13), %r13
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    addq %rcx, %r8
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    addq %r10, %rdi
-; X64-NEXT:    sbbq %rsi, %rsi
-; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, %r10
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %rdi, %rcx
-; X64-NEXT:    adcq %rsi, %r10
 ; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    mulq %rdx
-; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    addq %rdi, %rsi
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    adcq %rdx, %rax
-; X64-NEXT:    addq %rcx, %rsi
-; X64-NEXT:    adcq %r10, %rax
-; X64-NEXT:    addq %rbx, %r14
-; X64-NEXT:    movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r11, %r8
-; X64-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    adcq $0, %rax
-; X64-NEXT:    addq %r15, %rsi
-; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r12, %rax
-; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    movl $0, %r15d
-; X64-NEXT:    adcq $0, %r15
-; X64-NEXT:    sbbq %r12, %r12
-; X64-NEXT:    andl $1, %r12d
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %r13
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rsi, %rcx
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rcx, %r11
+; X64-NEXT:    adcq %rbx, %rsi
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r15
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    adcq %rcx, %rdx
+; X64-NEXT:    movq %r10, %r9
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq %r8, %r12
+; X64-NEXT:    adcq (%rsp), %r12 # 8-byte Folded Reload
+; X64-NEXT:    addq %rax, %r9
+; X64-NEXT:    adcq %rdx, %r12
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r13, %rbp
 ; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq %rax, %r14
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
-; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload
+; X64-NEXT:    movq %r13, %rax
 ; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %r8, %rbx
 ; X64-NEXT:    adcq $0, %rsi
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r13
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    mulq %r15
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
-; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    sbbq %rsi, %rsi
 ; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rsi, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    addq %rdi, %rsi
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq %rsi, %r8
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload
+; X64-NEXT:    addq %r14, %rbx
+; X64-NEXT:    adcq %r11, %r8
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    movq %rbp, %rsi
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    addq %rax, %rsi
-; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload
-; X64-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r10, %rbx
-; X64-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r15, %rsi
-; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r12, %rcx
-; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    imulq %rax, %r13
-; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    addq %r13, %rdx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    imulq %rdi, %rbp
-; X64-NEXT:    addq %rdx, %rbp
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload
-; X64-NEXT:    imulq %r12, %rsi
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r14
 ; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    addq %rsi, %rdx
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %r14, %rcx
+; X64-NEXT:    adcq $0, %rdi
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    imulq %rcx, %rax
-; X64-NEXT:    addq %rdx, %rax
-; X64-NEXT:    addq %r9, %r10
-; X64-NEXT:    adcq %rbp, %rax
-; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq 56(%rax), %r15
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    adcq %rdi, %rsi
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    addq %rsi, %rax
+; X64-NEXT:    adcq %rcx, %rdx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload
+; X64-NEXT:    addq %r13, %rdi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    adcq %rbp, %rsi
+; X64-NEXT:    addq %rax, %rdi
+; X64-NEXT:    adcq %rdx, %rsi
+; X64-NEXT:    addq %rbx, %r10
+; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r8, %r11
+; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    addq %r9, %rdi
+; X64-NEXT:    adcq %r12, %rsi
+; X64-NEXT:    movl $0, %r14d
+; X64-NEXT:    adcq $0, %r14
+; X64-NEXT:    sbbq %r10, %r10
+; X64-NEXT:    andl $1, %r10d
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %r8, %rcx
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq %r11, %r8
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    addq %r8, %rax
+; X64-NEXT:    adcq %rcx, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload
+; X64-NEXT:    addq %r13, %r15
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload
+; X64-NEXT:    adcq %rbp, %r11
+; X64-NEXT:    addq %rax, %r15
+; X64-NEXT:    adcq %rdx, %r11
+; X64-NEXT:    addq %rdi, %r12
+; X64-NEXT:    adcq %rsi, %rbx
+; X64-NEXT:    adcq %r14, %r15
+; X64-NEXT:    adcq %r10, %r11
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    addq %rcx, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    adcq %rcx, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    adcq %rcx, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    adcq %rcx, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r12 # 8-byte Folded Reload
+; X64-NEXT:    movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r15 # 8-byte Folded Reload
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    adcq $0, %rax
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    adcq $0, %rax
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movl $0, %eax
+; X64-NEXT:    adcq $0, %rax
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    sbbq %rax, %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %r8, %rbp
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload
 ; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    addq %rbp, %r12
+; X64-NEXT:    adcq %rbx, %rcx
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rbp, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq (%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
+; X64-NEXT:    addq %rax, %r9
+; X64-NEXT:    adcq %rdx, %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %r14, %rbx
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, (%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rcx, %rbp
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    addq %rbp, %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %rcx
+; X64-NEXT:    sbbq %rdi, %rdi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rcx, %rbp
+; X64-NEXT:    adcq %rdi, %rbx
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    addq %r13, %rbp
+; X64-NEXT:    adcq %r12, %rbx
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    movq %r9, %r12
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    movq %r10, %r8
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    adcq %rsi, %r14
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    addq %r14, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload
+; X64-NEXT:    addq %r14, %rsi
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload
+; X64-NEXT:    adcq %r13, %rcx
+; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    adcq %rdx, %rcx
+; X64-NEXT:    addq %rbp, %r9
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rbx, %rdi
+; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    adcq $0, %rcx
+; X64-NEXT:    addq %r12, %rsi
+; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r8, %rcx
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    movl $0, %r10d
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    sbbq %r9, %r9
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rdi, %rbx
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    addq %r14, %rsi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    adcq %r13, %rcx
+; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    adcq %rdx, %rcx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r13 # 8-byte Reload
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r12, %rbx
+; X64-NEXT:    adcq %r10, %rsi
+; X64-NEXT:    adcq %r9, %rcx
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    addq %rax, (%rsp) # 8-byte Folded Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    adcq %rax, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    adcq %r15, {{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    adcq %r11, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
+; X64-NEXT:    movq %r13, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload
+; X64-NEXT:    movq 64(%r9), %r11
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %r11
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq 72(%r9), %rcx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rbx, %r8
+; X64-NEXT:    adcq %rbp, %rcx
+; X64-NEXT:    sbbq %rbp, %rbp
+; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rcx, %rdi
+; X64-NEXT:    adcq %rbp, %rsi
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload
+; X64-NEXT:    addq %rbx, %r12
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload
+; X64-NEXT:    adcq %r14, %r15
+; X64-NEXT:    addq %rdi, %r12
+; X64-NEXT:    adcq %rsi, %r15
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r11, %rsi
+; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %r11, %rdi
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %rcx, %r11
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %rbp, %rdi
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload
+; X64-NEXT:    adcq %r13, %r14
+; X64-NEXT:    addq %rax, %rbx
+; X64-NEXT:    adcq %rdx, %r14
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    adcq %r8, %r14
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    movq %r9, %rbp
+; X64-NEXT:    movq 80(%rbp), %r8
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r11, %r9
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %r10, %rcx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq 88(%rbp), %r10
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rcx, %r9
+; X64-NEXT:    adcq %rsi, %rbp
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rdi
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rbp, %rcx
+; X64-NEXT:    adcq %rsi, %rdi
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    mulq %rdx
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    adcq %rdx, %rax
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    addq %rcx, %rsi
+; X64-NEXT:    adcq %rdi, %rax
+; X64-NEXT:    addq %rbx, %r11
+; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r14, %r9
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    adcq $0, %rax
+; X64-NEXT:    addq %r12, %rsi
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    adcq %r15, %rax
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movl $0, %r12d
+; X64-NEXT:    adcq $0, %r12
+; X64-NEXT:    sbbq %r11, %r11
+; X64-NEXT:    andl $1, %r11d
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rbp, %rax
 ; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbx
 ; X64-NEXT:    addq %rcx, %rbx
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rbp
-; X64-NEXT:    movq %rax, %r14
-; X64-NEXT:    addq %rbx, %r14
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %rsi, %rbp
-; X64-NEXT:    sbbq %rcx, %rcx
-; X64-NEXT:    andl $1, %ecx
-; X64-NEXT:    movq %r12, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %r12
-; X64-NEXT:    movq %rax, %r8
-; X64-NEXT:    addq %rbp, %r8
-; X64-NEXT:    adcq %rcx, %r12
-; X64-NEXT:    addq %r10, %r8
-; X64-NEXT:    adcq %r9, %r12
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx # 8-byte Reload
-; X64-NEXT:    movq 120(%rdx), %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload
-; X64-NEXT:    imulq %r9, %rcx
-; X64-NEXT:    movq 112(%rdx), %rsi
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    imulq %r10, %rsi
-; X64-NEXT:    addq %rdx, %rsi
-; X64-NEXT:    movq 96(%rdi), %rbp
-; X64-NEXT:    movq 104(%rdi), %rbx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    imulq %rbx, %rcx
-; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    imulq %rbp, %r11
-; X64-NEXT:    addq %rdx, %r11
-; X64-NEXT:    addq %r15, %r13
-; X64-NEXT:    adcq %rsi, %r11
-; X64-NEXT:    movq %r11, %r15
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r9
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r9
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    mulq %r10
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rcx, %rbp
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %rdi, %rsi
-; X64-NEXT:    sbbq %rcx, %rcx
-; X64-NEXT:    andl $1, %ecx
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r10
-; X64-NEXT:    addq %rsi, %rax
-; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    addq %r13, %rax
-; X64-NEXT:    adcq %r15, %rdx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    adcq %r14, %rbp
-; X64-NEXT:    adcq %r8, %rax
-; X64-NEXT:    adcq %r12, %rdx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
-; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
-; X64-NEXT:    movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
-; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq 80(%rsi), %r9
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq 88(%rsi), %r8
-; X64-NEXT:    movq %rsi, %r11
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    addq %rcx, %rdi
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %rbp
-; X64-NEXT:    movq %rax, %r14
-; X64-NEXT:    addq %rdi, %r14
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %rsi, %rbp
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %rbp, %rcx
-; X64-NEXT:    adcq %rdi, %rsi
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    xorl %r13d, %r13d
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    movq (%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    addq %r10, %r12
-; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload
-; X64-NEXT:    adcq %r9, %r8
-; X64-NEXT:    addq %rcx, %r12
-; X64-NEXT:    adcq %rsi, %r8
-; X64-NEXT:    movq %r11, %rsi
-; X64-NEXT:    movq 64(%rsi), %r11
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    movq %rbx, %rdi
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq 72(%rsi), %rbx
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rcx, %rbp
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %r11, %rax
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    addq %rbp, %rax
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    sbbq %rdi, %rdi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rcx, %rbp
-; X64-NEXT:    adcq %rdi, %rsi
-; X64-NEXT:    movq %r11, %rdi
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r13
-; X64-NEXT:    movq %rdx, %r11
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    addq %r13, %r10
-; X64-NEXT:    adcq %r11, %r9
-; X64-NEXT:    addq %rbp, %r10
-; X64-NEXT:    adcq %rsi, %r9
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
-; X64-NEXT:    movq %r10, (%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r14, %r9
-; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %r12
-; X64-NEXT:    adcq $0, %r8
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %rbp
-; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    addq %r9, %rsi
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    mulq %r10
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    addq %rsi, %rdi
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rbp, %rcx
+; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    sbbq %rsi, %rsi
 ; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r10
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rsi, %rdx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r13 # 8-byte Folded Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload
-; X64-NEXT:    adcq %r14, %r11
-; X64-NEXT:    addq %rax, %r13
-; X64-NEXT:    adcq %rdx, %r11
-; X64-NEXT:    addq (%rsp), %r15 # 8-byte Folded Reload
-; X64-NEXT:    movq %r15, (%rsp) # 8-byte Spill
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %r13
-; X64-NEXT:    adcq $0, %r11
-; X64-NEXT:    addq %r12, %r13
-; X64-NEXT:    adcq %r8, %r11
-; X64-NEXT:    movl $0, %r8d
-; X64-NEXT:    adcq $0, %r8
-; X64-NEXT:    sbbq %r9, %r9
-; X64-NEXT:    andl $1, %r9d
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %r12
-; X64-NEXT:    movq %rax, %rdi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r15
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %r12, %rbp
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r10
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    addq %rbp, %rax
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    sbbq %rsi, %rsi
-; X64-NEXT:    andl $1, %esi
-; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %rbp, %rax
 ; X64-NEXT:    mulq %r10
 ; X64-NEXT:    addq %rcx, %rax
 ; X64-NEXT:    adcq %rsi, %rdx
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
 ; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    adcq %r14, %rcx
+; X64-NEXT:    adcq %r13, %rcx
 ; X64-NEXT:    addq %rax, %rsi
 ; X64-NEXT:    adcq %rdx, %rcx
-; X64-NEXT:    addq %r13, %rdi
+; X64-NEXT:    addq %r14, %r15
+; X64-NEXT:    movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r9, %rdi
 ; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r11, %rbp
-; X64-NEXT:    movq %rbp, {{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r8, %rsi
+; X64-NEXT:    adcq %r12, %rsi
 ; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq %r9, %rcx
+; X64-NEXT:    adcq %r11, %rcx
 ; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    movq 96(%rbp), %rcx
-; X64-NEXT:    imulq %rcx, %r10
-; X64-NEXT:    movq %rcx, %rax
-; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    imulq %rax, %r10
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rax, %r9
 ; X64-NEXT:    addq %r10, %rdx
-; X64-NEXT:    movq 104(%rbp), %r8
-; X64-NEXT:    imulq %r8, %r15
-; X64-NEXT:    addq %rdx, %r15
-; X64-NEXT:    movq 112(%rbp), %rax
-; X64-NEXT:    movq %rbp, %rdi
-; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
-; X64-NEXT:    imulq %rbx, %rsi
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    addq %rsi, %rdx
-; X64-NEXT:    movq 120(%rdi), %rdi
-; X64-NEXT:    imulq %rbp, %rdi
-; X64-NEXT:    addq %rdx, %rdi
-; X64-NEXT:    addq %r9, %r13
-; X64-NEXT:    adcq %r15, %rdi
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq %rbp, %r9
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %rbp
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    addq %rbp, %rsi
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    movq %r9, %rax
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %rbp
-; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    addq %rsi, %r12
-; X64-NEXT:    adcq $0, %rbp
-; X64-NEXT:    addq %rcx, %rbp
-; X64-NEXT:    sbbq %rcx, %rcx
-; X64-NEXT:    andl $1, %ecx
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %r8
-; X64-NEXT:    movq %rax, %r9
-; X64-NEXT:    addq %rbp, %r9
-; X64-NEXT:    adcq %rcx, %r8
-; X64-NEXT:    addq %r13, %r9
-; X64-NEXT:    adcq %rdi, %r8
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
-; X64-NEXT:    imulq %rbx, %rsi
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    addq %rsi, %rdx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
-; X64-NEXT:    imulq %r11, %rcx
-; X64-NEXT:    addq %rdx, %rcx
-; X64-NEXT:    movq %rcx, %rsi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    imulq %r10, %r8
+; X64-NEXT:    addq %rdx, %r8
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14 # 8-byte Reload
-; X64-NEXT:    imulq %r14, %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    imulq %rbx, %rsi
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
 ; X64-NEXT:    mulq %rbp
-; X64-NEXT:    movq %rax, %r13
-; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rsi, %rdx
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
 ; X64-NEXT:    imulq %rbp, %rax
 ; X64-NEXT:    addq %rdx, %rax
-; X64-NEXT:    addq %r10, %r13
-; X64-NEXT:    adcq %rsi, %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    addq %r9, %r11
+; X64-NEXT:    adcq %r8, %rax
+; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq %rbp, %r10
-; X64-NEXT:    mulq %rbx
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %rbx, %r8
+; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    mulq %rbx
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rcx, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rbp, %rax
+; X64-NEXT:    movq %r10, %rbp
+; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %rcx
-; X64-NEXT:    addq %rsi, %rcx
-; X64-NEXT:    adcq $0, %rdi
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    addq %rbx, %r15
+; X64-NEXT:    adcq %rsi, %rdi
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %r12
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rdi, %r9
+; X64-NEXT:    adcq %rcx, %r12
+; X64-NEXT:    addq %r11, %r9
+; X64-NEXT:    adcq %r14, %r12
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx # 8-byte Reload
+; X64-NEXT:    movq 120(%rdx), %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    imulq %r10, %rcx
+; X64-NEXT:    movq 112(%rdx), %rsi
+; X64-NEXT:    movq %rdx, %rbp
 ; X64-NEXT:    movq %r10, %rax
-; X64-NEXT:    mulq %r11
-; X64-NEXT:    movq %rdx, %r15
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload
+; X64-NEXT:    imulq %r8, %rsi
+; X64-NEXT:    addq %rdx, %rsi
+; X64-NEXT:    movq 96(%rbp), %rdi
+; X64-NEXT:    movq 104(%rbp), %rbx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    imulq %rbx, %rcx
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    imulq %rdi, %rax
+; X64-NEXT:    addq %rdx, %rax
+; X64-NEXT:    addq %r11, %r13
+; X64-NEXT:    adcq %rsi, %rax
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    mulq %r10
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %rcx, %rsi
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rsi, %rdi
+; X64-NEXT:    adcq %rbp, %rcx
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    addq %r13, %rax
+; X64-NEXT:    adcq %r11, %rdx
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r15, %rdi
+; X64-NEXT:    adcq %r9, %rax
+; X64-NEXT:    adcq %r12, %rdx
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload
+; X64-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq 80(%rsi), %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r8 # 8-byte Reload
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq 88(%rsi), %r10
+; X64-NEXT:    movq %rsi, %r12
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %r9, %rbx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %rcx, %r9
+; X64-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r15 # 8-byte Reload
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    addq %rbx, %rax
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rbp, %rsi
+; X64-NEXT:    sbbq %rdi, %rdi
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rsi, %rbx
+; X64-NEXT:    adcq %rdi, %rbp
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    addq %rcx, %r10
-; X64-NEXT:    adcq $0, %r15
-; X64-NEXT:    addq %rdi, %r15
+; X64-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r9 # 8-byte Reload
+; X64-NEXT:    addq %r9, %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload
+; X64-NEXT:    adcq %r13, %r11
+; X64-NEXT:    addq %rbx, %r10
+; X64-NEXT:    adcq %rbp, %r11
+; X64-NEXT:    movq %r12, %rcx
+; X64-NEXT:    movq 64(%rcx), %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq 72(%rcx), %r14
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rsi, %rbx
+; X64-NEXT:    adcq $0, %rbp
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    addq %rbx, %rax
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %rbp, %rsi
 ; X64-NEXT:    sbbq %rcx, %rcx
 ; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq %r14, %rax
-; X64-NEXT:    mulq %r11
-; X64-NEXT:    addq %r15, %rax
+; X64-NEXT:    mulq %r15
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rsi, %rbp
+; X64-NEXT:    adcq %rcx, %rbx
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r8
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    addq %r15, %r9
+; X64-NEXT:    movq %r13, %rax
+; X64-NEXT:    adcq %r8, %rax
+; X64-NEXT:    addq %rbp, %r9
+; X64-NEXT:    adcq %rbx, %rax
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r9 # 8-byte Folded Reload
+; X64-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
+; X64-NEXT:    movq %rax, %r13
+; X64-NEXT:    adcq $0, %r10
+; X64-NEXT:    adcq $0, %r11
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq %r14, %r12
+; X64-NEXT:    movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbp
+; X64-NEXT:    addq %rcx, %rbp
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    addq %rbp, %rdi
+; X64-NEXT:    adcq %rsi, %r9
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    addq %r9, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload
+; X64-NEXT:    addq %r12, %r15
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r8 # 8-byte Folded Reload
+; X64-NEXT:    addq %rax, %r15
+; X64-NEXT:    adcq %rdx, %r8
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
+; X64-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r13, %rdi
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, %r15
+; X64-NEXT:    adcq $0, %r8
+; X64-NEXT:    addq %r10, %r15
+; X64-NEXT:    adcq %r11, %r8
+; X64-NEXT:    movl $0, %r9d
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    sbbq %r13, %r13
+; X64-NEXT:    andl $1, %r13d
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r14, %rsi
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r11
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %r14, %rbx
+; X64-NEXT:    adcq $0, %rsi
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    addq %rbx, %rax
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    adcq %rsi, %rcx
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    adcq %rsi, %rdx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    addq %r12, %rsi
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    addq %rax, %rsi
+; X64-NEXT:    adcq %rdx, %rcx
+; X64-NEXT:    addq %r15, %r10
+; X64-NEXT:    movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r8, %rbx
+; X64-NEXT:    movq %rbx, {{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r9, %rsi
+; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq %r13, %rcx
+; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq 96(%rsi), %rcx
+; X64-NEXT:    imulq %rcx, %rbp
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r11, %rdi
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rbp, %rdx
+; X64-NEXT:    movq 104(%rsi), %r8
+; X64-NEXT:    imulq %r8, %rdi
+; X64-NEXT:    addq %rdx, %rdi
+; X64-NEXT:    movq %rdi, %r10
+; X64-NEXT:    movq 112(%rsi), %rax
+; X64-NEXT:    movq %rsi, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    imulq %rbx, %rdi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rdi, %rdx
+; X64-NEXT:    movq 120(%rbp), %rdi
+; X64-NEXT:    imulq %rsi, %rdi
+; X64-NEXT:    addq %rdx, %rdi
+; X64-NEXT:    addq %r9, %r11
+; X64-NEXT:    adcq %r10, %rdi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    movq %rbx, %r9
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rsi, %rbx
+; X64-NEXT:    adcq $0, %rcx
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %r15
+; X64-NEXT:    addq %rbx, %r15
+; X64-NEXT:    adcq %rcx, %rsi
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r9, %rax
+; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %r8
+; X64-NEXT:    addq %rsi, %r8
+; X64-NEXT:    adcq %rcx, %rbx
+; X64-NEXT:    addq %r11, %r8
+; X64-NEXT:    adcq %rdi, %rbx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    imulq %rax, %rsi
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rsi, %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12 # 8-byte Reload
+; X64-NEXT:    imulq %r12, %rcx
+; X64-NEXT:    addq %rdx, %rcx
+; X64-NEXT:    movq %rcx, %rbp
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    imulq %rsi, %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
+; X64-NEXT:    mulq %rdi
+; X64-NEXT:    movq %rax, %r10
+; X64-NEXT:    addq %rcx, %rdx
+; X64-NEXT:    movq %r14, %r13
+; X64-NEXT:    imulq %rdi, %r13
+; X64-NEXT:    addq %rdx, %r13
+; X64-NEXT:    addq %r11, %r10
+; X64-NEXT:    adcq %rbp, %r13
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movq %rdi, %r11
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movq %rsi, %r14
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    addq %rbp, %rcx
+; X64-NEXT:    adcq $0, %r9
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r12, %rbp
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    addq %rcx, %r11
+; X64-NEXT:    adcq %r9, %rsi
+; X64-NEXT:    sbbq %rcx, %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    addq %r13, %rax
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
-; X64-NEXT:    adcq %r12, %r10
-; X64-NEXT:    adcq %r9, %rax
-; X64-NEXT:    adcq %r8, %rdx
-; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
+; X64-NEXT:    addq %r10, %rax
+; X64-NEXT:    adcq %r13, %rdx
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    adcq %r15, %r11
+; X64-NEXT:    adcq %r8, %rax
+; X64-NEXT:    adcq %rbx, %rdx
+; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
 ; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
 ; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    movq (%rsp), %rbx # 8-byte Reload
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx # 8-byte Reload
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
-; X64-NEXT:    addq {{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload
+; X64-NEXT:    addq (%rsp), %rcx # 8-byte Folded Reload
 ; X64-NEXT:    movq %rcx, %r8
-; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
-; X64-NEXT:    movq %rdi, %r9
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
+; X64-NEXT:    movq %rsi, %r9
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbx # 8-byte Folded Reload
 ; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rbp # 8-byte Folded Reload
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
+; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r11 # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
 ; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rdx # 8-byte Folded Reload
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, (%rcx)
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, 8(%rcx)
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, 16(%rcx)
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, 24(%rcx)
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, 32(%rcx)
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, 40(%rcx)
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, 48(%rcx)
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
-; X64-NEXT:    movq %rdi, 56(%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, (%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, 8(%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, 16(%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, 24(%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, 32(%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, 40(%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, 48(%rcx)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload
+; X64-NEXT:    movq %rsi, 56(%rcx)
 ; X64-NEXT:    movq %r8, 64(%rcx)
 ; X64-NEXT:    movq %r9, 72(%rcx)
 ; X64-NEXT:    movq %rbx, 80(%rcx)
-; X64-NEXT:    movq %rsi, 88(%rcx)
-; X64-NEXT:    movq %rbp, 96(%rcx)
-; X64-NEXT:    movq %r10, 104(%rcx)
+; X64-NEXT:    movq %rbp, 88(%rcx)
+; X64-NEXT:    movq %rdi, 96(%rcx)
+; X64-NEXT:    movq %r11, 104(%rcx)
 ; X64-NEXT:    movq %rax, 112(%rcx)
 ; X64-NEXT:    movq %rdx, 120(%rcx)
-; X64-NEXT:    addq $360, %rsp # imm = 0x168
+; X64-NEXT:    addq $352, %rsp # imm = 0x160
 ; X64-NEXT:    popq %rbx
 ; X64-NEXT:    popq %r12
 ; X64-NEXT:    popq %r13
diff --git a/test/CodeGen/X86/mul-i256.ll b/test/CodeGen/X86/mul-i256.ll
index bb2989b9298..34148471865 100644
--- a/test/CodeGen/X86/mul-i256.ll
+++ b/test/CodeGen/X86/mul-i256.ll
@@ -219,7 +219,7 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 {
 ; X64-NEXT:    .cfi_offset %r14, -24
 ; X64-NEXT:  .Lcfi7:
 ; X64-NEXT:    .cfi_offset %r15, -16
-; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rdx, %r9
 ; X64-NEXT:    movq (%rdi), %r14
 ; X64-NEXT:    movq 8(%rdi), %r8
 ; X64-NEXT:    movq 16(%rdi), %rcx
@@ -230,7 +230,7 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 {
 ; X64-NEXT:    imulq %r12, %rdi
 ; X64-NEXT:    movq %r12, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq %rax, %r10
 ; X64-NEXT:    addq %rdi, %rdx
 ; X64-NEXT:    imulq %r15, %rcx
 ; X64-NEXT:    addq %rdx, %rcx
@@ -243,12 +243,12 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 {
 ; X64-NEXT:    movq 24(%rsi), %rbx
 ; X64-NEXT:    imulq %r14, %rbx
 ; X64-NEXT:    addq %rdx, %rbx
-; X64-NEXT:    addq %r9, %r11
+; X64-NEXT:    addq %r10, %r11
 ; X64-NEXT:    adcq %rcx, %rbx
 ; X64-NEXT:    movq %r14, %rax
 ; X64-NEXT:    mulq %r12
 ; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    movq %rax, %r10
 ; X64-NEXT:    movq %r8, %rax
 ; X64-NEXT:    mulq %r12
 ; X64-NEXT:    movq %rdx, %rcx
@@ -260,8 +260,7 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 {
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %r14
 ; X64-NEXT:    addq %rdi, %r14
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %rcx, %rsi
+; X64-NEXT:    adcq %rcx, %rsi
 ; X64-NEXT:    sbbq %rcx, %rcx
 ; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq %r8, %rax
@@ -270,10 +269,10 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 {
 ; X64-NEXT:    adcq %rcx, %rdx
 ; X64-NEXT:    addq %r11, %rax
 ; X64-NEXT:    adcq %rbx, %rdx
-; X64-NEXT:    movq %r9, (%r10)
-; X64-NEXT:    movq %r14, 8(%r10)
-; X64-NEXT:    movq %rax, 16(%r10)
-; X64-NEXT:    movq %rdx, 24(%r10)
+; X64-NEXT:    movq %r10, (%r9)
+; X64-NEXT:    movq %r14, 8(%r9)
+; X64-NEXT:    movq %rax, 16(%r9)
+; X64-NEXT:    movq %rdx, 24(%r9)
 ; X64-NEXT:    popq %rbx
 ; X64-NEXT:    popq %r12
 ; X64-NEXT:    popq %r14
diff --git a/test/CodeGen/X86/mul-i512.ll b/test/CodeGen/X86/mul-i512.ll
index d26040059e6..14fbeae5279 100644
--- a/test/CodeGen/X86/mul-i512.ll
+++ b/test/CodeGen/X86/mul-i512.ll
@@ -911,57 +911,57 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movq %rdx, (%rsp) # 8-byte Spill
-; X64-NEXT:    movq 24(%rdi), %rbp
-; X64-NEXT:    movq 16(%rdi), %r11
+; X64-NEXT:    movq 24(%rdi), %r11
+; X64-NEXT:    movq 16(%rdi), %r14
 ; X64-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq (%rsi), %rdx
-; X64-NEXT:    movq 8(%rsi), %r8
-; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq 8(%rsi), %rbp
+; X64-NEXT:    movq %r14, %rax
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    movq %rdx, %r8
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rsi, %r9
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %rsi
-; X64-NEXT:    addq %r10, %rsi
-; X64-NEXT:    adcq $0, %rbx
 ; X64-NEXT:    movq %r11, %rax
 ; X64-NEXT:    movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rsi, %r10
+; X64-NEXT:    movq %rdx, %rbx
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    addq %r8, %rsi
+; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %r14, %rax
+; X64-NEXT:    movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    mulq %rbp
 ; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rax, %r12
-; X64-NEXT:    addq %rsi, %r12
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rbx, %rcx
+; X64-NEXT:    movq %rax, %r9
+; X64-NEXT:    addq %rsi, %r9
+; X64-NEXT:    adcq %rbx, %rcx
 ; X64-NEXT:    sbbq %rbx, %rbx
 ; X64-NEXT:    andl $1, %ebx
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq %rbp, %r8
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rcx, %rbp
 ; X64-NEXT:    adcq %rbx, %rsi
 ; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    movq %r9, %rbx
+; X64-NEXT:    movq %r10, %rbx
 ; X64-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rbx, %rax
 ; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rdx, %r13
 ; X64-NEXT:    movq %rax, %r10
-; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    movq %r14, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    movq %rdx, %r9
-; X64-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r12
+; X64-NEXT:    movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq %rax, %r15
 ; X64-NEXT:    movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    addq %r10, %r15
-; X64-NEXT:    adcq %r13, %r9
+; X64-NEXT:    adcq %r13, %r12
 ; X64-NEXT:    addq %rbp, %r15
-; X64-NEXT:    adcq %rsi, %r9
+; X64-NEXT:    adcq %rsi, %r12
 ; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq (%rdi), %r14
 ; X64-NEXT:    movq %r14, %rax
@@ -982,8 +982,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rbx
-; X64-NEXT:    addq %rbp, %rbx
+; X64-NEXT:    adcq %rbp, %rbx
 ; X64-NEXT:    sbbq %rdi, %rdi
 ; X64-NEXT:    andl $1, %edi
 ; X64-NEXT:    movq %rcx, %rax
@@ -1003,10 +1002,9 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    addq %rbp, %r10
 ; X64-NEXT:    adcq %rsi, %r13
 ; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %r10 # 8-byte Folded Reload
-; X64-NEXT:    adcq %r12, %r13
+; X64-NEXT:    adcq %r9, %r13
 ; X64-NEXT:    adcq $0, %r15
-; X64-NEXT:    adcq $0, %r9
-; X64-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    adcq $0, %r12
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload
 ; X64-NEXT:    movq 16(%rsi), %r8
 ; X64-NEXT:    movq %rcx, %rax
@@ -1014,22 +1012,21 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %r12
+; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %rax, %rbp
-; X64-NEXT:    addq %rdi, %rbp
-; X64-NEXT:    adcq $0, %rbx
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %rax, %rbx
+; X64-NEXT:    addq %rdi, %rbx
+; X64-NEXT:    adcq $0, %rbp
 ; X64-NEXT:    movq 24(%rsi), %rdi
 ; X64-NEXT:    movq %r9, %rax
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    addq %rbp, %rax
+; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    addq %rbx, %rsi
+; X64-NEXT:    adcq %rbp, %rsi
 ; X64-NEXT:    sbbq %rbp, %rbp
 ; X64-NEXT:    andl $1, %ebp
 ; X64-NEXT:    movq %rcx, %rax
@@ -1047,13 +1044,12 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    adcq %rdx, %r14
 ; X64-NEXT:    addq %r9, %r11
 ; X64-NEXT:    adcq %rbx, %r14
-; X64-NEXT:    addq %r10, %r12
-; X64-NEXT:    movq %r12, -{{[0-9]+}}(%rsp) # 8-byte Spill
+; X64-NEXT:    addq %r10, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
 ; X64-NEXT:    adcq %r13, -{{[0-9]+}}(%rsp) # 8-byte Folded Spill
 ; X64-NEXT:    adcq $0, %r11
 ; X64-NEXT:    adcq $0, %r14
 ; X64-NEXT:    addq %r15, %r11
-; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %r14 # 8-byte Folded Reload
+; X64-NEXT:    adcq %r12, %r14
 ; X64-NEXT:    adcq $0, %rcx
 ; X64-NEXT:    movq %rcx, %r13
 ; X64-NEXT:    sbbq %r9, %r9
@@ -1075,8 +1071,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    addq %rbx, %rax
 ; X64-NEXT:    movq %rax, %rbx
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rsi, %rcx
+; X64-NEXT:    adcq %rsi, %rcx
 ; X64-NEXT:    sbbq %rsi, %rsi
 ; X64-NEXT:    andl $1, %esi
 ; X64-NEXT:    movq %r10, %rax
@@ -1102,7 +1097,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    imulq %rsi, %rdi
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    mulq %r8
-; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    movq %rax, %r10
 ; X64-NEXT:    addq %rdi, %rdx
 ; X64-NEXT:    movq 40(%rcx), %r9
 ; X64-NEXT:    imulq %r9, %r8
@@ -1110,60 +1105,58 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    movq 48(%rcx), %rax
 ; X64-NEXT:    movq %rcx, %rbx
 ; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload
+; X64-NEXT:    imulq %r11, %rdi
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload
-; X64-NEXT:    imulq %rcx, %rdi
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload
-; X64-NEXT:    mulq %rbp
+; X64-NEXT:    mulq %rcx
 ; X64-NEXT:    movq %rax, %r12
 ; X64-NEXT:    addq %rdi, %rdx
 ; X64-NEXT:    movq 56(%rbx), %rbx
-; X64-NEXT:    imulq %rbp, %rbx
+; X64-NEXT:    imulq %rcx, %rbx
 ; X64-NEXT:    addq %rdx, %rbx
-; X64-NEXT:    addq %r11, %r12
+; X64-NEXT:    addq %r10, %r12
 ; X64-NEXT:    adcq %r8, %rbx
-; X64-NEXT:    movq %rbp, %rax
-; X64-NEXT:    movq %rbp, %r8
+; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %r11, %rax
 ; X64-NEXT:    mulq %rsi
 ; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rdi, %rbp
 ; X64-NEXT:    adcq $0, %rsi
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %r9
-; X64-NEXT:    movq %rdx, %rdi
-; X64-NEXT:    movq %rax, %r11
-; X64-NEXT:    addq %rbp, %r11
-; X64-NEXT:    adcq $0, %rdi
-; X64-NEXT:    addq %rsi, %rdi
-; X64-NEXT:    sbbq %rsi, %rsi
-; X64-NEXT:    andl $1, %esi
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %r9
-; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rdx, %rdi
 ; X64-NEXT:    movq %rax, %r15
-; X64-NEXT:    addq %rdi, %r15
-; X64-NEXT:    adcq %rsi, %r14
-; X64-NEXT:    addq %r12, %r15
-; X64-NEXT:    adcq %rbx, %r14
+; X64-NEXT:    addq %rbp, %r15
+; X64-NEXT:    adcq %rsi, %rdi
+; X64-NEXT:    sbbq %rsi, %rsi
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    movq %r11, %rax
+; X64-NEXT:    mulq %r9
+; X64-NEXT:    movq %rdx, %r11
+; X64-NEXT:    movq %rax, %r14
+; X64-NEXT:    addq %rdi, %r14
+; X64-NEXT:    adcq %rsi, %r11
+; X64-NEXT:    addq %r12, %r14
+; X64-NEXT:    adcq %rbx, %r11
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx # 8-byte Reload
 ; X64-NEXT:    movq 56(%rdx), %rcx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
-; X64-NEXT:    imulq %r8, %rcx
-; X64-NEXT:    movq 48(%rdx), %rbp
-; X64-NEXT:    movq %rdx, %rbx
-; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    mulq %rbp
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
+; X64-NEXT:    imulq %r10, %rcx
+; X64-NEXT:    movq 48(%rdx), %rbx
+; X64-NEXT:    movq %rdx, %rbp
+; X64-NEXT:    movq %r10, %rax
+; X64-NEXT:    mulq %rbx
 ; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    addq %rcx, %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10 # 8-byte Reload
-; X64-NEXT:    imulq %r10, %rbp
-; X64-NEXT:    addq %rdx, %rbp
-; X64-NEXT:    movq 32(%rbx), %rdi
-; X64-NEXT:    movq 40(%rbx), %r12
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload
+; X64-NEXT:    imulq %r8, %rbx
+; X64-NEXT:    addq %rdx, %rbx
+; X64-NEXT:    movq 32(%rbp), %rdi
+; X64-NEXT:    movq 40(%rbp), %r12
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
 ; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    imulq %r12, %rcx
@@ -1174,36 +1167,35 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind {
 ; X64-NEXT:    imulq %rdi, %r13
 ; X64-NEXT:    addq %rdx, %r13
 ; X64-NEXT:    addq %rsi, %r9
-; X64-NEXT:    adcq %rbp, %r13
+; X64-NEXT:    adcq %rbx, %r13
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    mulq %r10
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    movq %r12, %rax
-; X64-NEXT:    mulq %r8
+; X64-NEXT:    mulq %r10
 ; X64-NEXT:    movq %rdx, %rbx
 ; X64-NEXT:    movq %rax, %rbp
 ; X64-NEXT:    addq %rcx, %rbp
 ; X64-NEXT:    adcq $0, %rbx
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    mulq %r8
 ; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rax, %rdi
 ; X64-NEXT:    addq %rbp, %rdi
-; X64-NEXT:    adcq $0, %rcx
-; X64-NEXT:    addq %rbx, %rcx
-; X64-NEXT:    sbbq %rbp, %rbp
-; X64-NEXT:    andl $1, %ebp
+; X64-NEXT:    adcq %rbx, %rcx
+; X64-NEXT:    sbbq %rbx, %rbx
+; X64-NEXT:    andl $1, %ebx
 ; X64-NEXT:    movq %r12, %rax
-; X64-NEXT:    mulq %r10
+; X64-NEXT:    mulq %r8
 ; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rbp, %rdx
+; X64-NEXT:    adcq %rbx, %rdx
 ; X64-NEXT:    addq %r9, %rax
 ; X64-NEXT:    adcq %r13, %rdx
 ; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
-; X64-NEXT:    adcq %r11, %rdi
-; X64-NEXT:    adcq %r15, %rax
-; X64-NEXT:    adcq %r14, %rdx
+; X64-NEXT:    adcq %r15, %rdi
+; X64-NEXT:    adcq %r14, %rax
+; X64-NEXT:    adcq %r11, %rdx
 ; X64-NEXT:    addq -{{[0-9]+}}(%rsp), %rsi # 8-byte Folded Reload
 ; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rdi # 8-byte Folded Reload
 ; X64-NEXT:    adcq -{{[0-9]+}}(%rsp), %rax # 8-byte Folded Reload
diff --git a/test/CodeGen/X86/patchpoint-invoke.ll b/test/CodeGen/X86/patchpoint-invoke.ll
index c6dff3b78f1..9270bf2b06b 100644
--- a/test/CodeGen/X86/patchpoint-invoke.ll
+++ b/test/CodeGen/X86/patchpoint-invoke.ll
@@ -45,7 +45,7 @@ threw:
 ; Verify that the stackmap section got emitted:
 ; CHECK-LABEL: __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
diff --git a/test/CodeGen/X86/pr14657.ll b/test/CodeGen/X86/pr14657.ll
deleted file mode 100644
index cc7d3e068d4..00000000000
--- a/test/CodeGen/X86/pr14657.ll
+++ /dev/null
@@ -1,325 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-
-; PR14657 - avoid truncation/extension of comparison results
-
-@da = common global [1024 x float] zeroinitializer, align 32
-@db = common global [1024 x float] zeroinitializer, align 32
-@dc = common global [1024 x float] zeroinitializer, align 32
-@dd = common global [1024 x float] zeroinitializer, align 32
-@dj = common global [1024 x i32] zeroinitializer, align 32
-
-define void @_Z9example25v() nounwind uwtable noinline ssp {
-; SSE2-LABEL: _Z9example25v:
-; SSE2:       # BB#0: # %vector.ph
-; SSE2-NEXT:    movq $-4096, %rax # imm = 0xF000
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,1,1]
-; SSE2-NEXT:    .p2align 4, 0x90
-; SSE2-NEXT:  .LBB0_1: # %vector.body
-; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT:    movaps da+4096(%rax), %xmm1
-; SSE2-NEXT:    movaps da+4112(%rax), %xmm2
-; SSE2-NEXT:    cmpltps db+4112(%rax), %xmm2
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT:    cmpltps db+4096(%rax), %xmm1
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT:    psllw $15, %xmm1
-; SSE2-NEXT:    psraw $15, %xmm1
-; SSE2-NEXT:    movaps dc+4096(%rax), %xmm2
-; SSE2-NEXT:    movaps dc+4112(%rax), %xmm3
-; SSE2-NEXT:    cmpltps dd+4112(%rax), %xmm3
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE2-NEXT:    cmpltps dd+4096(%rax), %xmm2
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE2-NEXT:    psllw $15, %xmm2
-; SSE2-NEXT:    psraw $15, %xmm2
-; SSE2-NEXT:    pand %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, dj+4112(%rax)
-; SSE2-NEXT:    movdqa %xmm1, dj+4096(%rax)
-; SSE2-NEXT:    addq $32, %rax
-; SSE2-NEXT:    jne .LBB0_1
-; SSE2-NEXT:  # BB#2: # %for.end
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: _Z9example25v:
-; SSE41:       # BB#0: # %vector.ph
-; SSE41-NEXT:    movq $-4096, %rax # imm = 0xF000
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,1,1,1]
-; SSE41-NEXT:    .p2align 4, 0x90
-; SSE41-NEXT:  .LBB0_1: # %vector.body
-; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE41-NEXT:    movaps da+4096(%rax), %xmm2
-; SSE41-NEXT:    movaps da+4112(%rax), %xmm3
-; SSE41-NEXT:    cmpltps db+4112(%rax), %xmm3
-; SSE41-NEXT:    pshufb %xmm0, %xmm3
-; SSE41-NEXT:    cmpltps db+4096(%rax), %xmm2
-; SSE41-NEXT:    pshufb %xmm0, %xmm2
-; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT:    psllw $15, %xmm2
-; SSE41-NEXT:    psraw $15, %xmm2
-; SSE41-NEXT:    movaps dc+4096(%rax), %xmm3
-; SSE41-NEXT:    movaps dc+4112(%rax), %xmm4
-; SSE41-NEXT:    cmpltps dd+4112(%rax), %xmm4
-; SSE41-NEXT:    pshufb %xmm0, %xmm4
-; SSE41-NEXT:    cmpltps dd+4096(%rax), %xmm3
-; SSE41-NEXT:    pshufb %xmm0, %xmm3
-; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
-; SSE41-NEXT:    psllw $15, %xmm3
-; SSE41-NEXT:    psraw $15, %xmm3
-; SSE41-NEXT:    pand %xmm2, %xmm3
-; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
-; SSE41-NEXT:    pand %xmm1, %xmm2
-; SSE41-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    movdqa %xmm3, dj+4112(%rax)
-; SSE41-NEXT:    movdqa %xmm2, dj+4096(%rax)
-; SSE41-NEXT:    addq $32, %rax
-; SSE41-NEXT:    jne .LBB0_1
-; SSE41-NEXT:  # BB#2: # %for.end
-; SSE41-NEXT:    retq
-;
-; AVX1-LABEL: _Z9example25v:
-; AVX1:       # BB#0: # %vector.ph
-; AVX1-NEXT:    movq $-4096, %rax # imm = 0xF000
-; AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1]
-; AVX1-NEXT:    .p2align 4, 0x90
-; AVX1-NEXT:  .LBB0_1: # %vector.body
-; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX1-NEXT:    vmovups da+4096(%rax), %ymm1
-; AVX1-NEXT:    vcmpltps db+4096(%rax), %ymm1, %ymm1
-; AVX1-NEXT:    vmovups dc+4096(%rax), %ymm2
-; AVX1-NEXT:    vcmpltps dd+4096(%rax), %ymm2, %ymm2
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, dj+4096(%rax)
-; AVX1-NEXT:    addq $32, %rax
-; AVX1-NEXT:    jne .LBB0_1
-; AVX1-NEXT:  # BB#2: # %for.end
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: _Z9example25v:
-; AVX2:       # BB#0: # %vector.ph
-; AVX2-NEXT:    movq $-4096, %rax # imm = 0xF000
-; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm0
-; AVX2-NEXT:    .p2align 4, 0x90
-; AVX2-NEXT:  .LBB0_1: # %vector.body
-; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT:    vmovups da+4096(%rax), %ymm1
-; AVX2-NEXT:    vcmpltps db+4096(%rax), %ymm1, %ymm1
-; AVX2-NEXT:    vmovups dc+4096(%rax), %ymm2
-; AVX2-NEXT:    vcmpltps dd+4096(%rax), %ymm2, %ymm2
-; AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vandps %ymm0, %ymm1, %ymm1
-; AVX2-NEXT:    vmovups %ymm1, dj+4096(%rax)
-; AVX2-NEXT:    addq $32, %rax
-; AVX2-NEXT:    jne .LBB0_1
-; AVX2-NEXT:  # BB#2: # %for.end
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-vector.ph:
-  br label %vector.body
-
-vector.body:                                      ; preds = %vector.body, %vector.ph
-  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %0 = getelementptr inbounds [1024 x float], [1024 x float]* @da, i64 0, i64 %index
-  %1 = bitcast float* %0 to <8 x float>*
-  %2 = load <8 x float>, <8 x float>* %1, align 16
-  %3 = getelementptr inbounds [1024 x float], [1024 x float]* @db, i64 0, i64 %index
-  %4 = bitcast float* %3 to <8 x float>*
-  %5 = load <8 x float>, <8 x float>* %4, align 16
-  %6 = fcmp olt <8 x float> %2, %5
-  %7 = getelementptr inbounds [1024 x float], [1024 x float]* @dc, i64 0, i64 %index
-  %8 = bitcast float* %7 to <8 x float>*
-  %9 = load <8 x float>, <8 x float>* %8, align 16
-  %10 = getelementptr inbounds [1024 x float], [1024 x float]* @dd, i64 0, i64 %index
-  %11 = bitcast float* %10 to <8 x float>*
-  %12 = load <8 x float>, <8 x float>* %11, align 16
-  %13 = fcmp olt <8 x float> %9, %12
-  %14 = and <8 x i1> %6, %13
-  %15 = zext <8 x i1> %14 to <8 x i32>
-  %16 = getelementptr inbounds [1024 x i32], [1024 x i32]* @dj, i64 0, i64 %index
-  %17 = bitcast i32* %16 to <8 x i32>*
-  store <8 x i32> %15, <8 x i32>* %17, align 16
-  %index.next = add i64 %index, 8
-  %18 = icmp eq i64 %index.next, 1024
-  br i1 %18, label %for.end, label %vector.body
-
-for.end:                                          ; preds = %vector.body
-  ret void
-}
-
-define void @_Z9example24ss(i16 signext %x, i16 signext %y) nounwind uwtable noinline ssp {
-; SSE2-LABEL: _Z9example24ss:
-; SSE2:       # BB#0: # %vector.ph
-; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE2-NEXT:    movd %esi, %xmm1
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-NEXT:    movq $-4096, %rax # imm = 0xF000
-; SSE2-NEXT:    .p2align 4, 0x90
-; SSE2-NEXT:  .LBB1_1: # %vector.body
-; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT:    movaps da+4096(%rax), %xmm2
-; SSE2-NEXT:    movaps da+4112(%rax), %xmm3
-; SSE2-NEXT:    cmpltps db+4112(%rax), %xmm3
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE2-NEXT:    cmpltps db+4096(%rax), %xmm2
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    pandn %xmm1, %xmm2
-; SSE2-NEXT:    por %xmm3, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT:    psrad $16, %xmm3
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrad $16, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, dj+4112(%rax)
-; SSE2-NEXT:    movdqa %xmm3, dj+4096(%rax)
-; SSE2-NEXT:    addq $32, %rax
-; SSE2-NEXT:    jne .LBB1_1
-; SSE2-NEXT:  # BB#2: # %for.end
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: _Z9example24ss:
-; SSE41:       # BB#0: # %vector.ph
-; SSE41-NEXT:    movd %edi, %xmm0
-; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE41-NEXT:    movd %esi, %xmm1
-; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE41-NEXT:    movq $-4096, %rax # imm = 0xF000
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSE41-NEXT:    .p2align 4, 0x90
-; SSE41-NEXT:  .LBB1_1: # %vector.body
-; SSE41-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE41-NEXT:    movaps da+4096(%rax), %xmm3
-; SSE41-NEXT:    movaps da+4112(%rax), %xmm4
-; SSE41-NEXT:    cmpltps db+4112(%rax), %xmm4
-; SSE41-NEXT:    pshufb %xmm2, %xmm4
-; SSE41-NEXT:    cmpltps db+4096(%rax), %xmm3
-; SSE41-NEXT:    pshufb %xmm2, %xmm3
-; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
-; SSE41-NEXT:    movdqa %xmm0, %xmm4
-; SSE41-NEXT:    pand %xmm3, %xmm4
-; SSE41-NEXT:    pandn %xmm1, %xmm3
-; SSE41-NEXT:    por %xmm4, %xmm3
-; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
-; SSE41-NEXT:    pmovsxwd %xmm4, %xmm4
-; SSE41-NEXT:    pmovsxwd %xmm3, %xmm3
-; SSE41-NEXT:    movdqa %xmm3, dj+4096(%rax)
-; SSE41-NEXT:    movdqa %xmm4, dj+4112(%rax)
-; SSE41-NEXT:    addq $32, %rax
-; SSE41-NEXT:    jne .LBB1_1
-; SSE41-NEXT:  # BB#2: # %for.end
-; SSE41-NEXT:    retq
-;
-; AVX1-LABEL: _Z9example24ss:
-; AVX1:       # BB#0: # %vector.ph
-; AVX1-NEXT:    vmovd %edi, %xmm0
-; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX1-NEXT:    vmovd %esi, %xmm1
-; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX1-NEXT:    movq $-4096, %rax # imm = 0xF000
-; AVX1-NEXT:    .p2align 4, 0x90
-; AVX1-NEXT:  .LBB1_1: # %vector.body
-; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX1-NEXT:    vmovups da+4096(%rax), %ymm2
-; AVX1-NEXT:    vcmpltps db+4096(%rax), %ymm2, %ymm2
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpandn %xmm1, %xmm2, %xmm3
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm3
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
-; AVX1-NEXT:    vmovups %ymm2, dj+4096(%rax)
-; AVX1-NEXT:    addq $32, %rax
-; AVX1-NEXT:    jne .LBB1_1
-; AVX1-NEXT:  # BB#2: # %for.end
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: _Z9example24ss:
-; AVX2:       # BB#0: # %vector.ph
-; AVX2-NEXT:    vmovd %edi, %xmm0
-; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
-; AVX2-NEXT:    vmovd %esi, %xmm1
-; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT:    movq $-4096, %rax # imm = 0xF000
-; AVX2-NEXT:    .p2align 4, 0x90
-; AVX2-NEXT:  .LBB1_1: # %vector.body
-; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT:    vmovups da+4096(%rax), %ymm2
-; AVX2-NEXT:    vcmpltps db+4096(%rax), %ymm2, %ymm2
-; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpandn %xmm1, %xmm2, %xmm3
-; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm2
-; AVX2-NEXT:    vpor %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpmovsxwd %xmm2, %ymm2
-; AVX2-NEXT:    vmovdqu %ymm2, dj+4096(%rax)
-; AVX2-NEXT:    addq $32, %rax
-; AVX2-NEXT:    jne .LBB1_1
-; AVX2-NEXT:  # BB#2: # %for.end
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
-vector.ph:
-  %0 = insertelement <8 x i16> undef, i16 %x, i32 0
-  %broadcast11 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
-  %1 = insertelement <8 x i16> undef, i16 %y, i32 0
-  %broadcast12 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
-  br label %vector.body
-
-vector.body:                                      ; preds = %vector.body, %vector.ph
-  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-  %2 = getelementptr inbounds [1024 x float], [1024 x float]* @da, i64 0, i64 %index
-  %3 = bitcast float* %2 to <8 x float>*
-  %4 = load <8 x float>, <8 x float>* %3, align 16
-  %5 = getelementptr inbounds [1024 x float], [1024 x float]* @db, i64 0, i64 %index
-  %6 = bitcast float* %5 to <8 x float>*
-  %7 = load <8 x float>, <8 x float>* %6, align 16
-  %8 = fcmp olt <8 x float> %4, %7
-  %9 = select <8 x i1> %8, <8 x i16> %broadcast11, <8 x i16> %broadcast12
-  %10 = sext <8 x i16> %9 to <8 x i32>
-  %11 = getelementptr inbounds [1024 x i32], [1024 x i32]* @dj, i64 0, i64 %index
-  %12 = bitcast i32* %11 to <8 x i32>*
-  store <8 x i32> %10, <8 x i32>* %12, align 16
-  %index.next = add i64 %index, 8
-  %13 = icmp eq i64 %index.next, 1024
-  br i1 %13, label %for.end, label %vector.body
-
-for.end:                                          ; preds = %vector.body
-  ret void
-}
diff --git a/test/CodeGen/X86/pr28129.ll b/test/CodeGen/X86/pr28129.ll
new file mode 100644
index 00000000000..a155f71f79c
--- /dev/null
+++ b/test/CodeGen/X86/pr28129.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64
+
+define <4 x double> @cmp4f64_domain(<4 x double> %a) {
+; X86-LABEL: cmp4f64_domain:
+; X86:       # BB#0:
+; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: cmp4f64_domain:
+; X64:       # BB#0:
+; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; X64-NEXT:    retq
+  %cmp = fcmp oeq <4 x double> zeroinitializer, zeroinitializer
+  %sext = sext <4 x i1> %cmp to <4 x i64>
+  %mask = bitcast <4 x i64> %sext to <4 x double>
+  %add = fadd <4 x double> %a, %mask
+  ret <4 x double> %add
+}
+
+define <4 x double> @cmp4f64_domain_optsize(<4 x double> %a) optsize {
+; X86-LABEL: cmp4f64_domain_optsize:
+; X86:       # BB#0:
+; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: cmp4f64_domain_optsize:
+; X64:       # BB#0:
+; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; X64-NEXT:    retq
+  %cmp = fcmp oeq <4 x double> zeroinitializer, zeroinitializer
+  %sext = sext <4 x i1> %cmp to <4 x i64>
+  %mask = bitcast <4 x i64> %sext to <4 x double>
+  %add = fadd <4 x double> %a, %mask
+  ret <4 x double> %add
+}
+
+define <8 x float> @cmp8f32_domain(<8 x float> %a) {
+; X86-LABEL: cmp8f32_domain:
+; X86:       # BB#0:
+; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: cmp8f32_domain:
+; X64:       # BB#0:
+; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; X64-NEXT:    retq
+  %cmp = fcmp oeq <8 x float> zeroinitializer, zeroinitializer
+  %sext = sext <8 x i1> %cmp to <8 x i32>
+  %mask = bitcast <8 x i32> %sext to <8 x float>
+  %add = fadd <8 x float> %a, %mask
+  ret <8 x float> %add
+}
+
+define <8 x float> @cmp8f32_domain_optsize(<8 x float> %a) optsize {
+; X86-LABEL: cmp8f32_domain_optsize:
+; X86:       # BB#0:
+; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: cmp8f32_domain_optsize:
+; X64:       # BB#0:
+; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; X64-NEXT:    retq
+  %cmp = fcmp oeq <8 x float> zeroinitializer, zeroinitializer
+  %sext = sext <8 x i1> %cmp to <8 x i32>
+  %mask = bitcast <8 x i32> %sext to <8 x float>
+  %add = fadd <8 x float> %a, %mask
+  ret <8 x float> %add
+}
diff --git a/test/CodeGen/X86/pr31088.ll b/test/CodeGen/X86/pr31088.ll
new file mode 100644
index 00000000000..0dd8eb0ece8
--- /dev/null
+++ b/test/CodeGen/X86/pr31088.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
+
+define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
+; X86-LABEL: ir_fadd_v1f16:
+; X86:       # BB#0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: ir_fadd_v1f16:
+; X64:       # BB#0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
+; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    addss (%rsp), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    popq %rax
+; X64-NEXT:    retq
+;
+; F16C-LABEL: ir_fadd_v1f16:
+; F16C:       # BB#0:
+; F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
+; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
+; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; F16C-NEXT:    retq
+  %retval = fadd <1 x half> %arg0, %arg1
+  ret <1 x half> %retval
+}
+
+define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
+; X86-LABEL: ir_fadd_v2f16:
+; X86:       # BB#0:
+; X86-NEXT:    subl $64, %esp
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss %xmm0, (%esp)
+; X86-NEXT:    calll __gnu_f2h_ieee
+; X86-NEXT:    movzwl %ax, %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    calll __gnu_h2f_ieee
+; X86-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm1
+; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    addl $64, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: ir_fadd_v2f16:
+; X64:       # BB#0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    movss %xmm2, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
+; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    callq __gnu_f2h_ieee
+; X64-NEXT:    movzwl %ax, %edi
+; X64-NEXT:    callq __gnu_h2f_ieee
+; X64-NEXT:    addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
+; X64-NEXT:    movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
+; X64-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    addss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    retq
+;
+; F16C-LABEL: ir_fadd_v2f16:
+; F16C:       # BB#0:
+; F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
+; F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
+; F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
+; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
+; F16C-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
+; F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
+; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
+; F16C-NEXT:    vaddss %xmm2, %xmm0, %xmm0
+; F16C-NEXT:    vaddss %xmm3, %xmm1, %xmm1
+; F16C-NEXT:    retq
+  %retval = fadd <2 x half> %arg0, %arg1
+  ret <2 x half> %retval
+}
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 964037ea80a..20387ccd6b7 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2318,6 +2318,22 @@ define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
   ret <2 x double> %res1
 }
 
+define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
+; X32-LABEL: test_mm_set_pd1:
+; X32:       # BB#0:
+; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm_set_pd1:
+; X64:       # BB#0:
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT:    retq
+  %res0  = insertelement <2 x double> undef, double %a0, i32 0
+  %res1  = insertelement <2 x double> %res0, double %a0, i32 1
+  ret <2 x double> %res1
+}
+
 define <2 x double> @test_mm_set_sd(double %a0) nounwind {
 ; X32-LABEL: test_mm_set_sd:
 ; X32:       # BB#0:
diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll
index 8413b8ef82c..a685ed1f678 100644
--- a/test/CodeGen/X86/stack-protector-dbginfo.ll
+++ b/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -49,7 +49,7 @@ attributes #0 = { sspreq }
 !22 = !{i64* getelementptr inbounds ({ i64, [56 x i8] }, { i64, [56 x i8] }* @a, i32 0, i32 0)}
 !23 = !DILocalVariable(name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
 !24 = distinct !DISubprogram(name: "min<unsigned long long>", linkageName: "_ZN3__13minIyEERKT_S3_RS1_", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 12, file: !1, scope: !25, type: !27, templateParams: !33, variables: !35)
-!25 = !DINamespace(name: "__1", line: 1, file: !26, scope: null)
+!25 = !DINamespace(name: "__1", scope: null)
 !26 = !DIFile(filename: "main.cpp", directory: "/Users/matt/ryan_bug")
 !27 = !DISubroutineType(types: !28)
 !28 = !{!29, !29, !32}
diff --git a/test/CodeGen/X86/stackmap-fast-isel.ll b/test/CodeGen/X86/stackmap-fast-isel.ll
index 7afe966b77a..ae10a37756b 100644
--- a/test/CodeGen/X86/stackmap-fast-isel.ll
+++ b/test/CodeGen/X86/stackmap-fast-isel.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -42,62 +42,86 @@
 ; CHECK-NEXT:   .short  12
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   65536
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   2000000000
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   2147483647
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; LargeConstant at index 0
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; LargeConstant at index 1
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   1
 ; LargeConstant at index 2
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   2
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 
@@ -115,7 +139,9 @@ entry:
 ; CHECK-NEXT:   .short 1
 ; Loc 0: SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   33
 
@@ -133,8 +159,10 @@ define void @liveConstant() {
 ; CHECK-NEXT:   .short	1
 ; Loc 0: Direct RBP - ofs
 ; CHECK-NEXT:   .byte	2
-; CHECK-NEXT:   .byte	8
+; CHECK-NEXT:   .byte	0
+; CHECK-NEXT:   .short	8
 ; CHECK-NEXT:   .short	6
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long
 
 define void @directFrameIdx() {
diff --git a/test/CodeGen/X86/stackmap-large-constants.ll b/test/CodeGen/X86/stackmap-large-constants.ll
index 99a2c11828e..7de430b5393 100644
--- a/test/CodeGen/X86/stackmap-large-constants.ll
+++ b/test/CodeGen/X86/stackmap-large-constants.ll
@@ -3,7 +3,7 @@
 ; CHECK-LABEL:	.section	__LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT: __LLVM_StackMaps:
 ; version
-; CHECK-NEXT: 	.byte	2
+; CHECK-NEXT: 	.byte	3
 ; reserved
 ; CHECK-NEXT: 	.byte	0
 ; reserved
@@ -38,12 +38,17 @@
 ; ConstantIndex
 ; CHECK-NEXT: 	.byte	5
 ; reserved
-; CHECK-NEXT: 	.byte	8
+; CHECK-NEXT:   .byte	0
+; size
+; CHECK-NEXT: 	.short	8
 ; Dwarf RegNum
 ; CHECK-NEXT: 	.short	0
+; reserved
+; CHECK-NEXT:   .short  0
 ; Offset
 ; CHECK-NEXT: 	.long	0
 ; padding
+; CHECK-NEXT: 	.p2align 3
 ; CHECK-NEXT: 	.short	0
 ; NumLiveOuts
 ; CHECK-NEXT: 	.short	0
@@ -68,12 +73,17 @@ define void @foo() {
 ; ConstantIndex
 ; CHECK-NEXT: 	.byte	5
 ; reserved
-; CHECK-NEXT: 	.byte	8
+; CHECK-NEXT:   .byte	0
+; size
+; CHECK-NEXT: 	.short	8
 ; Dwarf RegNum
 ; CHECK-NEXT: 	.short	0
+; reserved
+; CHECK-NEXT:    .short 0
 ; Offset
 ; CHECK-NEXT: 	.long	1
 ; padding
+; CHECK-NEXT:  .p2align 3
 ; CHECK-NEXT: 	.short	0
 ; NumLiveOuts
 ; CHECK-NEXT: 	.short	0
diff --git a/test/CodeGen/X86/stackmap-large-location-size.ll b/test/CodeGen/X86/stackmap-large-location-size.ll
new file mode 100644
index 00000000000..6c90ddaedcc
--- /dev/null
+++ b/test/CodeGen/X86/stackmap-large-location-size.ll
@@ -0,0 +1,172 @@
+; RUN: llc < %s -mtriple="x86_64-pc-linux-gnu" | FileCheck %s
+
+declare void @callee()
+
+define void @f_0(<1024 x i64> %val) {
+; CHECK:      .quad	2882400015
+; CHECK-NEXT: .long	.Ltmp0-f_0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	4
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0 
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(1)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	1
+; Indirect
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8192
+; CHECK-NEXT: .short	7
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Padding
+; CHECK-NEXT: .p2align	3
+  call void @callee() [ "deopt"(<1024 x i64> %val) ]
+  ret void
+}
+
+define void @f_1(<1024 x i8*> %val) {
+; CHECK:      .quad	2882400015
+; CHECK-NEXT: .long	.Ltmp1-f_1
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	4
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(1)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	1
+; Indirect
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8192
+; CHECK-NEXT: .short	7
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Padding
+; CHECK-NEXT: .p2align	3
+  call void @callee() [ "deopt"(<1024 x i8*> %val) ]
+  ret void
+}
+
+define void @f_2(<99 x i8*> %val) {
+; CHECK:      .quad	2882400015
+; CHECK-NEXT: .long	.Ltmp2-f_2
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	4
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(1)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	1
+; Indirect
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	792
+; CHECK-NEXT: .short	7
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; CHECK-NEXT: .p2align	3
+  call void @callee() [ "deopt"(<99 x i8*> %val) ]
+  ret void
+}
+
+
+define <400 x i8 addrspace(1)*> @f_3(<400 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
+; CHECK:      .quad	4242
+; CHECK-NEXT: .long	.Ltmp3-f_3
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	5
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Constant(0)
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Indirect
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	3200
+; CHECK-NEXT: .short	7
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Indirect
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	3200
+; CHECK-NEXT: .short	7
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .long	0
+; Padding
+; CHECK-NEXT: .p2align	3
+  %tok = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 4242, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <400 x i8 addrspace(1)*> %obj)
+  %obj.r = call coldcc <400 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v400p1i8(token %tok, i32 7, i32 7)
+  ret <400 x i8 addrspace(1)*> %obj.r
+}
+
+declare void @do_safepoint()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare <400 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v400p1i8(token, i32, i32)
diff --git a/test/CodeGen/X86/stackmap-liveness.ll b/test/CodeGen/X86/stackmap-liveness.ll
index a5809ace795..eb95b9c8df4 100644
--- a/test/CodeGen/X86/stackmap-liveness.ll
+++ b/test/CodeGen/X86/stackmap-liveness.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:   __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -32,6 +32,7 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; Padding
+; CHECK-NEXT:   .p2align 3
 ; CHECK-NEXT:   .short  0
 ; Num LiveOut Entries: 0
 ; CHECK-NEXT:   .short  0
@@ -43,6 +44,7 @@ entry:
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
+; PATCH-NEXT:   .p2align  3
 ; PATCH-NEXT:   .short  0
 ; Num LiveOut Entries: 1
 ; PATCH-NEXT:   .short  1
@@ -63,6 +65,7 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; Padding
+; CHECK-NEXT:   .p2align  3
 ; CHECK-NEXT:   .short  0
 ; Num LiveOut Entries: 0
 ; CHECK-NEXT:   .short  0
@@ -74,6 +77,7 @@ entry:
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
+; PATCH-NEXT:   .p2align  3
 ; PATCH-NEXT:   .short  0
 ; Num LiveOut Entries: 5
 ; PATCH-NEXT:   .short  5
@@ -107,6 +111,7 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; Padding
+; CHECK-NEXT:   .p2align  3
 ; CHECK-NEXT:   .short  0
 ; Num LiveOut Entries: 0
 ; CHECK-NEXT:   .short  0
@@ -118,6 +123,7 @@ entry:
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
+; PATCH-NEXT:   .p2align  3
 ; PATCH-NEXT:   .short  0
 ; Num LiveOut Entries: 2
 ; PATCH-NEXT:   .short  2
@@ -144,6 +150,7 @@ entry:
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
+; PATCH-NEXT:   .p2align  3
 ; PATCH-NEXT:   .short  0
 ; Num LiveOut Entries: 0
 ; PATCH-NEXT:   .short  0
@@ -155,6 +162,7 @@ entry:
 ; PATCH-NEXT:   .short  0
 ; PATCH-NEXT:   .short  0
 ; Padding
+; PATCH-NEXT:   .p2align  3
 ; PATCH-NEXT:   .short  0
 ; Num LiveOut Entries: 2
 ; PATCH-NEXT:   .short  2
diff --git a/test/CodeGen/X86/stackmap.ll b/test/CodeGen/X86/stackmap.ll
index 9818d3547fc..601100bd570 100644
--- a/test/CodeGen/X86/stackmap.ll
+++ b/test/CodeGen/X86/stackmap.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -79,62 +79,86 @@
 ; CHECK-NEXT:   .short  12
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   65536
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   2000000000
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   2147483647
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; LargeConstant at index 0
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; LargeConstant at index 1
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   1
 ; LargeConstant at index 2
 ; CHECK-NEXT:   .byte   5
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   2
 ; SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -1
 
@@ -151,12 +175,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long  0
 define void @osrinline(i64 %a, i64 %b) {
 entry:
@@ -175,12 +203,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define void @osrcold(i64 %a, i64 %b) {
 entry:
@@ -200,12 +232,16 @@ ret:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define i64 @propertyRead(i64* %obj) {
 entry:
@@ -220,12 +256,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
 entry:
@@ -242,12 +282,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
 entry:
@@ -264,12 +308,16 @@ entry:
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  2
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 ; CHECK-NEXT:   .byte   1
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   0
 define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
 entry:
@@ -290,8 +338,11 @@ entry:
 ; Check that at least one is a spilled entry from RBP.
 ; Location: Indirect RBP + ...
 ; CHECK:        .byte 3
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short 6
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .long
 define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
 entry:
   call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
@@ -309,8 +360,11 @@ entry:
 ; Check that at least one is a spilled entry from RBP.
 ; Location: Indirect RBP + ...
 ; CHECK:        .byte 3
-; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short 8
 ; CHECK-NEXT:   .short 6
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long
 define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
 entry:
   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
@@ -327,8 +381,11 @@ entry:
 ; Check that the subregister operand is a 4-byte spill.
 ; Location: Indirect, 4-byte, RBP + ...
 ; CHECK:        .byte 3
-; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 4
 ; CHECK-NEXT:   .short 6
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .long
 define void @spillSubReg(i64 %arg) #0 {
 bb:
   br i1 undef, label %bb1, label %bb2
@@ -367,14 +424,18 @@ bb61:
 ; Check that the subregister operands are 1-byte spills.
 ; Location 0: Register, 4-byte, AL
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short 1
 ; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long 0
 ;
 ; Location 1: Register, 4-byte, BL
 ; CHECK-NEXT:   .byte 1
-; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 1
 ; CHECK-NEXT:   .short 3
+; CHECK-NEXT:   .short 0
 ; CHECK-NEXT:   .long 0
 define void @subRegOffset(i16 %arg) {
   %v = mul i16 %arg, 5
@@ -395,7 +456,9 @@ define void @subRegOffset(i16 %arg) {
 ; CHECK-NEXT:   .short 1
 ; Loc 0: SmallConstant
 ; CHECK-NEXT:   .byte   4
-; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   33
 
@@ -413,8 +476,10 @@ define void @liveConstant() {
 ; CHECK-NEXT:   .short	1
 ; Loc 0: Direct RBP - ofs
 ; CHECK-NEXT:   .byte	2
-; CHECK-NEXT:   .byte	8
+; CHECK-NEXT:   .byte	0
+; CHECK-NEXT:   .short	8
 ; CHECK-NEXT:   .short	6
+; CHECK-NEXT:   .short	0
 ; CHECK-NEXT:   .long
 
 ; Callsite 17
@@ -424,13 +489,17 @@ define void @liveConstant() {
 ; CHECK-NEXT:   .short	2
 ; Loc 0: Direct RBP - ofs
 ; CHECK-NEXT:   .byte	2
-; CHECK-NEXT:   .byte	8
+; CHECK-NEXT:   .byte	0
+; CHECK-NEXT:   .short	8
 ; CHECK-NEXT:   .short	6
+; CHECK-NEXT:   .short	0
 ; CHECK-NEXT:   .long
 ; Loc 1: Direct RBP - ofs
 ; CHECK-NEXT:   .byte	2
-; CHECK-NEXT:   .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
 ; CHECK-NEXT:   .short	6
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long
 define void @directFrameIdx() {
 entry:
@@ -473,8 +542,10 @@ entry:
 ; CHECK-NEXT:   .short 1
 ; Loc 0: Indirect fp - offset
 ; CHECK-NEXT:   .byte   3
-; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  4
 ; CHECK-NEXT:   .short  6
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT:   .long   -{{[0-9]+}}
 define void @clobberScratch(i32 %a) {
   tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r12},~{r13},~{r14},~{r15}"() nounwind
diff --git a/test/CodeGen/X86/statepoint-allocas.ll b/test/CodeGen/X86/statepoint-allocas.ll
index 95ce8f37617..9f5418432ab 100644
--- a/test/CodeGen/X86/statepoint-allocas.ll
+++ b/test/CodeGen/X86/statepoint-allocas.ll
@@ -48,7 +48,7 @@ declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i3
 ; CHECK-LABEL: .section .llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -77,23 +77,31 @@ declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i3
 ; CHECK: .short	4
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
+; CHECK: .short	0
 ; CHECK: .short	0
 ; CHECK: .long	0
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
+; CHECK: .short	0
 ; CHECK: .short	0
 ; CHECK: .long	0
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
+; CHECK: .short	0
 ; CHECK: .short	0
 ; CHECK: .long	0
 ; Direct Spill Slot [RSP+0]
 ; CHECK: .byte	2
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	0
 ; No Padding or LiveOuts
 ; CHECK: .short	0
@@ -106,23 +114,31 @@ declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i3
 ; CHECK: .short	4
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
+; CHECK: .short	0
 ; CHECK: .short	0
 ; CHECK: .long	0
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
+; CHECK: .short	0
 ; CHECK: .short	0
 ; CHECK: .long	0
 ; SmallConstant (1)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
+; CHECK: .short	0
 ; CHECK: .short	0
 ; CHECK: .long	1
 ; Direct Spill Slot [RSP+0]
 ; CHECK: .byte	2
-; CHECK: .byte	8
+; CHECK: .byte	0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	0
 
 ; No Padding or LiveOuts
diff --git a/test/CodeGen/X86/statepoint-live-in.ll b/test/CodeGen/X86/statepoint-live-in.ll
index b236393e9f4..abe2b0a7acc 100644
--- a/test/CodeGen/X86/statepoint-live-in.ll
+++ b/test/CodeGen/X86/statepoint-live-in.ll
@@ -89,27 +89,37 @@ entry:
 
 ; CHECK: Ltmp0-_test1
 ; CHECK:      .byte	1
-; CHECK-NEXT: .byte	4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT: .short 4
 ; CHECK-NEXT: .short	5
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT: .long	0
 
 ; CHECK: Ltmp1-_test2
 ; CHECK:      .byte	1
-; CHECK-NEXT: .byte	4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT: .short 4
 ; CHECK-NEXT: .short	6
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT: .long	0
 ; CHECK:      .byte	1
-; CHECK-NEXT: .byte	4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT: .short 4
 ; CHECK-NEXT: .short	3
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT: .long	0
 ; CHECK: Ltmp2-_test2
 ; CHECK:      .byte	1
-; CHECK-NEXT: .byte	4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT: .short 4
 ; CHECK-NEXT: .short	3
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT: .long	0
 ; CHECK:      .byte	1
-; CHECK-NEXT: .byte	4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT: .short 4
 ; CHECK-NEXT: .short	6
+; CHECK-NEXT:   .short  0
 ; CHECK-NEXT: .long	0
 
 declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll
index df6180de17a..0506381b9ec 100644
--- a/test/CodeGen/X86/statepoint-stackmap-format.ll
+++ b/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -79,7 +79,7 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
 ; CHECK-LABEL: .section .llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
 ; Header
-; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 3
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
@@ -114,58 +114,80 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
 ; CHECK: .short	11
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; SmallConstant (2)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	2
 ; Indirect Spill Slot [RSP+0]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 ; SmallConstant  (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; SmallConstant  (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; SmallConstant  (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; Indirect Spill Slot [RSP+16]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 ; Indirect Spill Slot [RSP+8]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	8
 ; Indirect Spill Slot [RSP+16]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 ; Indirect Spill Slot [RSP+16]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 
 ; No Padding or LiveOuts
@@ -186,53 +208,73 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
 ; CHECK: .short	11
 ; SmallConstant (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; SmallConstant (2)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	2
 ; Indirect Spill Slot [RSP+0]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 ; SmallConstant  (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; SmallConstant  (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; SmallConstant  (0)
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 ; Indirect Spill Slot [RSP+16]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 ; Indirect Spill Slot [RSP+8]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	8
 ; Indirect Spill Slot [RSP+16]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 ; Indirect Spill Slot [RSP+16]
 ; CHECK: .byte	3
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	7
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	16
 
 ; No Padding or LiveOuts
@@ -257,22 +299,28 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
 ; StkMapRecord[0]:
 ; SmallConstant(0):
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 
 ; StkMapRecord[1]:
 ; SmallConstant(0):
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 
 ; StkMapRecord[2]:
 ; SmallConstant(0):
 ; CHECK: .byte	4
-; CHECK: .byte	8
+; CHECK-NEXT:   .byte   0
+; CHECK: .short 8
 ; CHECK: .short	0
+; CHECK-NEXT:   .short  0
 ; CHECK: .long	0
 
 ; No padding or LiveOuts
diff --git a/test/CodeGen/X86/statepoint-vector.ll b/test/CodeGen/X86/statepoint-vector.ll
index 15fb25777ed..000e8874288 100644
--- a/test/CodeGen/X86/statepoint-vector.ll
+++ b/test/CodeGen/X86/statepoint-vector.ll
@@ -108,51 +108,67 @@ entry:
 
 ; CHECK: .Ltmp0-test
 ; Check for the two spill slots
-; Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0]
-; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0]
+; Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
+; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	0
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	0
 
 ; CHECK: .Ltmp1-test2
 ; Check for the two spill slots
-; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
-; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0]
+; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
+; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	16
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	0
 
 ; CHECK: .Ltmp2-test3
 ; Check for the four spill slots
-; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
-; Stack Maps: 		Loc 4: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
-; Stack Maps: 		Loc 5: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16]
-; Stack Maps: 		Loc 6: Indirect 7+0		[encoding: .byte 3, .byte 16, .short 7, .int 0]
+; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
+; Stack Maps: 		Loc 4: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
+; Stack Maps: 		Loc 5: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
+; Stack Maps: 		Loc 6: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	16
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	 0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	16
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	 0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	16
 ; CHECK: .byte	3
-; CHECK: .byte	16
+; CHECK: .byte	 0
+; CHECK: .short 16
 ; CHECK: .short	7
+; CHECK: .short	0
 ; CHECK: .long	0
 
 declare void @do_safepoint()
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
index a65d830351e..e04c5321fa2 100644
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2899,3 +2899,37 @@ entry:
   %s2 = shufflevector <8 x float> %s1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
   ret <8 x float> %s2
 }
+
+define <4 x float> @PR30264(<4 x float> %x) {
+; SSE2-LABEL: PR30264:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],mem[2,3]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: PR30264:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],mem[2,3]
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: PR30264:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movaps {{.*#+}} xmm1 = <u,u,4,1>
+; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm0[0],zero,xmm1[2,3]
+; SSE41-NEXT:    movaps %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: PR30264:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = <u,u,4,1>
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x float> %x, <4 x float> <float undef, float 0.0, float undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+  %shuf2 = shufflevector <4 x float> %shuf1, <4 x float> <float undef, float undef, float 4.0, float 1.0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x float> %shuf2
+}
diff --git a/test/CodeGen/X86/widened-broadcast.ll b/test/CodeGen/X86/widened-broadcast.ll
index 900a7546f15..6b2e4de5cda 100644
--- a/test/CodeGen/X86/widened-broadcast.ll
+++ b/test/CodeGen/X86/widened-broadcast.ll
@@ -579,3 +579,72 @@ entry:
   %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <32 x i8> %ret
 }
+
+define <4 x float> @load_splat_4f32_8f32_0000(<8 x float>* %ptr) nounwind uwtable readnone ssp {
+; SSE-LABEL: load_splat_4f32_8f32_0000:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    movaps (%rdi), %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: load_splat_4f32_8f32_0000:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_splat_4f32_8f32_0000:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vmovaps (%rdi), %ymm0
+; AVX2-NEXT:    vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: load_splat_4f32_8f32_0000:
+; AVX512:       # BB#0: # %entry
+; AVX512-NEXT:    vmovaps (%rdi), %ymm0
+; AVX512-NEXT:    vbroadcastss %xmm0, %xmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+entry:
+  %ld = load <8 x float>, <8 x float>* %ptr
+  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_16f32_89898989(<16 x float>* %ptr) nounwind uwtable readnone ssp {
+; SSE2-LABEL: load_splat_8f32_16f32_89898989:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movaps 32(%rdi), %xmm0
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    movaps %xmm0, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: load_splat_8f32_16f32_89898989:
+; SSE42:       # BB#0: # %entry
+; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
+; SSE42-NEXT:    movapd %xmm0, %xmm1
+; SSE42-NEXT:    retq
+;
+; AVX1-LABEL: load_splat_8f32_16f32_89898989:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vbroadcastsd 32(%rdi), %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_splat_8f32_16f32_89898989:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vbroadcastsd 32(%rdi), %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: load_splat_8f32_16f32_89898989:
+; AVX512:       # BB#0: # %entry
+; AVX512-NEXT:    vmovapd (%rdi), %zmm0
+; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; AVX512-NEXT:    vbroadcastsd %xmm0, %ymm0
+; AVX512-NEXT:    retq
+entry:
+  %ld = load <16 x float>, <16 x float>* %ptr
+  %ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
+  ret <8 x float> %ret
+}
diff --git a/test/DebugInfo/AMDGPU/dbg-value-sched-crash.ll b/test/DebugInfo/AMDGPU/dbg-value-sched-crash.ll
new file mode 100644
index 00000000000..4e79727c857
--- /dev/null
+++ b/test/DebugInfo/AMDGPU/dbg-value-sched-crash.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck %s
+
+; Make sure we do not crash during scheduling when DBG_VALUE is the first
+; instruction in the basic block.
+
+; LLVM IR generated with the following command and OpenCL source:
+;
+; $clang -cl-std=CL2.0 -g -O2 -target amdgcn-amd-amdhsa -S -emit-llvm <path-to-file>
+;
+; kernel void kernel1(global int *A, global int *B) {
+;   if (*A == 1) {
+;     *B = 12;
+;   }
+;   if (*A == 2) {
+;     *B = 13;
+;   }
+; }
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+; CHECK-LABEL: {{^}}kernel1:
+define amdgpu_kernel void @kernel1(
+    i32 addrspace(1)* nocapture readonly %A,
+    i32 addrspace(1)* nocapture %B) !dbg !7  {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %A, i64 0, metadata !13, metadata !19), !dbg !20
+  tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %B, i64 0, metadata !14, metadata !19), !dbg !21
+  %0 = load i32, i32 addrspace(1)* %A, align 4, !dbg !22, !tbaa !24
+  %cmp = icmp eq i32 %0, 1, !dbg !28
+  br i1 %cmp, label %if.then, label %if.end, !dbg !29
+
+if.then:                                          ; preds = %entry
+  store i32 12, i32 addrspace(1)* %B, align 4, !dbg !30, !tbaa !24
+  %.pr = load i32, i32 addrspace(1)* %A, align 4, !dbg !32, !tbaa !24
+  br label %if.end, !dbg !34
+
+if.end:                                           ; preds = %if.then, %entry
+  %1 = phi i32 [ %.pr, %if.then ], [ %0, %entry ], !dbg !32
+  %cmp1 = icmp eq i32 %1, 2, !dbg !35
+  br i1 %cmp1, label %if.then2, label %if.end3, !dbg !36
+
+if.then2:                                         ; preds = %if.end
+  store i32 13, i32 addrspace(1)* %B, align 4, !dbg !37, !tbaa !24
+  br label %if.end3, !dbg !39
+
+if.end3:                                          ; preds = %if.then2, %if.end
+  ret void, !dbg !40
+}
+
+!llvm.dbg.cu = !{!0}
+!opencl.ocl.version = !{!3}
+!llvm.module.flags = !{!4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "dbg-value-sched-crash.cl", directory: "/some/random/directory")
+!2 = !{}
+!3 = !{i32 2, i32 0}
+!4 = !{i32 2, !"Dwarf Version", i32 2}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = !{!"clang version 4.0 "}
+!7 = distinct !DISubprogram(name: "kernel1", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{!13, !14}
+!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!14 = !DILocalVariable(name: "B", arg: 2, scope: !7, file: !1, line: 1, type: !10)
+!15 = !{i32 1, i32 1}
+!16 = !{!"none", !"none"}
+!17 = !{!"int*", !"int*"}
+!18 = !{!"", !""}
+!19 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)
+!20 = !DILocation(line: 1, column: 33, scope: !7)
+!21 = !DILocation(line: 1, column: 48, scope: !7)
+!22 = !DILocation(line: 2, column: 7, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !7, file: !1, line: 2, column: 7)
+!24 = !{!25, !25, i64 0}
+!25 = !{!"int", !26, i64 0}
+!26 = !{!"omnipotent char", !27, i64 0}
+!27 = !{!"Simple C/C++ TBAA"}
+!28 = !DILocation(line: 2, column: 10, scope: !23)
+!29 = !DILocation(line: 2, column: 7, scope: !7)
+!30 = !DILocation(line: 3, column: 8, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !23, file: !1, line: 2, column: 16)
+!32 = !DILocation(line: 5, column: 7, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !7, file: !1, line: 5, column: 7)
+!34 = !DILocation(line: 4, column: 3, scope: !31)
+!35 = !DILocation(line: 5, column: 10, scope: !33)
+!36 = !DILocation(line: 5, column: 7, scope: !7)
+!37 = !DILocation(line: 6, column: 8, scope: !38)
+!38 = distinct !DILexicalBlock(scope: !33, file: !1, line: 5, column: 16)
+!39 = !DILocation(line: 7, column: 3, scope: !38)
+!40 = !DILocation(line: 8, column: 1, scope: !7)
diff --git a/test/DebugInfo/COFF/cpp-mangling.ll b/test/DebugInfo/COFF/cpp-mangling.ll
index a75720f1812..8d1a136ec5f 100644
--- a/test/DebugInfo/COFF/cpp-mangling.ll
+++ b/test/DebugInfo/COFF/cpp-mangling.ll
@@ -63,7 +63,7 @@ attributes #1 = { nounwind readnone }
 !4 = !{i32 2, !"Debug Info Version", i32 3}
 !5 = !{!"clang version 3.9.0 "}
 !6 = distinct !DISubprogram(name: "bar", linkageName: "\01?bar@foo@@YAHH@Z", scope: !7, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
-!7 = !DINamespace(name: "foo", scope: null, file: !1, line: 1)
+!7 = !DINamespace(name: "foo", scope: null)
 !8 = !DISubroutineType(types: !9)
 !9 = !{!10, !10}
 !10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/DebugInfo/COFF/scopes.ll b/test/DebugInfo/COFF/scopes.ll
index c81ff40fee6..6ff462213f2 100644
--- a/test/DebugInfo/COFF/scopes.ll
+++ b/test/DebugInfo/COFF/scopes.ll
@@ -110,9 +110,9 @@ attributes #1 = { nounwind readnone }
 
 !0 = distinct !DIGlobalVariableExpression(var: !1)
 !1 = !DIGlobalVariable(name: "g", linkageName: "\01?g@bar@foo@@3UGlobalRecord@12@A", scope: !2, file: !3, line: 12, type: !5, isLocal: false, isDefinition: true)
-!2 = !DINamespace(name: "bar", scope: !4, file: !3, line: 2)
+!2 = !DINamespace(name: "bar", scope: !4)
 !3 = !DIFile(filename: "t.cpp", directory: "D:\5Csrc\5Cllvm\5Cbuild")
-!4 = !DINamespace(name: "foo", scope: null, file: !3, line: 1)
+!4 = !DINamespace(name: "foo", scope: null)
 !5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "GlobalRecord", scope: !2, file: !3, line: 9, size: 32, align: 32, elements: !6, identifier: ".?AUGlobalRecord@bar@foo@@")
 !6 = !{!7, !9}
 !7 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !5, file: !3, line: 10, baseType: !8, size: 32, align: 32)
diff --git a/test/DebugInfo/Generic/dwarf-public-names.ll b/test/DebugInfo/Generic/dwarf-public-names.ll
index 42f4c9a34ff..ff545cc7af0 100644
--- a/test/DebugInfo/Generic/dwarf-public-names.ll
+++ b/test/DebugInfo/Generic/dwarf-public-names.ll
@@ -116,7 +116,7 @@ attributes #1 = { nounwind readnone }
 !16 = !DIGlobalVariable(name: "global_variable", scope: null, file: !3, line: 17, type: !2, isLocal: false, isDefinition: true) ; previously: invalid DW_TAG_base_type
 !17 = !DIGlobalVariableExpression(var: !18)
 !18 = !DIGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", scope: !19, file: !3, line: 27, type: !6, isLocal: false, isDefinition: true)
-!19 = !DINamespace(name: "ns", scope: null, file: !3, line: 23)
+!19 = !DINamespace(name: "ns", scope: null)
 !20 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !11, retainedTypes: !11, globals: !21, imports: !11) ; previously: invalid DW_TAG_base_type
 !21 = !{!0, !15, !17}
 !22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/namespace.ll b/test/DebugInfo/Generic/namespace.ll
index c9a259dc700..983e20db411 100644
--- a/test/DebugInfo/Generic/namespace.ll
+++ b/test/DebugInfo/Generic/namespace.ll
@@ -2,15 +2,15 @@
 
 ; RUN: %llc_dwarf -O0 -filetype=obj -dwarf-linkage-names=All < %s | llvm-dwarfdump - | FileCheck %s
 ; CHECK: debug_info contents
+; CHECK: DW_AT_name{{.*}}= [[F1:.*]])
 ; CHECK: [[NS1:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
-; CHECK-NEXT: DW_AT_name{{.*}} = "A"
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1:".*debug-info-namespace.cpp"]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(5)
+; CHECK-NOT: DW_AT_decl_file
+; CHECK-NOT: DW_AT_decl_line
 ; CHECK-NOT: NULL
 ; CHECK: [[NS2:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name{{.*}} = "B"
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2:".*foo.cpp"]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(1)
+; CHECK-NOT: DW_AT_decl_file
+; CHECK-NOT: DW_AT_decl_line
 ; CHECK-NOT: NULL
 ; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name{{.*}}= "i"
@@ -56,15 +56,15 @@
 ; CHECK: DW_TAG_imported_module
 ; This is a bug, it should be in F2 but it inherits the file from its
 ; enclosing scope
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
+; CHECK-NEXT: DW_AT_decl_file{{.*}}stdin
 ; CHECK-NEXT: DW_AT_decl_line{{.*}}(15)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
 ; CHECK: NULL
 ; CHECK-NOT: NULL
 
 ; CHECK: DW_TAG_imported_module
-; Same bug as above, this should be F2, not F1
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
+; Same bug as above, this should be F2
+; CHECK-NEXT: DW_AT_decl_file{{.*}}debug-info-namespace.cpp
 ; CHECK-NEXT: DW_AT_decl_line{{.*}}(18)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
 ; CHECK-NOT: NULL
@@ -76,7 +76,7 @@
 ; CHECK: DW_AT_name{{.*}}= "func"
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_module
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2:.*]])
 ; CHECK-NEXT: DW_AT_decl_line{{.*}}(26)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
 ; CHECK-NOT: NULL
@@ -293,8 +293,8 @@ attributes #1 = { nounwind readnone }
 !3 = !{!4, !8}
 !4 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 5, flags: DIFlagFwdDecl, file: !5, scope: !6, identifier: "_ZTSN1A1B3fooE")
 !5 = !DIFile(filename: "foo.cpp", directory: "/tmp")
-!6 = !DINamespace(name: "B", line: 1, file: !5, scope: !7)
-!7 = !DINamespace(name: "A", line: 5, file: !1, scope: null)
+!6 = !DINamespace(name: "B", scope: !7)
+!7 = !DINamespace(name: "A", scope: null)
 !8 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 6, size: 8, align: 8, file: !5, scope: !6, elements: !2, identifier: "_ZTSN1A1B3barE")
 !10 = distinct !DISubprogram(name: "f1", linkageName: "_ZN1A1B2f1Ev", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !5, scope: !6, type: !11, variables: !2)
 !11 = !DISubroutineType(types: !12)
diff --git a/test/DebugInfo/Generic/namespace_function_definition.ll b/test/DebugInfo/Generic/namespace_function_definition.ll
index 04fcc3bfb90..7d7725dec62 100644
--- a/test/DebugInfo/Generic/namespace_function_definition.ll
+++ b/test/DebugInfo/Generic/namespace_function_definition.ll
@@ -34,7 +34,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !1 = !DIFile(filename: "namespace_function_definition.cpp", directory: "/tmp/dbginfo")
 !2 = !{}
 !4 = distinct !DISubprogram(name: "func", linkageName: "_ZN2ns4funcEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
-!5 = !DINamespace(name: "ns", line: 1, file: !1, scope: null)
+!5 = !DINamespace(name: "ns", scope: null)
 !6 = !DISubroutineType(types: !7)
 !7 = !{null}
 !8 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/Generic/namespace_inline_function_definition.ll b/test/DebugInfo/Generic/namespace_inline_function_definition.ll
index 72502e5f759..f029ab277d7 100644
--- a/test/DebugInfo/Generic/namespace_inline_function_definition.ll
+++ b/test/DebugInfo/Generic/namespace_inline_function_definition.ll
@@ -79,7 +79,7 @@ attributes #2 = { nounwind readnone }
 !7 = !{!8}
 !8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
 !9 = distinct !DISubprogram(name: "func", linkageName: "_ZN2ns4funcEi", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 6, file: !1, scope: !10, type: !11, variables: !2)
-!10 = !DINamespace(name: "ns", line: 1, file: !1, scope: null)
+!10 = !DINamespace(name: "ns", scope: null)
 !11 = !DISubroutineType(types: !12)
 !12 = !{!8, !8}
 !13 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/Generic/thrownTypes.ll b/test/DebugInfo/Generic/thrownTypes.ll
new file mode 100644
index 00000000000..8e84e7bf2bf
--- /dev/null
+++ b/test/DebugInfo/Generic/thrownTypes.ll
@@ -0,0 +1,38 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; CHECK: DW_TAG_subprogram
+; CHECK:   DW_AT_name {{.*}} "f"
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_TAG_thrown_type
+; CHECK-NEXT:   DW_AT_type {{.*}} {[[ERROR:.*]]}
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_TAG_thrown_type
+; CHECK-NEXT:   DW_AT_type {{.*}} {[[ERROR2:.*]]}
+; CHECK: [[ERROR]]: DW_TAG_structure_type
+; CHECK-NEXT:   DW_AT_name {{.*}} "Error"
+; CHECK: [[ERROR2]]: DW_TAG_structure_type
+; CHECK-NEXT:   DW_AT_name {{.*}} "DifferentError"
+
+; Function Attrs: nounwind uwtable
+define void @f() #0 !dbg !5 {
+entry:
+  ret void, !dbg !11
+}
+
+attributes #0 = { nounwind uwtable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_Swift, producer: "swiftc", isOptimized: false, emissionKind: FullDebug, file: !1)
+!1 = !DIFile(filename: "f.swift", directory: "/")
+!3 = !DICompositeType(tag: DW_TAG_structure_type, name: "Error")
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "DifferentError")
+!5 = distinct !DISubprogram(name: "f", line: 2, isLocal: false, isDefinition: true, unit: !0, scopeLine: 2, file: !1, scope: !1, type: !6, thrownTypes: !{!3, !4})
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!11 = !DILocation(line: 3, scope: !5)
diff --git a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
new file mode 100644
index 00000000000..66030020f8f
--- /dev/null
+++ b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
@@ -0,0 +1,43 @@
+---
+DbiStream:
+  Modules:
+    - Module:          'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+      ObjFile:         'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+      SourceFiles:
+        - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+      LineInfo:
+        Checksums:
+          - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+            Kind:            MD5
+            Checksum:        A0A5BD0D3ECD93FC29D19DE826FBF4BC
+          - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+            Kind:            MD5
+            Checksum:        1154D69F5B2650196E1FC34F4134E56B
+        Lines:
+          - CodeSize:        10
+            Flags:           [  ]
+            RelocOffset:     16
+            RelocSegment:    1
+            Blocks:
+              - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+                Lines:
+                  - Offset:          0
+                    LineStart:       5
+                    IsStatement:     true
+                    EndDelta:        0
+                  - Offset:          3
+                    LineStart:       6
+                    IsStatement:     true
+                    EndDelta:        0
+                  - Offset:          8
+                    LineStart:       7
+                    IsStatement:     true
+                    EndDelta:        0
+                Columns:
+        InlineeLines:    
+          - HasExtraFiles:   false
+            Sites:           
+              - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+                LineNum:         26950
+                Inlinee:         22767
+...
diff --git a/test/DebugInfo/PDB/pdbdump-headers.test b/test/DebugInfo/PDB/pdbdump-headers.test
index 4152f0f9da0..d67743efd70 100644
--- a/test/DebugInfo/PDB/pdbdump-headers.test
+++ b/test/DebugInfo/PDB/pdbdump-headers.test
@@ -485,27 +485,6 @@
 ; EMPTY-NEXT:         }
 ; EMPTY-NEXT:       ]
 ; EMPTY-NEXT:       LineInfo [
-; EMPTY-NEXT:         Lines {
-; EMPTY-NEXT:           FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
-; EMPTY-NEXT:           Line {
-; EMPTY-NEXT:             Offset: 0
-; EMPTY-NEXT:             LineNumberStart: 5
-; EMPTY-NEXT:             EndDelta: 0
-; EMPTY-NEXT:             IsStatement: Yes
-; EMPTY-NEXT:           }
-; EMPTY-NEXT:           Line {
-; EMPTY-NEXT:             Offset: 3
-; EMPTY-NEXT:             LineNumberStart: 6
-; EMPTY-NEXT:             EndDelta: 0
-; EMPTY-NEXT:             IsStatement: Yes
-; EMPTY-NEXT:           }
-; EMPTY-NEXT:           Line {
-; EMPTY-NEXT:             Offset: 8
-; EMPTY-NEXT:             LineNumberStart: 7
-; EMPTY-NEXT:             EndDelta: 0
-; EMPTY-NEXT:             IsStatement: Yes
-; EMPTY-NEXT:           }
-; EMPTY-NEXT:         }
 ; EMPTY-NEXT:         FileChecksums {
 ; EMPTY-NEXT:           Checksum {
 ; EMPTY-NEXT:             FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
@@ -515,6 +494,35 @@
 ; EMPTY-NEXT:             )
 ; EMPTY-NEXT:           }
 ; EMPTY-NEXT:         }
+; EMPTY-NEXT:         Lines {
+; EMPTY-NEXT:           Block {
+; EMPTY-NEXT:             RelocSegment: 1
+; EMPTY-NEXT:             RelocOffset: 16
+; EMPTY-NEXT:             CodeSize: 10
+; EMPTY-NEXT:             HasColumns: No
+; EMPTY-NEXT:             Lines {
+; EMPTY-NEXT:               FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+; EMPTY-NEXT:               Line {
+; EMPTY-NEXT:                 Offset: 0
+; EMPTY-NEXT:                 LineNumberStart: 5
+; EMPTY-NEXT:                 EndDelta: 0
+; EMPTY-NEXT:                 IsStatement: Yes
+; EMPTY-NEXT:               }
+; EMPTY-NEXT:               Line {
+; EMPTY-NEXT:                 Offset: 3
+; EMPTY-NEXT:                 LineNumberStart: 6
+; EMPTY-NEXT:                 EndDelta: 0
+; EMPTY-NEXT:                 IsStatement: Yes
+; EMPTY-NEXT:               }
+; EMPTY-NEXT:               Line {
+; EMPTY-NEXT:                 Offset: 8
+; EMPTY-NEXT:                 LineNumberStart: 7
+; EMPTY-NEXT:                 EndDelta: 0
+; EMPTY-NEXT:                 IsStatement: Yes
+; EMPTY-NEXT:               }
+; EMPTY-NEXT:             }
+; EMPTY-NEXT:           }
+; EMPTY-NEXT:         }
 ; EMPTY-NEXT:       ]
 ; EMPTY-NEXT:     }
 ; EMPTY-NEXT:     {
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test
new file mode 100644
index 00000000000..1d63c85352a
--- /dev/null
+++ b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test
@@ -0,0 +1,71 @@
+; This testcase verifies that we can produce a PDB with line
+; information.  It does this by describing some line information
+; manually in YAML, creating a PDB out of it, then dumping then
+; line information from the resulting PDB.
+
+; RUN: llvm-pdbdump yaml2pdb -pdb=%t.pdb %p/Inputs/simple-line-info.yaml
+; RUN: llvm-pdbdump raw -line-info %t.pdb | FileCheck -check-prefix=LINES %s
+
+LINES:       Modules [
+LINES-NEXT:    {
+LINES-NEXT:      Name: d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj
+LINES:           LineInfo [
+LINES-NEXT:        FileChecksums {
+LINES-NEXT:          Checksum {
+LINES-NEXT:            FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+LINES-NEXT:            Kind: MD5 (0x1)
+LINES-NEXT:            Checksum (
+LINES-NEXT:              0000: A0A5BD0D 3ECD93FC 29D19DE8 26FBF4BC  |....>...)...&...|
+LINES-NEXT:            )
+LINES-NEXT:          }
+LINES-NEXT:          Checksum {
+LINES-NEXT:            FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
+LINES-NEXT:            Kind: MD5 (0x1)
+LINES-NEXT:            Checksum (
+LINES-NEXT:              0000: 1154D69F 5B265019 6E1FC34F 4134E56B  |.T..[&P.n..OA4.k|
+LINES-NEXT:            )
+LINES-NEXT:          }
+LINES-NEXT:        }
+LINES-NEXT:        Lines {
+LINES-NEXT:          Block {
+LINES-NEXT:            RelocSegment: 1
+LINES-NEXT:            RelocOffset: 16
+LINES-NEXT:            CodeSize: 10
+LINES-NEXT:            HasColumns: No
+LINES-NEXT:            Lines {
+LINES-NEXT:              FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+LINES-NEXT:              Line {
+LINES-NEXT:                Offset: 0
+LINES-NEXT:                LineNumberStart: 5
+LINES-NEXT:                EndDelta: 0
+LINES-NEXT:                IsStatement: Yes
+LINES-NEXT:              }
+LINES-NEXT:              Line {
+LINES-NEXT:                Offset: 3
+LINES-NEXT:                LineNumberStart: 6
+LINES-NEXT:                EndDelta: 0
+LINES-NEXT:                IsStatement: Yes
+LINES-NEXT:              }
+LINES-NEXT:              Line {
+LINES-NEXT:                Offset: 8
+LINES-NEXT:                LineNumberStart: 7
+LINES-NEXT:                EndDelta: 0
+LINES-NEXT:                IsStatement: Yes
+LINES-NEXT:              }
+LINES-NEXT:            }
+LINES-NEXT:          }
+LINES-NEXT:        }
+LINES-NEXT:        InlineeLines {
+LINES-NEXT:          HasExtraFiles: No
+LINES-NEXT:          Lines [
+LINES-NEXT:            Inlinee {
+LINES-NEXT:              FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
+LINES-NEXT:              Function {
+LINES-NEXT:                Index: 0x58ef (unknown function)
+LINES-NEXT:              }
+LINES-NEXT:              SourceLine: 26950
+LINES-NEXT:            }
+LINES-NEXT:          ]
+LINES-NEXT:        }
+LINES-NEXT:      ]
+LINES-NEXT:    }
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
index 016d5246498..ca7427c0099 100644
--- a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
+++ b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
@@ -1,59 +1,59 @@
-; RUN: llvm-pdbdump pdb2yaml -dbi-module-lines %p/Inputs/empty.pdb \
-; RUN:   | FileCheck -check-prefix=YAML %s
-
-
-YAML: ---
-YAML: MSF:
-YAML:   SuperBlock:
-YAML:     BlockSize:       4096
-YAML:     FreeBlockMap:    2
-YAML:     NumBlocks:       25
-YAML:     NumDirectoryBytes: 136
-YAML:     Unknown1:        0
-YAML:     BlockMapAddr:    24
-YAML:   NumDirectoryBlocks: 1
-YAML:   DirectoryBlocks: [ 23 ]
-YAML:   NumStreams:      0
-YAML:   FileSize:        102400
-YAML: DbiStream:
-YAML:   VerHeader:       V70
-YAML:   Age:             1
-YAML:   BuildNumber:     35840
-YAML:   PdbDllVersion:   31101
-YAML:   PdbDllRbld:      0
-YAML:   Flags:           1
-YAML:   MachineType:     x86
-YAML:   Modules:
-YAML:   - Module:          'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-YAML:     ObjFile:         'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-YAML:     SourceFiles:
-YAML:       - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML:     LineInfo:
-YAML:       Lines:
-YAML:         CodeSize:        10
-YAML:         Flags:           [  ]
-YAML:         RelocOffset:     16
-YAML:      RelocSegment:    1
-YAML:      LineInfo:
-YAML:        - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML:          Lines:
-YAML:            - Offset:          0
-YAML:              LineStart:       5
-YAML:              IsStatement:     true
-YAML:              EndDelta:        5
-YAML:            - Offset:          3
-YAML:              LineStart:       6
-YAML:              IsStatement:     true
-YAML:              EndDelta:        6
-YAML:            - Offset:          8
-YAML:              LineStart:       7
-YAML:              IsStatement:     true
-YAML:              EndDelta:        7
-YAML:          Columns:
-YAML:     Checksums:
-YAML:       - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML:         Kind:            MD5
-YAML:         Checksum:        A0A5BD0D3ECD93FC29D19DE826FBF4BC
-YAML:  - Module:          '* Linker *'
-YAML:    ObjFile:         ''
+; RUN: llvm-pdbdump pdb2yaml -dbi-module-lines %p/Inputs/empty.pdb \
+; RUN:   | FileCheck -check-prefix=YAML %s
+
+
+YAML: ---
+YAML: MSF:
+YAML:   SuperBlock:
+YAML:     BlockSize:       4096
+YAML:     FreeBlockMap:    2
+YAML:     NumBlocks:       25
+YAML:     NumDirectoryBytes: 136
+YAML:     Unknown1:        0
+YAML:     BlockMapAddr:    24
+YAML:   NumDirectoryBlocks: 1
+YAML:   DirectoryBlocks: [ 23 ]
+YAML:   NumStreams:      0
+YAML:   FileSize:        102400
+YAML: DbiStream:
+YAML:   VerHeader:       V70
+YAML:   Age:             1
+YAML:   BuildNumber:     35840
+YAML:   PdbDllVersion:   31101
+YAML:   PdbDllRbld:      0
+YAML:   Flags:           1
+YAML:   MachineType:     x86
+YAML:   Modules:
+YAML:   - Module:          'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+YAML:     ObjFile:         'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+YAML:     SourceFiles:
+YAML:       - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML:     LineInfo:
+YAML:       Checksums:
+YAML:         - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML:           Kind:            MD5
+YAML:           Checksum:        A0A5BD0D3ECD93FC29D19DE826FBF4BC
+YAML:       Lines:
+YAML:         CodeSize:        10
+YAML:         Flags:           [  ]
+YAML:         RelocOffset:     16
+YAML:         RelocSegment:    1
+YAML:         Blocks:
+YAML:           - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML:             Lines:
+YAML:               - Offset:          0
+YAML:                 LineStart:       5
+YAML:                 IsStatement:     true
+YAML:                 EndDelta:        0
+YAML:               - Offset:          3
+YAML:                 LineStart:       6
+YAML:                 IsStatement:     true
+YAML:                 EndDelta:        0
+YAML:               - Offset:          8
+YAML:                 LineStart:       7
+YAML:                 IsStatement:     true
+YAML:                 EndDelta:        0
+YAML:             Columns:
+YAML:  - Module:          '* Linker *'
+YAML:    ObjFile:         ''
 YAML: ...
\ No newline at end of file
diff --git a/test/DebugInfo/X86/dwarf-linkage-names.ll b/test/DebugInfo/X86/dwarf-linkage-names.ll
index c736dab2acd..2a0ec737082 100644
--- a/test/DebugInfo/X86/dwarf-linkage-names.ll
+++ b/test/DebugInfo/X86/dwarf-linkage-names.ll
@@ -57,7 +57,7 @@ attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fp
 
 !0 = !DIGlobalVariableExpression(var: !1)
 !1 = !DIGlobalVariable(name: "global_var", linkageName: "_ZN4test10global_varE", scope: !2, file: !3, line: 2, type: !4, isLocal: false, isDefinition: true)
-!2 = !DINamespace(name: "test", scope: null, file: !3, line: 1)
+!2 = !DINamespace(name: "test", scope: null)
 !3 = !DIFile(filename: "dwarf-linkage-names.cpp", directory: "/home/probinson/projects/scratch")
 !4 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
 !5 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 3.8.0 (trunk 244662)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !6, globals: !7)
diff --git a/test/DebugInfo/X86/dwarf-public-names.ll b/test/DebugInfo/X86/dwarf-public-names.ll
index e4e6d54e1ad..150e08bc210 100644
--- a/test/DebugInfo/X86/dwarf-public-names.ll
+++ b/test/DebugInfo/X86/dwarf-public-names.ll
@@ -43,7 +43,7 @@
 
 ; Skip the output to the header of the pubnames section.
 ; LINUX: debug_pubnames
-; LINUX-NEXT: unit_size = 0x0000012a
+; LINUX-NEXT: unit_size = 0x00000128
 
 ; Check for each name in the output.
 ; LINUX-DAG: "ns"
@@ -122,7 +122,7 @@ attributes #1 = { nounwind readnone }
 !16 = !DIGlobalVariable(name: "global_variable", scope: null, file: !3, line: 17, type: !2, isLocal: false, isDefinition: true) ; previously: invalid DW_TAG_base_type
 !17 = !DIGlobalVariableExpression(var: !18)
 !18 = !DIGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", scope: !19, file: !3, line: 27, type: !6, isLocal: false, isDefinition: true)
-!19 = !DINamespace(name: "ns", scope: null, file: !3, line: 23)
+!19 = !DINamespace(name: "ns", scope: null)
 !20 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !11, retainedTypes: !11, globals: !21, imports: !11) ; previously: invalid DW_TAG_base_type
 !21 = !{!0, !15, !17}
 !22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll
index e9303e6a265..ce5f6d67bd0 100644
--- a/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/test/DebugInfo/X86/generate-odr-hash.ll
@@ -227,9 +227,9 @@ attributes #1 = { nounwind readnone }
 !5 = !{}
 !6 = !DIGlobalVariableExpression(var: !7)
 !7 = !DIGlobalVariable(name: "animal", linkageName: "_ZN7echidna8capybara8mongoose6animalE", scope: !8, file: !2, line: 18, type: !11, isLocal: false, isDefinition: true)
-!8 = !DINamespace(name: "mongoose", scope: !9, file: !2, line: 12)
-!9 = !DINamespace(name: "capybara", scope: !10, file: !2, line: 11)
-!10 = !DINamespace(name: "echidna", scope: null, file: !2, line: 10)
+!8 = !DINamespace(name: "mongoose", scope: !9)
+!9 = !DINamespace(name: "capybara", scope: !10)
+!10 = !DINamespace(name: "echidna", scope: null)
 !11 = !DICompositeType(tag: DW_TAG_class_type, name: "fluffy", scope: !8, file: !2, line: 13, size: 64, align: 32, elements: !12, identifier: "_ZTSN7echidna8capybara8mongoose6fluffyE")
 !12 = !{!13, !15}
 !13 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !11, file: !2, line: 14, baseType: !14, size: 32, align: 32, flags: DIFlagPrivate)
@@ -238,7 +238,7 @@ attributes #1 = { nounwind readnone }
 !16 = !DIGlobalVariableExpression(var: !17)
 !17 = !DIGlobalVariable(name: "w", scope: null, file: !2, line: 29, type: !18, isLocal: true, isDefinition: true)
 !18 = !DICompositeType(tag: DW_TAG_structure_type, name: "walrus", scope: !19, file: !2, line: 24, size: 8, align: 8, elements: !20)
-!19 = !DINamespace(scope: null, file: !2, line: 23)
+!19 = !DINamespace(scope: null)
 !20 = !{!21}
 !21 = !DISubprogram(name: "walrus", scope: !18, file: !2, line: 25, type: !22, isLocal: false, isDefinition: false, scopeLine: 25, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false)
 !22 = !DISubroutineType(types: !23)
diff --git a/test/DebugInfo/X86/gnu-public-names-tu.ll b/test/DebugInfo/X86/gnu-public-names-tu.ll
index 0b7647aa8c7..9390d693ea0 100644
--- a/test/DebugInfo/X86/gnu-public-names-tu.ll
+++ b/test/DebugInfo/X86/gnu-public-names-tu.ll
@@ -47,7 +47,7 @@
 !7 = !{!8}
 !8 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !6, file: !3, line: 6, baseType: !9, size: 8)
 !9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", scope: !10, file: !3, line: 2, size: 8, elements: !4, identifier: "_ZTSN2ns3fooE")
-!10 = !DINamespace(name: "ns", scope: null, file: !3, line: 1)
+!10 = !DINamespace(name: "ns", scope: null)
 !11 = !{i32 2, !"Dwarf Version", i32 4}
 !12 = !{i32 2, !"Debug Info Version", i32 3}
 !13 = !{!"clang version 5.0.0 (trunk 293904) (llvm/trunk 293908)"}
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
index 44cf0c5cea9..533ab838a73 100644
--- a/test/DebugInfo/X86/gnu-public-names.ll
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -318,7 +318,7 @@ attributes #1 = { nounwind readnone }
 !15 = !DISubroutineType(types: !16)
 !16 = !{!9}
 !17 = !DICompositeType(tag: DW_TAG_structure_type, name: "D", scope: !18, file: !3, line: 29, size: 32, align: 32, elements: !19, identifier: "_ZTSN2ns1DE")
-!18 = !DINamespace(name: "ns", scope: null, file: !3, line: 23)
+!18 = !DINamespace(name: "ns", scope: null)
 !19 = !{!20}
 !20 = !DIDerivedType(tag: DW_TAG_member, name: "A", scope: !17, file: !3, line: 30, baseType: !9, size: 32, align: 32)
 !21 = !{!0, !22, !24, !26, !28, !34, !37, !40}
@@ -336,14 +336,14 @@ attributes #1 = { nounwind readnone }
 !33 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
 !34 = !DIGlobalVariableExpression(var: !35)
 !35 = !DIGlobalVariable(name: "i", linkageName: "_ZN12_GLOBAL__N_11iE", scope: !36, file: !3, line: 37, type: !9, isLocal: true, isDefinition: true)
-!36 = !DINamespace(scope: null, file: !3, line: 36)
+!36 = !DINamespace(scope: null)
 !37 = !DIGlobalVariableExpression(var: !38)
 !38 = !DIGlobalVariable(name: "b", linkageName: "_ZN12_GLOBAL__N_15inner1bE", scope: !39, file: !3, line: 47, type: !9, isLocal: true, isDefinition: true)
-!39 = !DINamespace(name: "inner", scope: !36, file: !3, line: 46)
+!39 = !DINamespace(name: "inner", scope: !36)
 !40 = !DIGlobalVariableExpression(var: !41)
 !41 = !DIGlobalVariable(name: "c", linkageName: "_ZN5outer12_GLOBAL__N_11cE", scope: !42, file: !3, line: 53, type: !9, isLocal: true, isDefinition: true)
-!42 = !DINamespace(scope: !43, file: !3, line: 52)
-!43 = !DINamespace(name: "outer", scope: null, file: !3, line: 51)
+!42 = !DINamespace(scope: !43)
+!43 = !DINamespace(name: "outer", scope: null)
 !44 = !{!45, !47}
 !45 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !2, entity: !46, line: 34)
 !46 = !DIGlobalVariable(name: "global_namespace_variable_decl", linkageName: "_ZN2ns30global_namespace_variable_declE", scope: !18, file: !3, line: 28, type: !9, isLocal: false, isDefinition: false)
diff --git a/test/DebugInfo/X86/inline-namespace.ll b/test/DebugInfo/X86/inline-namespace.ll
index 3036e032008..4cfd8395062 100644
--- a/test/DebugInfo/X86/inline-namespace.ll
+++ b/test/DebugInfo/X86/inline-namespace.ll
@@ -28,9 +28,9 @@ target triple = "x86_64-apple-macosx10.12.0"
 
 !0 = distinct !DIGlobalVariableExpression(var: !1)
 !1 = !DIGlobalVariable(name: "i", linkageName: "_ZN6normal7inlined1iE", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
-!2 = !DINamespace(name: "inlined", scope: !4, file: !3, line: 1, exportSymbols: true)
+!2 = !DINamespace(name: "inlined", scope: !4, exportSymbols: true)
 !3 = !DIFile(filename: "namespace.cpp", directory: "/")
-!4 = !DINamespace(name: "normal", scope: null, file: !3, line: 1)
+!4 = !DINamespace(name: "normal", scope: null)
 !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !6 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 4.0.0 (trunk 285825) (llvm/trunk 285822)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !7, globals: !8)
 !7 = !{}
diff --git a/test/DebugInfo/X86/lexical-block-file-inline.ll b/test/DebugInfo/X86/lexical-block-file-inline.ll
index d956efee1ee..0f85a5573f0 100644
--- a/test/DebugInfo/X86/lexical-block-file-inline.ll
+++ b/test/DebugInfo/X86/lexical-block-file-inline.ll
@@ -137,7 +137,7 @@ attributes #2 = { nounwind }
 !11 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !12, entity: !14, line: 1)
 !12 = !DILexicalBlockFile(scope: !13, file: !9, discriminator: 0)
 !13 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3)
-!14 = !DINamespace(name: "N", scope: null, file: !1, line: 1)
+!14 = !DINamespace(name: "N", scope: null)
 !15 = !{i32 2, !"Dwarf Version", i32 4}
 !16 = !{i32 2, !"Debug Info Version", i32 3}
 !17 = !{!"clang version 3.9.0 (trunk 264349)"}
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
index 497a345e07b..3122b9d1fef 100644
--- a/test/DebugInfo/X86/multiple-at-const-val.ll
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -34,7 +34,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 174207)", isOptimized: true, emissionKind: FullDebug, file: !1802, enums: !1, retainedTypes: !955, globals: !1786, imports:  !955)
 !1 = !{!26}
-!4 = !DINamespace(name: "std", line: 48, scope: !5)
+!4 = !DINamespace(name: "std", scope: !5)
 !5 = !DIFile(filename: "os_base.h", directory: "/privite/tmp")
 !25 = !DIEnumerator(name: "_S_os_fmtflags_end", value: 65536) ; [ DW_TAG_enumerator ]
 !26 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "_Ios_Iostate", line: 146, size: 32, align: 32, file: !1801, scope: !4, elements: !27)
diff --git a/test/DebugInfo/X86/parameters.ll b/test/DebugInfo/X86/parameters.ll
index 38c40dfd6c4..a8494f9e963 100644
--- a/test/DebugInfo/X86/parameters.ll
+++ b/test/DebugInfo/X86/parameters.ll
@@ -90,7 +90,7 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !1 = !DIFile(filename: "pass.cpp", directory: "/tmp")
 !2 = !{}
 !4 = distinct !DISubprogram(name: "func", linkageName: "_ZN7pr147634funcENS_3fooE", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
-!5 = !DINamespace(name: "pr14763", line: 1, file: !1, scope: null)
+!5 = !DINamespace(name: "pr14763", scope: null)
 !6 = !DISubroutineType(types: !7)
 !7 = !{!8, !8}
 !8 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 2, size: 8, align: 8, file: !1, scope: !5, elements: !9)
diff --git a/test/DebugInfo/X86/pr19307.ll b/test/DebugInfo/X86/pr19307.ll
index ab37ae57345..a8278c9dcf8 100644
--- a/test/DebugInfo/X86/pr19307.ll
+++ b/test/DebugInfo/X86/pr19307.ll
@@ -94,7 +94,7 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !7 = !DIFile(filename: "/usr/include/locale.h", directory: "/llvm_cmake_gcc")
 !8 = !DICompositeType(tag: DW_TAG_class_type, name: "basic_string<char, std::char_traits<char>, std::allocator<char> >", line: 1134, flags: DIFlagFwdDecl, file: !9, scope: !10, identifier: "_ZTSSs")
 !9 = !DIFile(filename: "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/basic_string.tcc", directory: "/llvm_cmake_gcc")
-!10 = !DINamespace(name: "std", line: 153, file: !11, scope: null)
+!10 = !DINamespace(name: "std", scope: null)
 !11 = !DIFile(filename: "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/x86_64-linux-gnu/bits/c++config.h", directory: "/llvm_cmake_gcc")
 !13 = distinct !DISubprogram(name: "parse_range", linkageName: "_Z11parse_rangeRyS_Ss", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !14, type: !15, variables: !2)
 !14 = !DIFile(filename: "pr19307.cc", directory: "/llvm_cmake_gcc")
@@ -106,9 +106,9 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !20 = !DIFile(filename: "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/stringfwd.h", directory: "/llvm_cmake_gcc")
 !21 = !{!22, !26, !29, !33, !38, !41}
 !22 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 57, scope: !23, entity: !25)
-!23 = !DINamespace(name: "__gnu_debug", line: 55, file: !24, scope: null)
+!23 = !DINamespace(name: "__gnu_debug", scope: null)
 !24 = !DIFile(filename: "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/debug/debug.h", directory: "/llvm_cmake_gcc")
-!25 = !DINamespace(name: "__debug", line: 49, file: !24, scope: !10)
+!25 = !DINamespace(name: "__debug", scope: !10)
 !26 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 66, scope: !10, entity: !27)
 !27 = !DIDerivedType(tag: DW_TAG_typedef, name: "mbstate_t", line: 106, file: !5, baseType: !28)
 !28 = !DIDerivedType(tag: DW_TAG_typedef, name: "__mbstate_t", line: 95, file: !5, baseType: !4)
@@ -117,7 +117,7 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !31 = !DIFile(filename: "/llvm_cmake_gcc/bin/../lib/clang/3.5.0/include/stddef.h", directory: "/llvm_cmake_gcc")
 !32 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
 !33 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 42, scope: !34, entity: !36)
-!34 = !DINamespace(name: "__gnu_cxx", line: 69, file: !35, scope: null)
+!34 = !DINamespace(name: "__gnu_cxx", scope: null)
 !35 = !DIFile(filename: "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/cpp_type_traits.h", directory: "/llvm_cmake_gcc")
 !36 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", line: 155, file: !11, scope: !10, baseType: !37)
 !37 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
index 3b08415df74..83bd7b8c06a 100644
--- a/test/DebugInfo/X86/union-template.ll
+++ b/test/DebugInfo/X86/union-template.ll
@@ -35,7 +35,7 @@ attributes #1 = { nounwind readnone }
 
 !0 = !DIGlobalVariableExpression(var: !1)
 !1 = !DIGlobalVariable(name: "f", linkageName: "_ZN7PR156371fE", scope: !2, file: !3, line: 6, type: !4, isLocal: false, isDefinition: true)
-!2 = !DINamespace(name: "PR15637", scope: null, file: !3, line: 1)
+!2 = !DINamespace(name: "PR15637", scope: null)
 !3 = !DIFile(filename: "foo.cc", directory: "/usr/local/google/home/echristo/tmp")
 !4 = !DICompositeType(tag: DW_TAG_union_type, name: "Value<float>", scope: !2, file: !3, line: 2, size: 32, align: 32, elements: !5, templateParams: !12)
 !5 = !{!6, !8}
diff --git a/test/DebugInfo/dwarfdump-dump-gdbindex.test b/test/DebugInfo/dwarfdump-dump-gdbindex.test
index 7bdf6ed0c5d..6f1ef7cda3d 100644
--- a/test/DebugInfo/dwarfdump-dump-gdbindex.test
+++ b/test/DebugInfo/dwarfdump-dump-gdbindex.test
@@ -18,8 +18,8 @@ RUN: llvm-dwarfdump -debug-dump=gdb_index %p/Inputs/dwarfdump-gdbindex-v7.elf-x8
 ; CHECK-NEXT:   1: Offset = 0x34, Length = 0x34
 
 ; CHECK:      Address area offset = 0x38, has 2 entries:
-; CHECK-NEXT:   Low address = 0x4000e8, High address = 0x4000f3, CU index = 0
-; CHECK-NEXT:   Low address = 0x4000f3, High address = 0x4000fe, CU index = 1
+; CHECK-NEXT:   Low/High address = [0x4000e8, 0x4000f3) (Size: 0xb), CU id = 0
+; CHECK-NEXT:   Low/High address = [0x4000f3, 0x4000fe) (Size: 0xb), CU id = 1
 
 ; CHECK:      Symbol table offset = 0x60, size = 1024, filled slots:
 ; CHECK-NEXT:   489: Name offset = 0x1d, CU vector offset = 0x0
diff --git a/test/Feature/optnone-opt.ll b/test/Feature/optnone-opt.ll
index a00013ec179..efd35e56603 100644
--- a/test/Feature/optnone-opt.ll
+++ b/test/Feature/optnone-opt.ll
@@ -41,7 +41,6 @@ attributes #0 = { optnone noinline }
 ; OPT-O1-DAG: Skipping pass 'Combine redundant instructions'
 ; OPT-O1-DAG: Skipping pass 'Dead Store Elimination'
 ; OPT-O1-DAG: Skipping pass 'Early CSE'
-; OPT-O1-DAG: Skipping pass 'Early GVN Hoisting of Expressions'
 ; OPT-O1-DAG: Skipping pass 'Jump Threading'
 ; OPT-O1-DAG: Skipping pass 'MemCpy Optimization'
 ; OPT-O1-DAG: Skipping pass 'Reassociate expressions'
@@ -59,7 +58,6 @@ attributes #0 = { optnone noinline }
 ; OPT-MORE-DAG: Skipping pass 'Basic-Block Vectorization'
 ; OPT-MORE-DAG: Skipping pass 'Dead Code Elimination'
 ; OPT-MORE-DAG: Skipping pass 'Dead Instruction Elimination'
-; OPT-MORE-DAG: Skipping pass 'Early GVN Hoisting of Expressions'
 ; OPT-MORE-DAG: Skipping pass 'Lower atomic intrinsics
 
 ; Loop IR passes that opt doesn't turn on by default.
diff --git a/test/Instrumentation/AddressSanitizer/global_metadata.ll b/test/Instrumentation/AddressSanitizer/global_metadata.ll
index c189603381a..7d485a2ebce 100644
--- a/test/Instrumentation/AddressSanitizer/global_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/global_metadata.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -12,17 +12,23 @@ target triple = "x86_64-unknown-linux-gnu"
 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_asan_globals.cpp, i8* null }]
 
 ; Check that globals were instrumented:
-; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, align 32
-; CHECK: @.str = internal unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, align 32
+
+; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, comdat, align 32
+; CHECK: @.str = internal unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, comdat($".str${{[01-9a-f]+}}"), align 32
 
 ; Check emitted location descriptions:
 ; CHECK: [[VARNAME:@__asan_gen_.[0-9]+]] = private unnamed_addr constant [7 x i8] c"global\00", align 1
 ; CHECK: [[FILENAME:@__asan_gen_.[0-9]+]] = private unnamed_addr constant [22 x i8] c"/tmp/asan-globals.cpp\00", align 1
 ; CHECK: [[LOCDESCR:@__asan_gen_.[0-9]+]] = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* [[FILENAME]], i32 5, i32 5 }
+; CHECK: @__asan_global_global = {{.*}}i64 ptrtoint ({ i32, [60 x i8] }* @global to i64){{.*}} section "asan_globals"{{.*}}, !associated
+; CHECK: @__asan_global_.str = {{.*}}i64 ptrtoint ({ [14 x i8], [50 x i8] }* @.str to i64){{.*}} section "asan_globals"{{.*}}, !associated
+
+; The metadata has to be inserted to llvm.compiler.used to avoid being stripped
+; during LTO.
+; CHECK: @llvm.compiler.used {{.*}} @__asan_global_global {{.*}} section "llvm.metadata"
 
 ; Check that location descriptors and global names were passed into __asan_register_globals:
-; CHECK: i64 ptrtoint ([7 x i8]* [[VARNAME]] to i64)
-; CHECK: i64 ptrtoint ({ [22 x i8]*, i32, i32 }* [[LOCDESCR]] to i64)
+; CHECK: call void @__asan_register_elf_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64), i64 ptrtoint (i64* @__start_asan_globals to i64), i64 ptrtoint (i64* @__stop_asan_globals to i64))
 
 ; Function Attrs: nounwind sanitize_address
 define internal void @__cxx_global_var_init() #0 section ".text.startup" {
diff --git a/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll b/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll
index a8fe6a9f625..1723b336362 100644
--- a/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll
+++ b/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll
@@ -2,7 +2,7 @@
 ; allowing dead stripping to be performed, and that the appropriate runtime
 ; routines are invoked.
 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.11.0"
@@ -26,16 +26,16 @@ target triple = "x86_64-apple-macosx10.11.0"
 ; CHECK: @llvm.compiler.used {{.*}} @__asan_binder_global {{.*}} section "llvm.metadata"
 
 ; Test that there is the flag global variable:
-; CHECK: @__asan_globals_registered = common hidden global i64 0
+; CHECK: @___asan_globals_registered = common hidden global i64 0
 
 ; Test that __asan_register_image_globals is invoked from the constructor:
 ; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_register_image_globals(i64 ptrtoint (i64* @__asan_globals_registered to i64))
+; CHECK: call void @__asan_register_image_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64))
 ; CHECK: ret
 
 ; Test that __asan_unregister_image_globals is invoked from the destructor:
 ; CHECK-LABEL: define internal void @asan.module_dtor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_unregister_image_globals(i64 ptrtoint (i64* @__asan_globals_registered to i64))
+; CHECK: call void @__asan_unregister_image_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64))
 ; CHECK: ret
diff --git a/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll b/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll
index 27cbd61ef81..86390966c62 100644
--- a/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll
+++ b/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll
@@ -4,7 +4,7 @@
 
 ; FIXME: Later we can use this to instrument linkonce odr string literals.
 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s
 
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc19.0.24215"
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index 7df3d22dcde..07a09ff95d0 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 @xxx = global i32 0, align 4
@@ -6,8 +6,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; If a global is present, __asan_[un]register_globals should be called from
 ; module ctor/dtor
 
-; CHECK: llvm.global_ctors
 ; CHECK: @__asan_gen_ = private constant [8 x i8] c"<stdin>\00", align 1
+; CHECK: llvm.global_ctors
 ; CHECK: llvm.global_dtors
 
 ; Test that we don't instrument global arrays with static initializer
@@ -73,10 +73,10 @@ entry:
 
 ; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_register_globals
+; CHECK: call void @__asan_register_elf_globals
 ; CHECK: ret
 
 ; CHECK-LABEL: define internal void @asan.module_dtor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_unregister_globals
+; CHECK: call void @__asan_unregister_elf_globals
 ; CHECK: ret
diff --git a/test/Instrumentation/AddressSanitizer/no-globals.ll b/test/Instrumentation/AddressSanitizer/no-globals.ll
new file mode 100644
index 00000000000..30388b1865e
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/no-globals.ll
@@ -0,0 +1,12 @@
+; A module with no asan-instrumented globals has no asan destructor, and has an asan constructor in a comdat.
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -asan -asan-module -asan-with-comdat=1 -asan-globals-live-support=1 -S | FileCheck %s
+
+define void @f() {
+  ret void
+}
+
+; CHECK-NOT: @llvm.global_dtors
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* @asan.module_ctor, i8* bitcast (void ()* @asan.module_ctor to i8*) }]
+; CHECK-NOT: @llvm.global_dtors
+; CHECK: define internal void @asan.module_ctor() comdat
+; CHECK-NOT: @llvm.global_dtors
diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll
index 752581eecc3..1e9b40ff7ab 100644
--- a/test/Linker/2011-08-18-unique-class-type.ll
+++ b/test/Linker/2011-08-18-unique-class-type.ll
@@ -29,7 +29,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 !8 = !{null}
 !9 = !DILocalVariable(name: "mya", line: 4, arg: 1, scope: !5, file: !6, type: !10)
 !10 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 3, size: 8, align: 8, file: !17, scope: !11, elements: !2)
-!11 = !DINamespace(name: "N1", line: 2, file: !17, scope: null)
+!11 = !DINamespace(name: "N1", scope: null)
 !12 = !DIFile(filename: "./n.h", directory: "/private/tmp")
 !13 = !DILocation(line: 4, column: 12, scope: !5)
 !14 = !DILocation(line: 4, column: 18, scope: !15)
diff --git a/test/Linker/2011-08-18-unique-class-type2.ll b/test/Linker/2011-08-18-unique-class-type2.ll
index a933cc3fd7d..439ddae05ac 100644
--- a/test/Linker/2011-08-18-unique-class-type2.ll
+++ b/test/Linker/2011-08-18-unique-class-type2.ll
@@ -27,7 +27,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 !8 = !{null}
 !9 = !DILocalVariable(name: "youra", line: 4, arg: 1, scope: !5, file: !6, type: !10)
 !10 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 3, size: 8, align: 8, file: !17, scope: !11, elements: !2)
-!11 = !DINamespace(name: "N1", line: 2, file: !17, scope: null)
+!11 = !DINamespace(name: "N1", scope: null)
 !12 = !DIFile(filename: "./n.h", directory: "/private/tmp")
 !13 = !DILocation(line: 4, column: 12, scope: !5)
 !14 = !DILocation(line: 4, column: 20, scope: !15)
diff --git a/test/Linker/pr26037.ll b/test/Linker/pr26037.ll
index fafc60825ec..0e6da17e9fb 100644
--- a/test/Linker/pr26037.ll
+++ b/test/Linker/pr26037.ll
@@ -39,7 +39,7 @@ entry:
 !1 = !DIFile(filename: "a2.cc", directory: "")
 !2 = !{}
 !4 = distinct !DISubprogram(name: "a", linkageName: "_ZN1A1aEv", scope: !5, file: !1, line: 7, type: !6, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
-!5 = !DINamespace(name: "A", scope: null, file: !1, line: 1)
+!5 = !DINamespace(name: "A", scope: null)
 !6 = !DISubroutineType(types: !7)
 !7 = !{null}
 !8 = distinct !DISubprogram(name: "b", linkageName: "_ZN1A1bEv", scope: !5, file: !1, line: 8, type: !6, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s
index 3bc6039d5f1..9a809cf864d 100644
--- a/test/MC/AArch64/adrp-relocation.s
+++ b/test/MC/AArch64/adrp-relocation.s
@@ -1,4 +1,6 @@
 // RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=aarch64-linux-gnu -filetype=obj \
+// RUN: -o - %s| llvm-readobj -r - | FileCheck -check-prefix=CHECK-ILP32 %s
         .text
 // These should produce an ADRP/ADD pair to calculate the address of
 // testfn. The important point is that LLVM shouldn't think it can deal with the
@@ -16,3 +18,7 @@ sym:
 // CHECK: R_AARCH64_ADR_GOT_PAGE sym
 // CHECK: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym
 // CHECK: R_AARCH64_TLSDESC_ADR_PAGE21 sym
+// CHECK-ILP32: R_AARCH64_P32_ADR_PREL_PG_HI21 sym
+// CHECK-ILP32: R_AARCH64_P32_ADR_GOT_PAGE sym
+// CHECK-ILP32: R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21 sym
+// CHECK-ILP32: R_AARCH64_P32_TLSDESC_ADR_PAGE21 sym
diff --git a/test/MC/AArch64/arm32-elf-relocs.s b/test/MC/AArch64/arm32-elf-relocs.s
index 28327164de5..6473e2c788a 100644
--- a/test/MC/AArch64/arm32-elf-relocs.s
+++ b/test/MC/AArch64/arm32-elf-relocs.s
@@ -1,4 +1,7 @@
-// RUN: llvm-mc -triple=arm64-linux-gnu -o - < %s | FileCheck %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -o - < %s | \
+// RUN:   FileCheck %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -show-encoding \
+// RUN:    -o - < %s | FileCheck --check-prefix=CHECK-ENCODING %s
 // RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -filetype=obj < %s | \
 // RUN:   llvm-objdump -triple=arm64-linux-gnu - -r | \
 // RUN:   FileCheck %s --check-prefix=CHECK-OBJ-ILP32
@@ -25,7 +28,7 @@
 
    add x5, x0, #:tlsdesc_lo12:sym
 // CHECK: add x5, x0, :tlsdesc_lo12:sym
-// CHECK-OBJ-ILP32: 14 R_AARCH64_P32_TLSDESC_ADD_LO12_NC sym
+// CHECK-OBJ-ILP32: 14 R_AARCH64_P32_TLSDESC_ADD_LO12 sym
 
         add x0, x2, #:lo12:sym+8
 // CHECK: add x0, x2, :lo12:sym
@@ -49,33 +52,33 @@
 
    add x5, x0, #:tlsdesc_lo12:sym+70
 // CHECK: add x5, x0, :tlsdesc_lo12:sym+70
-// CHECK-OBJ-ILP32: 2c R_AARCH64_P32_TLSDESC_ADD_LO12_NC sym+70
+// CHECK-OBJ-ILP32: 2c R_AARCH64_P32_TLSDESC_ADD_LO12 sym+70
 
         .hword sym + 4 - .
 // CHECK-OBJ-ILP32: 30 R_AARCH64_P32_PREL16 sym+4
         .word sym - . + 8
-// CHECK-OBJ-ILP32 32 R_AARCH64_P32_PREL32 sym+8
+// CHECK-OBJ-ILP32: 32 R_AARCH64_P32_PREL32 sym+8
 
         .hword sym
-// CHECK-OBJ-ILP32 3e R_AARCH64_P32_ABS16 sym
+// CHECK-OBJ-ILP32: 36 R_AARCH64_P32_ABS16 sym
         .word sym+1
-// CHECK-OBJ-ILP32 40 R_AARCH64_P32_ABS32 sym+1
+// CHECK-OBJ-ILP32: 38 R_AARCH64_P32_ABS32 sym+1
 
    adrp x0, sym
 // CHECK: adrp x0, sym
-// CHECK-OBJ-ILP32 4c R_AARCH64_P32_ADR_PREL_PG_HI21 sym
+// CHECK-OBJ-ILP32: 3c R_AARCH64_P32_ADR_PREL_PG_HI21 sym
 
    adrp x15, :got:sym
 // CHECK: adrp x15, :got:sym
-// CHECK-OBJ-ILP32 50 R_AARCH64_P32_ADR_GOT_PAGE sym
+// CHECK-OBJ-ILP32: 40 R_AARCH64_P32_ADR_GOT_PAGE sym
 
    adrp x29, :gottprel:sym
 // CHECK: adrp x29, :gottprel:sym
-// CHECK-OBJ-ILP32 54 R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21 sym
+// CHECK-OBJ-ILP32: 44 R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21 sym
 
    adrp x2, :tlsdesc:sym
 // CHECK: adrp x2, :tlsdesc:sym
-// CHECK-OBJ-ILP32 58 R_AARCH64_P32_TLSDESC_ADR_PAGE21 sym
+// CHECK-OBJ-ILP32: 48 R_AARCH64_P32_TLSDESC_ADR_PAGE21 sym
 
    // LLVM is not competent enough to do this relocation because the
    // page boundary could occur anywhere after linking. A relocation
@@ -84,7 +87,7 @@
    .global trickQuestion
 trickQuestion:
 // CHECK: adrp x3, trickQuestion
-// CHECK-OBJ-ILP32 5c R_AARCH64_P32_ADR_PREL_PG_HI21 trickQuestion
+// CHECK-OBJ-ILP32: 4c R_AARCH64_P32_ADR_PREL_PG_HI21 trickQuestion
 
    ldrb w2, [x3, :lo12:sym]
    ldrsb w5, [x7, #:lo12:sym]
@@ -94,10 +97,10 @@ trickQuestion:
 // CHECK: ldrsb w5, [x7, :lo12:sym]
 // CHECK: ldrsb x11, [x13, :lo12:sym]
 // CHECK: ldr b17, [x19, :lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST8_ABS_LO12_NC sym
 
    ldrb w23, [x29, #:dtprel_lo12_nc:sym]
    ldrsb w23, [x19, #:dtprel_lo12:sym]
@@ -107,10 +110,10 @@ trickQuestion:
 // CHECK: ldrsb w23, [x19, :dtprel_lo12:sym]
 // CHECK: ldrsb x17, [x13, :dtprel_lo12_nc:sym]
 // CHECK: ldr b11, [x7, :dtprel_lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12 sym
 
    ldrb w1, [x2, :tprel_lo12:sym]
    ldrsb w3, [x4, #:tprel_lo12_nc:sym]
@@ -120,10 +123,10 @@ trickQuestion:
 // CHECK: ldrsb w3, [x4, :tprel_lo12_nc:sym]
 // CHECK: ldrsb x5, [x6, :tprel_lo12:sym]
 // CHECK: ldr b7, [x8, :tprel_lo12_nc:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC sym
 
    ldrh w2, [x3, #:lo12:sym]
    ldrsh w5, [x7, :lo12:sym]
@@ -133,10 +136,10 @@ trickQuestion:
 // CHECK: ldrsh w5, [x7, :lo12:sym]
 // CHECK: ldrsh x11, [x13, :lo12:sym]
 // CHECK: ldr h17, [x19, :lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST16_ABS_LO12_NC sym
 
    ldrh w23, [x29, #:dtprel_lo12_nc:sym]
    ldrsh w23, [x19, :dtprel_lo12:sym]
@@ -146,10 +149,10 @@ trickQuestion:
 // CHECK: ldrsh w23, [x19, :dtprel_lo12:sym]
 // CHECK: ldrsh x17, [x13, :dtprel_lo12_nc:sym]
 // CHECK: ldr h11, [x7, :dtprel_lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12 sym
 
    ldrh w1, [x2, :tprel_lo12:sym]
    ldrsh w3, [x4, #:tprel_lo12_nc:sym]
@@ -159,10 +162,10 @@ trickQuestion:
 // CHECK: ldrsh w3, [x4, :tprel_lo12_nc:sym]
 // CHECK: ldrsh x5, [x6, :tprel_lo12:sym]
 // CHECK: ldr h7, [x8, :tprel_lo12_nc:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC sym
 
    ldr w1, [x2, #:lo12:sym]
    ldrsw x3, [x4, #:lo12:sym]
@@ -170,9 +173,9 @@ trickQuestion:
 // CHECK: ldr w1, [x2, :lo12:sym]
 // CHECK: ldrsw x3, [x4, :lo12:sym]
 // CHECK: ldr s4, [x5, :lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST32_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST32_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST32_ABS_LO12_NC sym
 
    ldr w1, [x2, :dtprel_lo12:sym]
    ldrsw x3, [x4, #:dtprel_lo12_nc:sym]
@@ -180,9 +183,9 @@ trickQuestion:
 // CHECK: ldr w1, [x2, :dtprel_lo12:sym]
 // CHECK: ldrsw x3, [x4, :dtprel_lo12_nc:sym]
 // CHECK: ldr s4, [x5, :dtprel_lo12_nc:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC sym
 
 
    ldr w1, [x2, #:tprel_lo12:sym]
@@ -191,53 +194,69 @@ trickQuestion:
 // CHECK: ldr w1, [x2, :tprel_lo12:sym]
 // CHECK: ldrsw x3, [x4, :tprel_lo12_nc:sym]
 // CHECK: ldr s4, [x5, :tprel_lo12_nc:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC sym
 
    ldr x28, [x27, :lo12:sym]
-   ldr d26, [x25, #:lo12:sym]
+   ldr d26, [x25, :lo12:sym]
 // CHECK: ldr x28, [x27, :lo12:sym]
 // CHECK: ldr d26, [x25, :lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST64_ABS_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST64_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST64_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST64_ABS_LO12_NC sym
 
-   ldr x24, [x23, #:got_lo12:sym]
-   ldr d22, [x21, :got_lo12:sym]
-// CHECK: ldr x24, [x23, :got_lo12:sym]
-// CHECK: ldr d22, [x21, :got_lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_LD32_GOT_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_LD32_GOT_LO12_NC sym
+   ldr w24, [x23, :got_lo12:sym]
+   ldr s22, [x21, :got_lo12:sym]
+// CHECK: ldr w24, [x23, :got_lo12:sym]
+// CHECK: ldr s22, [x21, :got_lo12:sym]
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LD32_GOT_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LD32_GOT_LO12_NC sym
 
    ldr x24, [x23, :dtprel_lo12_nc:sym]
-   ldr d22, [x21, #:dtprel_lo12:sym]
+   ldr d22, [x21, :dtprel_lo12:sym]
 // CHECK: ldr x24, [x23, :dtprel_lo12_nc:sym]
 // CHECK: ldr d22, [x21, :dtprel_lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12 sym
 
-   ldr x24, [x23, #:tprel_lo12:sym]
+   ldr q24, [x23, :dtprel_lo12_nc:sym]
+   ldr q22, [x21, :dtprel_lo12:sym]
+// CHECK: ldr q24, [x23, :dtprel_lo12_nc:sym]
+// CHECK: ldr q22, [x21, :dtprel_lo12:sym]
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12 sym
+
+   ldr x24, [x23, :tprel_lo12:sym]
    ldr d22, [x21, :tprel_lo12_nc:sym]
 // CHECK: ldr x24, [x23, :tprel_lo12:sym]
 // CHECK: ldr d22, [x21, :tprel_lo12_nc:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12 sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC sym
 
-#   ldr x24, [x23, :gottprel_lo12:sym]
-#   ldr d22, [x21, #:gottprel_lo12:sym]
+   ldr q24, [x23, :tprel_lo12:sym]
+   ldr q22, [x21, :tprel_lo12_nc:sym]
+// CHECK: ldr q24, [x23, :tprel_lo12:sym]
+// CHECK: ldr q22, [x21, :tprel_lo12_nc:sym]
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12_NC sym
 
-   ldr x24, [x23, #:tlsdesc_lo12:sym]
-   ldr d22, [x21, :tlsdesc_lo12:sym]
-// CHECK: ldr x24, [x23, :tlsdesc_lo12:sym]
-// CHECK: ldr d22, [x21, :tlsdesc_lo12:sym]
-// Why is there a "_NC" at the end? "ELF for the ARM 64-bit architecture
-// (AArch64) beta" doesn't have that.
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSDESC_LD32_LO12_NC sym
-// CHECK-OBJ-ILP32 R_AARCH64_P32_TLSDESC_LD64_LO12_NC sym
+   ldr w24, [x23, :gottprel_lo12:sym]
+   ldr s22, [x21, :gottprel_lo12:sym]
+
+   ldr w24, [x23, :tlsdesc_lo12:sym]
+   ldr s22, [x21, :tlsdesc_lo12:sym]
+// CHECK: ldr w24, [x23, :tlsdesc_lo12:sym]
+// CHECK: ldr s22, [x21, :tlsdesc_lo12:sym]
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSDESC_LD32_LO12 sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_TLSDESC_LD32_LO12 sym
 
    ldr q20, [x19, #:lo12:sym]
 // CHECK: ldr q20, [x19, :lo12:sym]
-// CHECK-OBJ-ILP32 R_AARCH64_P32_LDST128_ABS_LO12_NC sym
+// CHECK-OBJ-ILP32: R_AARCH64_P32_LDST128_ABS_LO12_NC sym
+// check encoding here, since encoding test doesn't belong with TLS encoding
+// tests, as it isn't a TLS relocation.
+// CHECK-ENCODING: ldr q20, [x19, :lo12:sym] // encoding: [0x74,0bAAAAAA10,0b11AAAAAA,0x3d]
+// CHECK-ENCODING-NEXT:  0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale16
 
 // Since relocated instructions print without a '#', that syntax should
 // certainly be accepted when assembling.
diff --git a/test/MC/AArch64/arm32-large-relocs.s b/test/MC/AArch64/arm32-large-relocs.s
new file mode 100644
index 00000000000..1ac86c0871a
--- /dev/null
+++ b/test/MC/AArch64/arm32-large-relocs.s
@@ -0,0 +1,31 @@
+// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -show-encoding -o - \
+// RUN:   %s \
+// RUN:   | FileCheck %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -show-encoding \
+// RUN:   -filetype=obj -o - %s \
+// RUN:   | llvm-objdump -r - \
+// RUN:   | FileCheck --check-prefix=CHECK-OBJ %s
+
+        movz x2, #:abs_g0:sym
+        movk w3, #:abs_g0_nc:sym
+        movz x13, #:abs_g0_s:sym
+        movn x17, #:abs_g0_s:sym
+// CHECK:   movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw
+// CHECK:   movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw
+// CHECK:   movz x13, #:abs_g0_s:sym // encoding: [0bAAA01101,A,0b100AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
+// CHECK:   movn x17, #:abs_g0_s:sym // encoding: [0bAAA10001,A,0b100AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
+
+// CHECK-OBJ: 0 R_AARCH64_P32_MOVW_UABS_G0 sym
+// CHECK-OBJ: 4 R_AARCH64_P32_MOVW_UABS_G0_NC sym
+// CHECK-OBJ: 8 R_AARCH64_P32_MOVW_SABS_G0 sym
+// CHECK-OBJ: c R_AARCH64_P32_MOVW_SABS_G0 sym
+
+        movz x4, #:abs_g1:sym
+// CHECK:   movz x4, #:abs_g1:sym    // encoding: [0bAAA00100,A,0b101AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw
+
+// CHECK-OBJ: 10 R_AARCH64_P32_MOVW_UABS_G1 sym
diff --git a/test/MC/AArch64/arm32-tls-relocs.s b/test/MC/AArch64/arm32-tls-relocs.s
new file mode 100644
index 00000000000..390da05529f
--- /dev/null
+++ b/test/MC/AArch64/arm32-tls-relocs.s
@@ -0,0 +1,290 @@
+// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-none-linux-gnu \
+// RUN:   -show-encoding < %s | FileCheck --check-prefix=CHECK-ILP32 %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-none-linux-gnu \
+// RUN:   -filetype=obj < %s -o - | \
+// RUN:   llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF-ILP32 %s
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS initial-exec forms
+////////////////////////////////////////////////////////////////////////////////
+
+        adrp x11, :gottprel:var
+        ldr w10, [x0, #:gottprel_lo12:var]
+        ldr w9, :gottprel:var
+// CHECK-ILP32: adrp x11, :gottprel:var      // encoding: [0x0b'A',A,A,0x90'A']
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21
+// CHECK-ILP32: ldr  w10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xb9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK-ILP32: ldr     w9, :gottprel:var       // encoding: [0bAAA01001,A,A,0x18]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19
+
+// CHECK-ELF-ILP32:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM:[^ ]+]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19 [[VARSYM]]
+
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS local-exec forms
+////////////////////////////////////////////////////////////////////////////////
+
+        movz x5, #:tprel_g1:var
+        movn x6, #:tprel_g1:var
+        movz w7, #:tprel_g1:var
+// CHECK-ILP32: movz    x5, #:tprel_g1:var      // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movn    x6, #:tprel_g1:var      // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movz    w7, #:tprel_g1:var      // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+
+
+        movz x11, #:tprel_g0:var
+        movn x12, #:tprel_g0:var
+        movz w13, #:tprel_g0:var
+// CHECK-ILP32: movz    x11, #:tprel_g0:var     // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movn    x12, #:tprel_g0:var     // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movz    w13, #:tprel_g0:var     // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+
+
+        movk w15, #:tprel_g0_nc:var
+        movk w16, #:tprel_g0_nc:var
+// CHECK-ILP32: movk    w15, #:tprel_g0_nc:var  // encoding: [0bAAA01111,A,0b100AAAAA,0x72]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movk    w16, #:tprel_g0_nc:var  // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
+
+
+        add x21, x22, #:tprel_lo12:var
+// CHECK-ILP32: add     x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
+
+
+        add x25, x26, #:tprel_lo12_nc:var
+// CHECK-ILP32: add     x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
+
+
+        ldrb w29, [x30, #:tprel_lo12:var]
+        ldrsb x29, [x28, #:tprel_lo12_nc:var]
+// CHECK-ILP32: ldrb    w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
+// CHECK-ILP32: ldrsb   x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]]
+
+
+        strh w27, [x26, #:tprel_lo12:var]
+        ldrsh x25, [x24, #:tprel_lo12_nc:var]
+// CHECK-ILP32: strh    w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
+// CHECK-ILP32: ldrsh   x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]]
+
+
+        ldr w23, [x22, #:tprel_lo12:var]
+        ldrsw x21, [x20, #:tprel_lo12_nc:var]
+// CHECK-ILP32: ldr     w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK-ILP32: ldrsw   x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]]
+
+        ldr x19, [x18, #:tprel_lo12:var]
+        str x17, [x16, #:tprel_lo12_nc:var]
+// CHECK-ILP32: ldr     x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK-ILP32: str     x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
+
+
+   ldr q24, [x23, :tprel_lo12:var]
+   str q22, [x21, :tprel_lo12_nc:var]
+// CHECK-ILP32: ldr     q24, [x23, :tprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16
+// CHECK-ILP32: str     q22, [x21, :tprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12_NC [[VARSYM]]
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS local-dynamic forms
+////////////////////////////////////////////////////////////////////////////////
+
+        movz x5, #:dtprel_g1:var
+        movn x6, #:dtprel_g1:var
+        movz w7, #:dtprel_g1:var
+// CHECK-ILP32: movz    x5, #:dtprel_g1:var      // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movn    x6, #:dtprel_g1:var      // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movz    w7, #:dtprel_g1:var      // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+
+
+        movz x11, #:dtprel_g0:var
+        movn x12, #:dtprel_g0:var
+        movz w13, #:dtprel_g0:var
+// CHECK-ILP32: movz    x11, #:dtprel_g0:var     // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movn    x12, #:dtprel_g0:var     // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movz    w13, #:dtprel_g0:var     // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+
+
+        movk x15, #:dtprel_g0_nc:var
+        movk w16, #:dtprel_g0_nc:var
+// CHECK-ILP32: movk    x15, #:dtprel_g0_nc:var  // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
+// CHECK-ILP32: movk    w16, #:dtprel_g0_nc:var  // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
+
+
+        add x21, x22, #:dtprel_lo12:var
+// CHECK-ILP32: add     x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
+
+
+        add x25, x26, #:dtprel_lo12_nc:var
+// CHECK-ILP32: add     x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
+
+
+	add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
+	add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
+
+// CHECK-ELF-ILP32: R_AARCH64_P32_TLSLD_ADD_DTPREL_HI12 var_tlsld
+// CHECK-ELF-ILP32: R_AARCH64_P32_TLSLE_ADD_TPREL_HI12 var_tlsle
+
+
+        ldrb w29, [x30, #:dtprel_lo12:var]
+        ldrsb x29, [x28, #:dtprel_lo12_nc:var]
+// CHECK-ILP32: ldrb    w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
+// CHECK-ILP32: ldrsb   x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]]
+
+
+        strh w27, [x26, #:dtprel_lo12:var]
+        ldrsh x25, [x24, #:dtprel_lo12_nc:var]
+// CHECK-ILP32: strh    w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
+// CHECK-ILP32: ldrsh   x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]]
+
+
+        ldr w23, [x22, #:dtprel_lo12:var]
+        ldrsw x21, [x20, #:dtprel_lo12_nc:var]
+// CHECK-ILP32: ldr     w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK-ILP32: ldrsw   x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]]
+
+        ldr x19, [x18, #:dtprel_lo12:var]
+        str x17, [x16, #:dtprel_lo12_nc:var]
+// CHECK-ILP32: ldr     x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK-ILP32: str     x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
+
+        ldr q24, [x23, #:dtprel_lo12:var]
+        str q22, [x21, #:dtprel_lo12_nc:var]
+// CHECK-ILP32: ldr     q24, [x23, :dtprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16
+// CHECK-ILP32: str     q22, [x21, :dtprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12_NC [[VARSYM]]
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS descriptor forms
+////////////////////////////////////////////////////////////////////////////////
+
+        adrp x8, :tlsdesc:var
+        ldr w7, [x6, #:tlsdesc_lo12:var]
+        add x5, x4, #:tlsdesc_lo12:var
+        .tlsdesccall var
+        blr x3
+
+// CHECK-ILP32: adrp    x8, :tlsdesc:var        // encoding: [0x08'A',A,A,0x90'A']
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21
+// CHECK-ILP32: ldr     w7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xb9]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK-ILP32: add     x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91]
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12
+// CHECK-ILP32: .tlsdesccall var                // encoding: []
+// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call
+// CHECK-ILP32: blr     x3                      // encoding: [0x60,0x00,0x3f,0xd6]
+
+
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_ADR_PAGE21 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_LD32_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_ADD_LO12 [[VARSYM]]
+// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_CALL [[VARSYM]]
+
+        // Make sure symbol 5 has type STT_TLS:
+
+// CHECK-ELF-ILP32:      Symbols [
+// CHECK-ELF-ILP32:        Symbol {
+// CHECK-ELF-ILP32:          Name: var
+// CHECK-ELF-ILP32-NEXT:     Value:
+// CHECK-ELF-ILP32-NEXT:     Size:
+// CHECK-ELF-ILP32-NEXT:     Binding: Global
+// CHECK-ELF-ILP32-NEXT:     Type: TLS
diff --git a/test/MC/AArch64/arm64-elf-reloc-condbr.s b/test/MC/AArch64/arm64-elf-reloc-condbr.s
index 31820450702..3552ec28951 100644
--- a/test/MC/AArch64/arm64-elf-reloc-condbr.s
+++ b/test/MC/AArch64/arm64-elf-reloc-condbr.s
@@ -1,5 +1,8 @@
 // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \
 // RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-none-linux-gnu -filetype=obj \
+// RUN:   %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ-ILP32 %s
 
         b.eq somewhere
 
@@ -8,3 +11,9 @@
 // OBJ-NEXT:     0x0 R_AARCH64_CONDBR19 somewhere 0x0
 // OBJ-NEXT:   }
 // OBJ-NEXT: ]
+
+// OBJ-ILP32:      Relocations [
+// OBJ-ILP32-NEXT:   Section {{.*}} .rela.text {
+// OBJ-ILP32-NEXT:     0x0 R_AARCH64_P32_CONDBR19 somewhere 0x0
+// OBJ-ILP32-NEXT:   }
+// OBJ-ILP32-NEXT: ]
diff --git a/test/MC/AArch64/arm64-elf-relocs.s b/test/MC/AArch64/arm64-elf-relocs.s
index 0e4efed7821..7187c258ec6 100644
--- a/test/MC/AArch64/arm64-elf-relocs.s
+++ b/test/MC/AArch64/arm64-elf-relocs.s
@@ -1,5 +1,7 @@
 // RUN: llvm-mc -triple=arm64-linux-gnu -o - < %s | FileCheck %s
-// RUN: llvm-mc                   -triple=arm64-linux-gnu -filetype=obj < %s | \
+// RUN: llvm-mc -triple=arm64-linux-gnu -show-encoding -o - < %s | \
+// RUN:   FileCheck --check-prefix=CHECK-ENCODING %s
+// RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj < %s | \
 // RUN:   llvm-objdump -triple=arm64-linux-gnu - -r | \
 // RUN:   FileCheck %s --check-prefix=CHECK-OBJ-LP64
 
@@ -25,7 +27,7 @@
 
    add x5, x0, #:tlsdesc_lo12:sym
 // CHECK: add x5, x0, :tlsdesc_lo12:sym
-// CHECK-OBJ-LP64: 14 R_AARCH64_TLSDESC_ADD_LO12_NC sym
+// CHECK-OBJ-LP64: 14 R_AARCH64_TLSDESC_ADD_LO12 sym
 
         add x0, x2, #:lo12:sym+8
 // CHECK: add x0, x2, :lo12:sym
@@ -49,37 +51,37 @@
 
    add x5, x0, #:tlsdesc_lo12:sym+70
 // CHECK: add x5, x0, :tlsdesc_lo12:sym+70
-// CHECK-OBJ-LP64: 2c R_AARCH64_TLSDESC_ADD_LO12_NC sym+70
+// CHECK-OBJ-LP64: 2c R_AARCH64_TLSDESC_ADD_LO12 sym+70
 
         .hword sym + 4 - .
 // CHECK-OBJ-LP64: 30 R_AARCH64_PREL16 sym+4
         .word sym - . + 8
-// CHECK-OBJ-LP64 32 R_AARCH64_PREL32 sym+8
+// CHECK-OBJ-LP64: 32 R_AARCH64_PREL32 sym+8
         .xword sym-.
-// CHECK-OBJ-LP64 36 R_AARCH64_PREL64 sym{{$}}
+// CHECK-OBJ-LP64: 36 R_AARCH64_PREL64 sym{{$}}
 
         .hword sym
-// CHECK-OBJ-LP64 3e R_AARCH64_ABS16 sym
+// CHECK-OBJ-LP64: 3e R_AARCH64_ABS16 sym
         .word sym+1
-// CHECK-OBJ-LP64 40 R_AARCH64_ABS32 sym+1
+// CHECK-OBJ-LP64: 40 R_AARCH64_ABS32 sym+1
         .xword sym+16
-// CHECK-OBJ-LP64 44 R_AARCH64_ABS64 sym+16
+// CHECK-OBJ-LP64: 44 R_AARCH64_ABS64 sym+16
 
    adrp x0, sym
 // CHECK: adrp x0, sym
-// CHECK-OBJ-LP64 4c R_AARCH64_ADR_PREL_PG_HI21 sym
+// CHECK-OBJ-LP64: 4c R_AARCH64_ADR_PREL_PG_HI21 sym
 
    adrp x15, :got:sym
 // CHECK: adrp x15, :got:sym
-// CHECK-OBJ-LP64 50 R_AARCH64_ADR_GOT_PAGE sym
+// CHECK-OBJ-LP64: 50 R_AARCH64_ADR_GOT_PAGE sym
 
    adrp x29, :gottprel:sym
 // CHECK: adrp x29, :gottprel:sym
-// CHECK-OBJ-LP64 54 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym
+// CHECK-OBJ-LP64: 54 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym
 
    adrp x2, :tlsdesc:sym
 // CHECK: adrp x2, :tlsdesc:sym
-// CHECK-OBJ-LP64 58 R_AARCH64_TLSDESC_ADR_PAGE21 sym
+// CHECK-OBJ-LP64: 58 R_AARCH64_TLSDESC_ADR_PAGE21 sym
 
    // LLVM is not competent enough to do this relocation because the
    // page boundary could occur anywhere after linking. A relocation
@@ -88,7 +90,7 @@
    .global trickQuestion
 trickQuestion:
 // CHECK: adrp x3, trickQuestion
-// CHECK-OBJ-LP64 5c R_AARCH64_ADR_PREL_PG_HI21 trickQuestion
+// CHECK-OBJ-LP64: 5c R_AARCH64_ADR_PREL_PG_HI21 trickQuestion
 
    ldrb w2, [x3, :lo12:sym]
    ldrsb w5, [x7, #:lo12:sym]
@@ -98,10 +100,10 @@ trickQuestion:
 // CHECK: ldrsb w5, [x7, :lo12:sym]
 // CHECK: ldrsb x11, [x13, :lo12:sym]
 // CHECK: ldr b17, [x19, :lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST8_ABS_LO12_NC sym
 
    ldrb w23, [x29, #:dtprel_lo12_nc:sym]
    ldrsb w23, [x19, #:dtprel_lo12:sym]
@@ -111,10 +113,10 @@ trickQuestion:
 // CHECK: ldrsb w23, [x19, :dtprel_lo12:sym]
 // CHECK: ldrsb x17, [x13, :dtprel_lo12_nc:sym]
 // CHECK: ldr b11, [x7, :dtprel_lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
 
    ldrb w1, [x2, :tprel_lo12:sym]
    ldrsb w3, [x4, #:tprel_lo12_nc:sym]
@@ -124,10 +126,10 @@ trickQuestion:
 // CHECK: ldrsb w3, [x4, :tprel_lo12_nc:sym]
 // CHECK: ldrsb x5, [x6, :tprel_lo12:sym]
 // CHECK: ldr b7, [x8, :tprel_lo12_nc:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
 
    ldrh w2, [x3, #:lo12:sym]
    ldrsh w5, [x7, :lo12:sym]
@@ -137,10 +139,10 @@ trickQuestion:
 // CHECK: ldrsh w5, [x7, :lo12:sym]
 // CHECK: ldrsh x11, [x13, :lo12:sym]
 // CHECK: ldr h17, [x19, :lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST16_ABS_LO12_NC sym
 
    ldrh w23, [x29, #:dtprel_lo12_nc:sym]
    ldrsh w23, [x19, :dtprel_lo12:sym]
@@ -150,10 +152,10 @@ trickQuestion:
 // CHECK: ldrsh w23, [x19, :dtprel_lo12:sym]
 // CHECK: ldrsh x17, [x13, :dtprel_lo12_nc:sym]
 // CHECK: ldr h11, [x7, :dtprel_lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
 
    ldrh w1, [x2, :tprel_lo12:sym]
    ldrsh w3, [x4, #:tprel_lo12_nc:sym]
@@ -163,10 +165,10 @@ trickQuestion:
 // CHECK: ldrsh w3, [x4, :tprel_lo12_nc:sym]
 // CHECK: ldrsh x5, [x6, :tprel_lo12:sym]
 // CHECK: ldr h7, [x8, :tprel_lo12_nc:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
 
    ldr w1, [x2, #:lo12:sym]
    ldrsw x3, [x4, #:lo12:sym]
@@ -174,9 +176,9 @@ trickQuestion:
 // CHECK: ldr w1, [x2, :lo12:sym]
 // CHECK: ldrsw x3, [x4, :lo12:sym]
 // CHECK: ldr s4, [x5, :lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_LDST32_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST32_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST32_ABS_LO12_NC sym
 
    ldr w1, [x2, :dtprel_lo12:sym]
    ldrsw x3, [x4, #:dtprel_lo12_nc:sym]
@@ -184,9 +186,9 @@ trickQuestion:
 // CHECK: ldr w1, [x2, :dtprel_lo12:sym]
 // CHECK: ldrsw x3, [x4, :dtprel_lo12_nc:sym]
 // CHECK: ldr s4, [x5, :dtprel_lo12_nc:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST32_DTPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST32_DTPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
 
 
    ldr w1, [x2, #:tprel_lo12:sym]
@@ -195,55 +197,73 @@ trickQuestion:
 // CHECK: ldr w1, [x2, :tprel_lo12:sym]
 // CHECK: ldrsw x3, [x4, :tprel_lo12_nc:sym]
 // CHECK: ldr s4, [x5, :tprel_lo12_nc:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST32_TPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST32_TPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
 
    ldr x28, [x27, :lo12:sym]
    ldr d26, [x25, #:lo12:sym]
 // CHECK: ldr x28, [x27, :lo12:sym]
 // CHECK: ldr d26, [x25, :lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_LDST64_ABS_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LDST64_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST64_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST64_ABS_LO12_NC sym
 
    ldr x24, [x23, #:got_lo12:sym]
    ldr d22, [x21, :got_lo12:sym]
 // CHECK: ldr x24, [x23, :got_lo12:sym]
 // CHECK: ldr d22, [x21, :got_lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_LD64_GOT_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_LD64_GOT_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LD64_GOT_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LD64_GOT_LO12_NC sym
 
    ldr x24, [x23, :dtprel_lo12_nc:sym]
    ldr d22, [x21, #:dtprel_lo12:sym]
 // CHECK: ldr x24, [x23, :dtprel_lo12_nc:sym]
 // CHECK: ldr d22, [x21, :dtprel_lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLD_LDST64_DTPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST64_DTPREL_LO12 sym
+
+   ldr q24, [x23, :dtprel_lo12_nc:sym]
+   ldr q22, [x21, #:dtprel_lo12:sym]
+// CHECK: ldr q24, [x23, :dtprel_lo12_nc:sym]
+// CHECK: ldr q22, [x21, :dtprel_lo12:sym]
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLD_LDST128_DTPREL_LO12 sym
 
    ldr x24, [x23, #:tprel_lo12:sym]
    ldr d22, [x21, :tprel_lo12_nc:sym]
 // CHECK: ldr x24, [x23, :tprel_lo12:sym]
 // CHECK: ldr d22, [x21, :tprel_lo12_nc:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST64_TPREL_LO12 sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST64_TPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC sym
+
+   ldr q24, [x23, #:tprel_lo12:sym]
+   ldr q22, [x21, :tprel_lo12_nc:sym]
+// CHECK: ldr q24, [x23, :tprel_lo12:sym]
+// CHECK: ldr q22, [x21, :tprel_lo12_nc:sym]
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST128_TPREL_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC sym
 
    ldr x24, [x23, :gottprel_lo12:sym]
    ldr d22, [x21, #:gottprel_lo12:sym]
 // CHECK: ldr x24, [x23, :gottprel_lo12:sym]
 // CHECK: ldr d22, [x21, :gottprel_lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
 
    ldr x24, [x23, #:tlsdesc_lo12:sym]
    ldr d22, [x21, :tlsdesc_lo12:sym]
 // CHECK: ldr x24, [x23, :tlsdesc_lo12:sym]
 // CHECK: ldr d22, [x21, :tlsdesc_lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_TLSDESC_LD64_LO12_NC sym
-// CHECK-OBJ-LP64 R_AARCH64_TLSDESC_LD64_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSDESC_LD64_LO12 sym
+// CHECK-OBJ-LP64: R_AARCH64_TLSDESC_LD64_LO12 sym
 
    ldr q20, [x19, #:lo12:sym]
 // CHECK: ldr q20, [x19, :lo12:sym]
-// CHECK-OBJ-LP64 R_AARCH64_LDST128_ABS_LO12_NC sym
+// CHECK-OBJ-LP64: R_AARCH64_LDST128_ABS_LO12_NC sym
+// check encoding here, since encoding test doesn't belong with TLS encoding
+// tests, as it isn't a TLS relocation.
+// CHECK-ENCODING: ldr q20, [x19, :lo12:sym] // encoding: [0x74,0bAAAAAA10,0b11AAAAAA,0x3d]
+// CHECK-ENCODING-NEXT:  0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale16
 
 // Since relocated instructions print without a '#', that syntax should
 // certainly be accepted when assembling.
diff --git a/test/MC/AArch64/arm64-tls-relocs.s b/test/MC/AArch64/arm64-tls-relocs.s
index be7e24a6a3f..431fd37058e 100644
--- a/test/MC/AArch64/arm64-tls-relocs.s
+++ b/test/MC/AArch64/arm64-tls-relocs.s
@@ -2,7 +2,6 @@
 // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s -o - | \
 // RUN:   llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s
 
-
 ////////////////////////////////////////////////////////////////////////////////
 // TLS initial-exec forms
 ////////////////////////////////////////////////////////////////////////////////
@@ -159,6 +158,15 @@
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
 
+   ldr q24, [x23, :tprel_lo12:var]
+   str q22, [x21, :tprel_lo12_nc:var]
+// CHECK: ldr     q24, [x23, :tprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d]
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16
+// CHECK: str     q22, [x21, :tprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d]
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16
+
+// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST128_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC [[VARSYM]]
 
 ////////////////////////////////////////////////////////////////////////////////
 // TLS local-dynamic forms
@@ -283,6 +291,16 @@
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
 
+        ldr q24, [x23, #:dtprel_lo12:var]
+        str q22, [x21, #:dtprel_lo12_nc:var]
+// CHECK: ldr     q24, [x23, :dtprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d]
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16
+// CHECK: str     q22, [x21, :dtprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d]
+// CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16
+
+// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST128_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC [[VARSYM]]
+
 ////////////////////////////////////////////////////////////////////////////////
 // TLS descriptor forms
 ////////////////////////////////////////////////////////////////////////////////
@@ -305,8 +323,8 @@
 
 
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADR_PAGE21 [[VARSYM]]
-// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
-// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_LD64_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADD_LO12 [[VARSYM]]
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_CALL [[VARSYM]]
 
         // Make sure symbol 5 has type STT_TLS:
diff --git a/test/MC/AArch64/directive-arch-negative.s b/test/MC/AArch64/directive-arch-negative.s
index 43ccd792725..21fd90ebdf1 100644
--- a/test/MC/AArch64/directive-arch-negative.s
+++ b/test/MC/AArch64/directive-arch-negative.s
@@ -36,6 +36,14 @@
 # CHECK: error: instruction requires: ras
 # CHECK:         esb
 
+// PR32873: without extra features, '.arch' is currently ignored.
+// Add an unrelated feature to accept the directive.
+	.arch armv8+crc
+        casa  w5, w7, [x19]
+
+# CHECK: error: instruction requires: lse
+# CHECK:        casa  w5, w7, [x19]
+
 	.arch armv8.1-a+nolse
         casa  w5, w7, [x20]
 
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
index 017d66cb2a2..e55902cdf94 100644
--- a/test/MC/AArch64/elf-reloc-ldrlit.s
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -1,5 +1,8 @@
 // RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
 // RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=aarch64-none-linux-gnu \
+// RUN:   -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ-ILP32 %s
 
         ldr x0, some_label
         ldr w3, some_label
@@ -14,3 +17,12 @@
 // OBJ-NEXT:     0xC R_AARCH64_LD_PREL_LO19 some_label 0x0
 // OBJ-NEXT:   }
 // OBJ-NEXT: ]
+
+// OBJ-ILP32:      Relocations [
+// OBJ-ILP32-NEXT:   Section {{.*}} .rela.text {
+// OBJ-ILP32-NEXT:     0x0 R_AARCH64_P32_LD_PREL_LO19 some_label 0x0
+// OBJ-ILP32-NEXT:     0x4 R_AARCH64_P32_LD_PREL_LO19 some_label 0x0
+// OBJ-ILP32-NEXT:     0x8 R_AARCH64_P32_LD_PREL_LO19 some_label 0x0
+// OBJ-ILP32-NEXT:     0xC R_AARCH64_P32_LD_PREL_LO19 some_label 0x0
+// OBJ-ILP32-NEXT:   }
+// OBJ-ILP32-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s b/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s
new file mode 100644
index 00000000000..c08192e7e0d
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -target-abi=ilp32 -triple=aarch64-none-linux-gnu \
+// RUN:   -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ-ILP32 %s
+        adr x2, some_label
+        adrp x5, some_label
+
+        adrp x5, :got:some_label
+        ldr w0, [x5, #:got_lo12:some_label]
+
+// OBJ-ILP32:      Relocations [
+// OBJ-ILP32-NEXT:   Section {{.*}} .rela.text {
+// OBJ-ILP32-NEXT:     0x0 R_AARCH64_P32_ADR_PREL_LO21    some_label 0x0
+// OBJ-ILP32-NEXT:     0x4 R_AARCH64_P32_ADR_PREL_PG_HI21 some_label 0x0
+// OBJ-ILP32-NEXT:     0x8 R_AARCH64_P32_ADR_GOT_PAGE     some_label 0x0
+// OBJ-ILP32-NEXT:     0xC R_AARCH64_P32_LD32_GOT_LO12_NC some_label 0x0
+// OBJ-ILP32-NEXT:   }
+// OBJ-ILP32-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
index e6828e69171..1070c4d7f3e 100644
--- a/test/MC/AArch64/elf-reloc-tstb.s
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -1,5 +1,8 @@
 // RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
 // RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=aarch64-none-linux-gnu \
+// RUN:   -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ-ILP32 %s
 
         tbz x6, #45, somewhere
         tbnz w3, #15, somewhere
@@ -10,3 +13,10 @@
 // OBJ-NEXT:     0x4  R_AARCH64_TSTBR14 somewhere 0x0
 // OBJ-NEXT:   }
 // OBJ-NEXT: ]
+
+// OBJ-ILP32:      Relocations [
+// OBJ-ILP32-NEXT:   Section {{.*}} .rela.text {
+// OBJ-ILP32-NEXT:     0x0  R_AARCH64_P32_TSTBR14 somewhere 0x0
+// OBJ-ILP32-NEXT:     0x4  R_AARCH64_P32_TSTBR14 somewhere 0x0
+// OBJ-ILP32-NEXT:   }
+// OBJ-ILP32-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
index ff852be37b6..373779d03d1 100644
--- a/test/MC/AArch64/elf-reloc-uncondbrimm.s
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -1,5 +1,8 @@
 // RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
 // RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -target-abi=ilp32 -triple=aarch64-none-linux-gnu \
+// RUN:   -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ-ILP32 %s
 
         b somewhere
         bl somewhere
@@ -10,3 +13,10 @@
 // OBJ-NEXT:     0x4 R_AARCH64_CALL26 somewhere 0x0
 // OBJ-NEXT:   }
 // OBJ-NEXT: ]
+
+// OBJ-ILP32:      Relocations [
+// OBJ-ILP32-NEXT:   Section {{.*}} .rela.text {
+// OBJ-ILP32-NEXT:     0x0 R_AARCH64_P32_JUMP26 somewhere 0x0
+// OBJ-ILP32-NEXT:     0x4 R_AARCH64_P32_CALL26 somewhere 0x0
+// OBJ-ILP32-NEXT:   }
+// OBJ-ILP32-NEXT: ]
diff --git a/test/MC/AArch64/error-location.s b/test/MC/AArch64/error-location.s
index a4f083bddba..a8f9a7df6d3 100644
--- a/test/MC/AArch64/error-location.s
+++ b/test/MC/AArch64/error-location.s
@@ -31,7 +31,7 @@
 // CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: invalid fixup for 16-bit load/store instruction
   ldrh w0, [x1, :gottprel_lo12:undef]
 
-// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: invalid fixup for 32-bit load/store instruction
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: LP64 32-bit load/store relocation not supported (ILP32 eqv: TLSIE_LD32_GOTTPREL_LO12_NC)
   ldr w0, [x1, :gottprel_lo12:undef]
 
 
diff --git a/test/MC/AArch64/ilp32-diagnostics.s b/test/MC/AArch64/ilp32-diagnostics.s
index 47c24e242a1..f8fd41cfa2f 100644
--- a/test/MC/AArch64/ilp32-diagnostics.s
+++ b/test/MC/AArch64/ilp32-diagnostics.s
@@ -1,9 +1,13 @@
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -target-abi=ilp32 \
-// RUN:  < %s 2> %t2 -filetype=obj 
+// RUN:  < %s 2> %t2 -filetype=obj >/dev/null
 // RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t2
 
         .xword sym-.
 // CHECK-ERROR: error: ILP32 8 byte PC relative data relocation not supported (LP64 eqv: PREL64)
+// CHECK-ERROR: ^
+
+        .xword sym+16
+// CHECK-ERROR: error: ILP32 8 byte absolute data relocation not supported (LP64 eqv: ABS64)
 // CHECK-ERROR: ^
 
         movz x7, #:abs_g3:some_label
@@ -64,4 +68,30 @@
         movk x13, #:gottprel_g0_nc:var
 // CHECK-ERROR: error: ILP32 absolute MOV relocation not supported (LP64 eqv: TLSIE_MOVW_GOTTPREL_G0_NC)
 // CHECK-ERROR: movk x13, #:gottprel_g0_nc:var
+// CHECK-ERROR: ^
+
+        ldr x10, [x0, #:gottprel_lo12:var]
+// CHECK-ERROR: error: ILP32 64-bit load/store relocation not supported (LP64 eqv: TLSIE_LD64_GOTTPREL_LO12_NC)
+// CHECK-ERROR: ldr x10, [x0, #:gottprel_lo12:var]
+// CHECK-ERROR: ^
+
+   ldr x24, [x23, #:got_lo12:sym]
+// CHECK-ERROR: error: ILP32 64-bit load/store relocation not supported (LP64 eqv: LD64_GOT_LO12_NC)
+// CHECK-ERROR: ^
+
+   ldr x24, [x23, :gottprel_lo12:sym]
+// CHECK-ERROR: error: ILP32 64-bit load/store relocation not supported (LP64 eqv: TLSIE_LD64_GOTTPREL_LO12_NC)
+// CHECK-ERROR: ^
+
+        ldr x10, [x0, #:gottprel_lo12:var]
+// CHECK-ERROR: error: ILP32 64-bit load/store relocation not supported (LP64 eqv: TLSIE_LD64_GOTTPREL_LO12_NC)
+// CHECK-ERROR: ldr x10, [x0, #:gottprel_lo12:var]
+// CHECK-ERROR: ^
+
+   ldr x24, [x23, #:got_lo12:sym]
+// CHECK-ERROR: error: ILP32 64-bit load/store relocation not supported (LP64 eqv: LD64_GOT_LO12_NC)
+// CHECK-ERROR: ^
+
+   ldr x24, [x23, :gottprel_lo12:sym]
+// CHECK-ERROR: error: ILP32 64-bit load/store relocation not supported (LP64 eqv: TLSIE_LD64_GOTTPREL_LO12_NC)
 // CHECK-ERROR: ^
diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s
index c3ba1cf6287..1dc5fe60d3b 100644
--- a/test/MC/AArch64/inline-asm-modifiers.s
+++ b/test/MC/AArch64/inline-asm-modifiers.s
@@ -30,7 +30,7 @@ test_inline_modifier_L:                 // @test_inline_modifier_L
 
 // CHECK: R_AARCH64_ADD_ABS_LO12_NC var_simple
 // CHECK: R_AARCH64_LD64_GOT_LO12_NC var_got
-// CHECK: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd
+// CHECK: R_AARCH64_TLSDESC_ADD_LO12 var_tlsgd
 // CHECK: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld
 // CHECK: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie
 // CHECK: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle
diff --git a/test/MC/AArch64/lp64-diagnostics.s b/test/MC/AArch64/lp64-diagnostics.s
new file mode 100644
index 00000000000..942923ffccb
--- /dev/null
+++ b/test/MC/AArch64/lp64-diagnostics.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t2 -filetype=obj \
+// RUN:   >/dev/null
+// RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t2
+
+   ldr w24, [x23, :tlsdesc_lo12:sym]
+   ldr s22, [x21, :tlsdesc_lo12:sym]
+
+// CHECK-ERROR: error: LP64 4 byte TLSDESC load/store relocation not supported (ILP32 eqv: TLSDESC_LD64_LO12)
+// CHECK-ERROR:   ldr w24, [x23, :tlsdesc_lo12:sym]
+// CHECK-ERROR:   ^
+// CHECK-ERROR: error: LP64 4 byte TLSDESC load/store relocation not supported (ILP32 eqv: TLSDESC_LD64_LO12)
+// CHECK-ERROR:   ldr s22, [x21, :tlsdesc_lo12:sym]
+// CHECK-ERROR:   ^
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
index fab9edcc159..c3b4b6c5229 100644
--- a/test/MC/AArch64/tls-relocs.s
+++ b/test/MC/AArch64/tls-relocs.s
@@ -392,8 +392,8 @@
 // CHECK: blr    x3                      // encoding: [0x60,0x00,0x3f,0xd6]
 
 // CHECK-ELF-NEXT:     0x104 R_AARCH64_TLSDESC_ADR_PAGE21 [[VARSYM]]
-// CHECK-ELF-NEXT:     0x108 R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
-// CHECK-ELF-NEXT:     0x10C R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x108 R_AARCH64_TLSDESC_LD64_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x10C R_AARCH64_TLSDESC_ADD_LO12 [[VARSYM]]
 // CHECK-ELF-NEXT:     0x110 R_AARCH64_TLSDESC_CALL [[VARSYM]]
 
 
diff --git a/test/MC/AMDGPU/vop_dpp.s b/test/MC/AMDGPU/vop_dpp.s
index 19f21c48ca2..664ef97e3a5 100644
--- a/test/MC/AMDGPU/vop_dpp.s
+++ b/test/MC/AMDGPU/vop_dpp.s
@@ -1,60 +1,62 @@
-// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=VI9
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=VI9
 
 // RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
 // RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
 // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
 // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9
 
 //===----------------------------------------------------------------------===//
 // Check dpp_ctrl values
 //===----------------------------------------------------------------------===//
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff]
 v_mov_b32 v0, v0 quad_perm:[0,2,1,1]
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff]
 v_mov_b32 v0, v0 row_shl:1
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff]
 v_mov_b32 v0, v0 row_shr:0xf
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff]
 v_mov_b32 v0, v0 row_ror:0xc
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff]
 v_mov_b32 v0, v0 wave_shl:1
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff]
 v_mov_b32 v0, v0 wave_rol:1
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff]
 v_mov_b32 v0, v0 wave_shr:1
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff]
 v_mov_b32 v0, v0 wave_ror:1
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff]
 v_mov_b32 v0, v0 row_mirror
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff]
 v_mov_b32 v0, v0 row_half_mirror
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff]
 v_mov_b32 v0, v0 row_bcast:15
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 row_bcast:31 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x43,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_bcast:31 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x43,0x01,0xff]
 v_mov_b32 v0, v0 row_bcast:31
 
 //===----------------------------------------------------------------------===//
@@ -62,31 +64,31 @@ v_mov_b32 v0, v0 row_bcast:31
 //===----------------------------------------------------------------------===//
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1]
 v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xaf]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xaf]
 v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xf1]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xf1]
 v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xff]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xff]
 v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xa1]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xa1]
 v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xaf]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xaf]
 v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xf1]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xf1]
 v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 bound_ctrl:0
 
 //===----------------------------------------------------------------------===//
@@ -94,19 +96,19 @@ v_mov_b32 v0, v0 quad_perm:[1,3,0,1] bank_mask:0x1 bound_ctrl:0
 //===----------------------------------------------------------------------===//
 
 // NOSICI: error:
-// VI: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1]
+// VI9: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1]
 v_add_f32 v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1]
+// VI9: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1]
 v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1]
+// VI9: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1]
 v_add_f32 v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1]
+// VI9: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1]
 v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 //===----------------------------------------------------------------------===//
@@ -114,223 +116,223 @@ v_add_f32 v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 //===----------------------------------------------------------------------===//
 
 // NOSICI: error:
-// VI: v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_u32_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_fract_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1]
 v_sin_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mov_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_mov_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_mov_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f32_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0a,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f32_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f32_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0c,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f32_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_u32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x10,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x10,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f16_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x14,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f16_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x14,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f16_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f32_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x16,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f32_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x16,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_rpi_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x18,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_rpi_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x18,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_rpi_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_flr_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1a,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_flr_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_flr_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_off_f32_i4_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1c,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_off_f32_i4_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x1c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_off_f32_i4 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f32_ubyte0_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x22,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f32_ubyte0_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x22,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte0 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f32_ubyte1_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x24,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f32_ubyte1_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x24,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte1 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f32_ubyte2_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x26,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f32_ubyte2_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x26,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte2 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f32_ubyte3_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x28,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f32_ubyte3_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x28,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f32_ubyte3 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_trunc_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x38,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_trunc_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x38,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_trunc_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ceil_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3a,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_ceil_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_ceil_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_rndne_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3c,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_rndne_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rndne_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_floor_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3e,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_floor_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x3e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_floor_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_exp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x40,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_exp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x40,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_exp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_log_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x42,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_log_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x42,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_log_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_rcp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x44,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_rcp_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x44,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rcp_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_rcp_iflag_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x46,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_rcp_iflag_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x46,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rcp_iflag_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_rsq_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x48,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_rsq_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x48,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rsq_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sqrt_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x4e,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_sqrt_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x4e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_sqrt_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cos_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x54,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cos_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x54,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cos_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_not_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x56,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_not_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x56,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_not_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_bfrev_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x58,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_bfrev_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x58,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_bfrev_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ffbh_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5a,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_ffbh_u32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_ffbh_u32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ffbl_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5c,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_ffbl_b32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_ffbl_b32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ffbh_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5e,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_ffbh_i32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x5e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_ffbh_i32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_frexp_exp_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x66,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_frexp_exp_i32_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x66,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_frexp_exp_i32_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_frexp_mant_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x68,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_frexp_mant_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x68,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_frexp_mant_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_log_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x98,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_log_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x98,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_log_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_exp_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x96,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_exp_legacy_f32_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x96,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_exp_legacy_f32 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f16_u16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x72,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f16_u16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x72,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f16_u16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_f16_i16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x74,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_f16_i16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x74,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_f16_i16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_u16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x76,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_u16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x76,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_u16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cvt_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x78,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cvt_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x78,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cvt_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_rcp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7a,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_rcp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rcp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sqrt_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7c,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_sqrt_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_sqrt_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_rsq_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7e,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_rsq_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x7e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rsq_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_log_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x80,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_log_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x80,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_log_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_exp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x82,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_exp_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x82,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_exp_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_frexp_mant_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x84,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_frexp_mant_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x84,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_frexp_mant_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_frexp_exp_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x86,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_frexp_exp_i16_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x86,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_frexp_exp_i16_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_floor_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x88,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_floor_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x88,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_floor_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ceil_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8a,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_ceil_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8a,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_ceil_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_trunc_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8c,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_trunc_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8c,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_trunc_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_rndne_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8e,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_rndne_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x8e,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_rndne_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_fract_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x90,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_fract_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x90,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_fract_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sin_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x92,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_sin_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x92,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_sin_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_cos_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x94,0x02,0x7e,0x00,0x01,0x09,0xa1]
+// VI9: v_cos_f16_dpp v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x94,0x02,0x7e,0x00,0x01,0x09,0xa1]
 v_cos_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 //===----------------------------------------------------------------------===//
@@ -339,195 +341,195 @@ v_cos_f16 v1, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 // ToDo: VOP2bInst instructions: v_add_u32, v_sub_u32 ... (vcc and ApplyMnemonic in AsmMatcherEmitter.cpp)
 
 // NOSICI: error:
-// VI: v_mac_f32_dpp v0, v0, v0  row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x01,0x01,0xff]
+// VI9: v_mac_f32_dpp v0, v0, v0  row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x01,0x01,0xff]
 v_mac_f32 v0, v0, v0 row_shl:1
 
 // NOSICI: error:
-// VI: v_mac_f32_dpp v0, v0, v0  row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x1f,0x01,0xff]
+// VI9: v_mac_f32_dpp v0, v0, v0  row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x1f,0x01,0xff]
 v_mac_f32 v0, v0, v0 row_shr:0xf
 
 // NOSICI: error:
-// VI: v_mac_f32_dpp v0, v0, v0  quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x4d,0x08,0xaf]
+// VI9: v_mac_f32_dpp v0, v0, v0  quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x2c,0x00,0x4d,0x08,0xaf]
 v_mac_f32 v0, v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1]
+// VI9: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1]
 v_add_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1]
+// VI9: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1]
 v_min_f32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1]
+// VI9: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1]
 v_and_b32 v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mul_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0c,0x02,0x01,0x09,0xa1]
+// VI9: v_mul_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0c,0x02,0x01,0x09,0xa1]
 v_mul_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sub_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x04,0x02,0x01,0x09,0xa1]
+// VI9: v_sub_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x04,0x02,0x01,0x09,0xa1]
 v_sub_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_subrev_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x06,0x02,0x01,0x09,0xa1]
+// VI9: v_subrev_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x06,0x02,0x01,0x09,0xa1]
 v_subrev_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mul_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0a,0x02,0x01,0x09,0xa1]
+// VI9: v_mul_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0a,0x02,0x01,0x09,0xa1]
 v_mul_f32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mul_hi_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0e,0x02,0x01,0x09,0xa1]
+// VI9: v_mul_hi_i32_i24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x0e,0x02,0x01,0x09,0xa1]
 v_mul_hi_i32_i24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mul_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x10,0x02,0x01,0x09,0xa1]
+// VI9: v_mul_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x10,0x02,0x01,0x09,0xa1]
 v_mul_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mul_hi_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x12,0x02,0x01,0x09,0xa1]
+// VI9: v_mul_hi_u32_u24_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x12,0x02,0x01,0x09,0xa1]
 v_mul_hi_u32_u24 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_max_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x16,0x02,0x01,0x09,0xa1]
+// VI9: v_max_f32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x16,0x02,0x01,0x09,0xa1]
 v_max_f32 v1, v2 v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_min_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x18,0x02,0x01,0x09,0xa1]
+// VI9: v_min_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x18,0x02,0x01,0x09,0xa1]
 v_min_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_max_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1a,0x02,0x01,0x09,0xa1]
+// VI9: v_max_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1a,0x02,0x01,0x09,0xa1]
 v_max_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_min_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1c,0x02,0x01,0x09,0xa1]
+// VI9: v_min_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1c,0x02,0x01,0x09,0xa1]
 v_min_u32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_max_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1e,0x02,0x01,0x09,0xa1]
+// VI9: v_max_u32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x1e,0x02,0x01,0x09,0xa1]
 v_max_u32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_lshrrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x20,0x02,0x01,0x09,0xa1]
+// VI9: v_lshrrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x20,0x02,0x01,0x09,0xa1]
 v_lshrrev_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ashrrev_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x22,0x02,0x01,0x09,0xa1]
+// VI9: v_ashrrev_i32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x22,0x02,0x01,0x09,0xa1]
 v_ashrrev_i32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_lshlrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x24,0x02,0x01,0x09,0xa1]
+// VI9: v_lshlrev_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x24,0x02,0x01,0x09,0xa1]
 v_lshlrev_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_or_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x28,0x02,0x01,0x09,0xa1]
+// VI9: v_or_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x28,0x02,0x01,0x09,0xa1]
 v_or_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_xor_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x2a,0x02,0x01,0x09,0xa1]
+// VI9: v_xor_b32_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x2a,0x02,0x01,0x09,0xa1]
 v_xor_b32 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_add_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3e,0x02,0x01,0x09,0xa1]
+// VI9: v_add_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3e,0x02,0x01,0x09,0xa1]
 v_add_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sub_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x40,0x02,0x01,0x09,0xa1]
+// VI9: v_sub_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x40,0x02,0x01,0x09,0xa1]
 v_sub_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_subrev_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x42,0x02,0x01,0x09,0xa1]
+// VI9: v_subrev_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x42,0x02,0x01,0x09,0xa1]
 v_subrev_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mul_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x44,0x02,0x01,0x09,0xa1]
+// VI9: v_mul_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x44,0x02,0x01,0x09,0xa1]
 v_mul_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1]
+// VI9: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1]
 v_mac_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_add_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4c,0x02,0x01,0x09,0xa1]
+// VI9: v_add_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4c,0x02,0x01,0x09,0xa1]
 v_add_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sub_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4e,0x02,0x01,0x09,0xa1]
+// VI9: v_sub_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x4e,0x02,0x01,0x09,0xa1]
 v_sub_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_subrev_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x50,0x02,0x01,0x09,0xa1]
+// VI9: v_subrev_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x50,0x02,0x01,0x09,0xa1]
 v_subrev_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_mul_lo_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x52,0x02,0x01,0x09,0xa1]
+// VI9: v_mul_lo_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x52,0x02,0x01,0x09,0xa1]
 v_mul_lo_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_lshlrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x54,0x02,0x01,0x09,0xa1]
+// VI9: v_lshlrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x54,0x02,0x01,0x09,0xa1]
 v_lshlrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_lshrrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x56,0x02,0x01,0x09,0xa1]
+// VI9: v_lshrrev_b16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x56,0x02,0x01,0x09,0xa1]
 v_lshrrev_b16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ashrrev_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x58,0x02,0x01,0x09,0xa1]
+// VI9: v_ashrrev_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x58,0x02,0x01,0x09,0xa1]
 v_ashrrev_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_max_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5a,0x02,0x01,0x09,0xa1]
+// VI9: v_max_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5a,0x02,0x01,0x09,0xa1]
 v_max_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_min_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5c,0x02,0x01,0x09,0xa1]
+// VI9: v_min_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5c,0x02,0x01,0x09,0xa1]
 v_min_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_max_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5e,0x02,0x01,0x09,0xa1]
+// VI9: v_max_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x5e,0x02,0x01,0x09,0xa1]
 v_max_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_max_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x60,0x02,0x01,0x09,0xa1]
+// VI9: v_max_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x60,0x02,0x01,0x09,0xa1]
 v_max_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_min_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x62,0x02,0x01,0x09,0xa1]
+// VI9: v_min_u16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x62,0x02,0x01,0x09,0xa1]
 v_min_u16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_min_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x64,0x02,0x01,0x09,0xa1]
+// VI9: v_min_i16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x64,0x02,0x01,0x09,0xa1]
 v_min_i16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_ldexp_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x66,0x02,0x01,0x09,0xa1]
+// VI9: v_ldexp_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x66,0x02,0x01,0x09,0xa1]
 v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_add_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
+// VI9: v_add_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
 v_add_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_sub_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
+// VI9: v_sub_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
 v_sub_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_subrev_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
+// VI9: v_subrev_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
 v_subrev_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
+// VI9: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
 v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
+// VI9: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
 v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
-// VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
+// VI9: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
 v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 //===----------------------------------------------------------------------===//
@@ -536,24 +538,30 @@ v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ct
 
 // NOSICI: error:
 // NOVI: error:
+// NOGFX9: error:
 v_mov_b32 v0, 1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
 // NOVI: error:
+// NOGFX9: error:
 v_and_b32 v0, 42, v1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
 // NOVI: error:
+// NOGFX9: error:
 v_add_f32 v0, v1, 345 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
 // NOVI: error:
+// NOGFX9: error:
 v_mov_b32 v0, s1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
 // NOVI: error:
+// NOGFX9: error:
 v_and_b32 v0, s42, v1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
 
 // NOSICI: error:
 // NOVI: error:
+// NOGFX9: error:
 v_add_f32 v0, v1, s45 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
diff --git a/test/MC/AMDGPU/vop_dpp_expr.s b/test/MC/AMDGPU/vop_dpp_expr.s
index 0ae74647727..1e93424b7a5 100644
--- a/test/MC/AMDGPU/vop_dpp_expr.s
+++ b/test/MC/AMDGPU/vop_dpp_expr.s
@@ -1,35 +1,36 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI --check-prefix=VI9
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 --check-prefix=VI9
 
 zero = 0
 two = 2
 one = 1
 
 v_mov_b32 v0, v0 quad_perm:[0+zero,zero-2+two*two,1/one,1]
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff]
 
 v_mov_b32 v0, v0 row_shl:two-1
-// VI: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff]
 
 v_mov_b32 v0, v0 row_shr:0xe+one
-// VI: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff]
 
 v_mov_b32 v0, v0 row_ror:0x6*two
-// VI: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff]
 
 v_mov_b32 v0, v0 wave_shl:two/2
-// VI: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff]
 
 v_mov_b32 v0, v0 wave_rol:two-one
-// VI: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff]
 
 v_mov_b32 v0, v0 wave_shr:1+zero
-// VI: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff]
 
 v_mov_b32 v0, v0 wave_ror:two*2-3
-// VI: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff]
 
 v_mov_b32 v0, v0 row_bcast:150/(two*2+zero/one+two*3)
-// VI: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff]
+// VI9: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff]
 
 v_mov_b32 v0, v0 quad_perm:[one,two+one,zero,2-one] row_mask:2*5 bank_mask:0x2-one bound_ctrl:1-1
-// VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1]
+// VI9: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1]
diff --git a/test/MC/AVR/inst-lds.s b/test/MC/AVR/inst-lds.s
index a3d36060756..e8151a32b86 100644
--- a/test/MC/AVR/inst-lds.s
+++ b/test/MC/AVR/inst-lds.s
@@ -12,5 +12,5 @@ foo:
 ; CHECK: lds r29, 190                 ; encoding: [0xd0,0x91,0xbe,0x00]
 ; CHECK: lds r22, 172                 ; encoding: [0x60,0x91,0xac,0x00]
 ; CHECK: lds r27, 92                  ; encoding: [0xb0,0x91,0x5c,0x00]
-; CHECK: lds r4, SYMBOL+12            ; encoding: [0x40'A',0x90'A',0x00,0x00]
-; CHECK:                              ;   fixup A - offset: 0, value: SYMBOL+12, kind: fixup_16
+; CHECK: lds r4, SYMBOL+12            ; encoding: [0x40,0x90,A,A]
+; CHECK:                              ;   fixup A - offset: 2, value: SYMBOL+12, kind: fixup_16
diff --git a/test/MC/AVR/inst-sts.s b/test/MC/AVR/inst-sts.s
index 821c207b902..0f5af7da6f3 100644
--- a/test/MC/AVR/inst-sts.s
+++ b/test/MC/AVR/inst-sts.s
@@ -9,6 +9,6 @@ foo:
 
 ; CHECK:  sts 3,   r5                 ; encoding: [0x50,0x92,0x03,0x00]
 ; CHECK:  sts 255, r7                 ; encoding: [0x70,0x92,0xff,0x00]
-; CHECK:  sts SYMBOL+1, r25           ; encoding: [0x90'A',0x93'A',0x00,0x00]
-; CHECK:                              ;   fixup A - offset: 0, value: SYMBOL+1, kind: fixup_16
+; CHECK:  sts SYMBOL+1, r25           ; encoding: [0x90,0x93,A,A]
+; CHECK:                              ;   fixup A - offset: 2, value: SYMBOL+1, kind: fixup_16
 
diff --git a/test/MC/AsmParser/altmacro_expression.s b/test/MC/AsmParser/altmacro_expression.s
new file mode 100644
index 00000000000..58d8b486cf8
--- /dev/null
+++ b/test/MC/AsmParser/altmacro_expression.s
@@ -0,0 +1,65 @@
+# RUN: llvm-mc -triple i386-linux-gnu %s | FileCheck %s
+
+# Checking that the '%' was evaluated as a string first
+# In a fail scenario: The asmprint will print: addl $%(1+4), %eax
+
+# CHECK:  addl $5, %eax
+.altmacro
+.macro percent_expr arg
+    addl $\arg, %eax
+.endm
+
+percent_expr %(1+4)
+
+
+# Checking that the second '%' acts as modulo operator
+# The altmacro percent '%' must be located before the first argument
+# If a percent is located in the middle of the estimated argument without any
+# '%' in the beginning , error will be generated.
+# The second percent '%' after the first altmacro percent '%' is a regular operator.
+
+# CHECK:  addl $1, %eax
+.macro inner_percent arg
+    addl $\arg, %eax
+.endm
+
+inner_percent %(1%4)
+
+
+# Checking for nested macro
+# The first argument use is for the calling function and the second use is for the evaluation.
+
+# CHECK:  addl    $1, %eax
+.macro macro_call_0 number
+    addl $\number, %eax
+.endm
+
+.macro macro_call_1 number
+    macro_call_\number %(\number + 1)
+.endm
+
+macro_call_1 %(1-1)
+
+
+# Checking the ability to pass a number of arguments.
+# The arguments can be separated by ',' or not.
+
+# CHECK: label013:
+# CHECK:  addl $0, %eax
+# CHECK:  addl $1, %eax
+# CHECK:  addl $3, %eax
+
+# CHECK: label014:
+# CHECK:  addl $0, %eax
+# CHECK:  addl $1, %eax
+# CHECK:  addl $4, %eax
+
+.macro multi_args_macro arg1 arg2 arg3
+    label\arg1\arg2\arg3:
+	addl $\arg1, %eax
+	addl $\arg2, %eax
+	addl $\arg3, %eax
+.endm
+
+multi_args_macro %(1+4-5) 1 %2+1
+multi_args_macro %(1+4-5),1,%4%10
diff --git a/test/MC/AsmParser/negativ_altmacro_expression.s b/test/MC/AsmParser/negativ_altmacro_expression.s
new file mode 100644
index 00000000000..edcc9c99869
--- /dev/null
+++ b/test/MC/AsmParser/negativ_altmacro_expression.s
@@ -0,0 +1,34 @@
+# RUN: not llvm-mc -triple i386-linux-gnu %s 2>&1 | FileCheck %s
+
+# This test is a negative test for the altmacro expression.
+# In this test we check the '.noaltmacro' directive.
+# We expect that '.altmacro' and '.noaltmacro' will act as a switch on/off directives to the alternate macro mode.
+# .noaltmacro returns the format into a regular macro handling.
+# The defult mode is ".noaltmacro" as first test checks.
+
+# CHECK:  error: unknown token in expression
+# CHECK-NEXT: addl $%(1%4), %eax
+.macro inner_percent arg
+    addl $\arg, %eax
+.endm
+
+inner_percent %(1%4)
+
+.altmacro
+.noaltmacro
+
+# CHECK: multi_args_macro %(1+4-5) 1 %2+1
+# CHECK: error: unknown token in expression
+# CHECK-NEXT: addl $%(1+4-5), %eax
+
+
+# CHECK: multi_args_macro %(1+4-5),1,%4%10
+# CHECK: error: unknown token in expression
+# CHECK-NEXT: addl $%(1+4-5), %eax
+.macro multi_args_macro arg1 arg2 arg3
+  label\arg1\arg2\arg3:
+  addl $\arg1, %eax
+.endm
+
+multi_args_macro %(1+4-5) 1 %2+1
+multi_args_macro %(1+4-5),1,%4%10
diff --git a/test/MC/ELF/section-numeric-invalid-type.s b/test/MC/ELF/section-numeric-invalid-type.s
index 3ae071bc7c1..b8a39600eb1 100644
--- a/test/MC/ELF/section-numeric-invalid-type.s
+++ b/test/MC/ELF/section-numeric-invalid-type.s
@@ -8,7 +8,7 @@
 
 // OBJ:      Section {
 // OBJ:        Name: .sec
-// OBJ-NEXT:   Type: (0x7FFFFFFF)
+// OBJ-NEXT:   Type: Unknown (0x7FFFFFFF)
 // OBJ:      }
 
 // ASM: unsupported type 0x7fffffff for section .sec
diff --git a/test/MC/Hexagon/PacketRules/registers_readonly.s b/test/MC/Hexagon/PacketRules/registers_readonly.s
new file mode 100644
index 00000000000..ec118589711
--- /dev/null
+++ b/test/MC/Hexagon/PacketRules/registers_readonly.s
@@ -0,0 +1,5 @@
+# RUN: not llvm-mc -arch=hexagon -filetype=obj %s 2>&1 | FileCheck %s
+
+# CHECK: 4:3: error: Cannot write to read-only register `PC'
+{ pc = r0
+  r0 = r0 }
diff --git a/test/MC/Hexagon/PacketRules/solo.s b/test/MC/Hexagon/PacketRules/solo.s
new file mode 100644
index 00000000000..86107d52f1f
--- /dev/null
+++ b/test/MC/Hexagon/PacketRules/solo.s
@@ -0,0 +1,5 @@
+# RUN: not llvm-mc -arch=hexagon -filetype=asm %s 2>%t; FileCheck %s <%t
+
+{ brkpt
+  r0 = r0 }
+# CHECK: 3:3: error: Instruction is marked `isSolo' and cannot have other instructions in the same packet
diff --git a/test/MC/Hexagon/multiple_errs.s b/test/MC/Hexagon/multiple_errs.s
new file mode 100644
index 00000000000..cd04c0efbd3
--- /dev/null
+++ b/test/MC/Hexagon/multiple_errs.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -arch=hexagon -filetype=asm %s 2> %t; FileCheck %s < %t
+#
+
+{
+  if (!p0) r0=r1;
+  if (!p0) r0=r2;
+  trap0(#15);
+}
+# CHECK: error: register `R0' modified more than once
+# CHECK: error: Instruction is marked `isSolo' and cannot have other instructions in the same packet
diff --git a/test/MC/Hexagon/registers_readonly.s b/test/MC/Hexagon/registers_readonly.s
new file mode 100644
index 00000000000..cf109feef03
--- /dev/null
+++ b/test/MC/Hexagon/registers_readonly.s
@@ -0,0 +1,7 @@
+# RUN: not llvm-mc -arch=hexagon -filetype=obj -mv5 %s 2>&1 | FileCheck %s
+
+# CHECK: 4:1: error: Cannot write to read-only register `PC'
+pc = r0
+
+# CHECK: 7:1: error: Cannot write to read-only register `PC'
+c9 = r0
diff --git a/test/MC/Hexagon/ro-c9.s b/test/MC/Hexagon/ro-c9.s
new file mode 100644
index 00000000000..6771430cb95
--- /dev/null
+++ b/test/MC/Hexagon/ro-c9.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -arch=hexagon -filetype=asm %s 2> %t; FileCheck %s < %t
+
+# Check that changes to a read-only register is caught.
+
+	{ pc = r0 }
+# CHECK: error: Cannot write to read-only register
diff --git a/test/MC/Hexagon/ro-cc9.s b/test/MC/Hexagon/ro-cc9.s
new file mode 100644
index 00000000000..0596ca1627f
--- /dev/null
+++ b/test/MC/Hexagon/ro-cc9.s
@@ -0,0 +1,7 @@
+# RUN: not llvm-mc -arch=hexagon -filetype=asm %s 2> %t; FileCheck %s < %t
+#
+
+# Check that changes to a read-only register is caught.
+
+{ c9:8 = r1:0 }
+# CHECK: error: Cannot write to read-only register
diff --git a/test/MC/Mips/relocation.s b/test/MC/Mips/relocation.s
index 7e275eaf36a..478d9901b15 100644
--- a/test/MC/Mips/relocation.s
+++ b/test/MC/Mips/relocation.s
@@ -318,7 +318,7 @@ foo_mm:
                                            // ENCLE: addiu $2, $2, %lo(bar) # encoding: [0x42'A',0x30'A',0x00,0x00]
                                            // FIXUP: # fixup A - offset: 0, value: %lo(bar), kind: fixup_MICROMIPS_LO16
 
-// DATA-NEXT:  0010: 30430000 30420004 30430001 30420030
+// DATA-NEXT:  0010: 30430000 30420004 30430001 30420034
         addiu $2, $3, %got(baz)            // RELOC: R_MICROMIPS_GOT16 .text
                                            // ENCBE: addiu $2, $3, %got(baz) # encoding: [0x30,0x43,A,A]
                                            // The placement of the 'A' annotations is incorrect. They use 32-bit little endian instead of 2x 16-bit little endian.
@@ -377,5 +377,11 @@ foo_mm:
                                            // ENCLE: addiu $2, $2, %lo(bar) # encoding: [0x42'A',0x30'A',0x00,0x00]
                                            // FIXUP: # fixup A - offset: 0, value: %lo(bar), kind: fixup_MICROMIPS_LO16
 
+// DATA-NEXT:  0040: 30430000 00000000 00000000 00000000
+        addiu $2, $3, %gottprel(foo)       // RELOC: R_MICROMIPS_TLS_GOTTPREL foo
+                                           // ENCBE: addiu $2, $3, %gottprel(foo) # encoding: [0x30,0x43,A,A]
+                                           // ENCLE: addiu $2, $3, %gottprel(foo) # encoding: [0x43'A',0x30'A',0x00,0x00]
+                                           // FIXUP: # fixup A - offset: 0, value: %gottprel(foo), kind: fixup_MICROMIPS_GOTTPREL
+
         .space 65520, 0
 long_mm:
diff --git a/test/MC/WebAssembly/reloc-code.ll b/test/MC/WebAssembly/reloc-code.ll
new file mode 100644
index 00000000000..5c794400fa0
--- /dev/null
+++ b/test/MC/WebAssembly/reloc-code.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple wasm32-unknown-unknown-wasm -filetype=obj %s -o - | llvm-readobj -r -expand-relocs | FileCheck %s
+
+; Pointers to functions of two different types
+@a = global i64 ()* inttoptr (i64 5 to i64 ()*), align 8
+@b = global i32 ()* inttoptr (i32 7 to i32 ()*), align 8
+
+; External functions
+declare i32 @c()
+declare i32 @d()
+
+define i32 @f1() {
+entry:
+    %aa = load i64 ()*, i64 ()** @a, align 8
+    %bb = load i32 ()*, i32 ()** @b, align 8
+    %tmp1 = call i64 %aa()
+    %tmp2 = call i32 %bb()
+    %tmp3 = call i32 @c()
+    %tmp4 = call i32 @d()
+    ret i32 %tmp2
+}
+
+
+; CHECK: Format: WASM
+; CHECK: Relocations [
+; CHECK-NEXT:   Section (8) CODE {
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_GLOBAL_ADDR_LEB (3)
+; CHECK-NEXT:       Offset: 0x9
+; CHECK-NEXT:       Index: 0x0
+; CHECK-NEXT:       Addend: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_GLOBAL_ADDR_LEB (3)
+; CHECK-NEXT:       Offset: 0x14
+; CHECK-NEXT:       Index: 0x1
+; CHECK-NEXT:       Addend: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
+; CHECK-NEXT:       Offset: 0x2D
+; CHECK-NEXT:       Index: 0x0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
+; CHECK-NEXT:       Offset: 0x34
+; CHECK-NEXT:       Index: 0x1
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_TYPE_INDEX_LEB (6)
+; CHECK-NEXT:       Offset: 0x1A
+; CHECK-NEXT:       Index: 0x1
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_TYPE_INDEX_LEB (6)
+; CHECK-NEXT:       Offset: 0x24
+; CHECK-NEXT:       Index: 0x0
+; CHECK-NEXT:     }
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
diff --git a/test/MC/WebAssembly/reloc-data.ll b/test/MC/WebAssembly/reloc-data.ll
new file mode 100644
index 00000000000..5bd18fa82e8
--- /dev/null
+++ b/test/MC/WebAssembly/reloc-data.ll
@@ -0,0 +1,26 @@
+; RUN: llc -O0 -mtriple wasm32-unknown-unknown-wasm -filetype=obj %s -o - | llvm-readobj -r -expand-relocs | FileCheck %s
+
+; foo and bar are external and internal symbols.  a and b are pointers
+; initialized to these locations offset by 2 and -2 elements respecitively.
+@foo = external global i32, align 4
+@bar = global i64 7, align 4
+@a = global i32* getelementptr (i32, i32* @foo, i32 2), align 8
+@b = global i64* getelementptr (i64, i64* @bar, i64 -2), align 8
+
+; CHECK: Format: WASM
+; CHECK: Relocations [
+; CHECK:   Section (6) DATA {
+; CHECK:     Relocation {
+; CHECK:       Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32 (5)
+; CHECK:       Offset: 0xE
+; CHECK:       Index: 0x0
+; CHECK:       Addend: 8
+; CHECK:     }
+; CHECK:     Relocation {
+; CHECK:       Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32 (5)
+; CHECK:       Offset: 0x16
+; CHECK:       Index: 0x1
+; CHECK:       Addend: -16
+; CHECK:     }
+; CHECK:   }
+; CHECK: ]
diff --git a/test/MC/WebAssembly/sections.ll b/test/MC/WebAssembly/sections.ll
new file mode 100644
index 00000000000..85bf0818509
--- /dev/null
+++ b/test/MC/WebAssembly/sections.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple wasm32-unknown-unknown-wasm -filetype=obj %s -o - | llvm-readobj -s | FileCheck %s
+
+; external function
+declare i32 @a()
+
+; global data
+@b = global i32 3, align 4
+
+; local function
+define i32 @f1() {
+entry:
+    %tmp1 = call i32 @a()
+    ret i32 %tmp1
+}
+
+
+; CHECK: Format: WASM
+; CHECK: Arch: wasm32
+; CHECK: AddressSize: 32bit
+; CHECK: Sections [
+; CHECK:   Section {
+; CHECK:     Type: TYPE (0x1)
+; CHECK:   }
+; CHECK:  Section {
+; CHECK:    Type: IMPORT (0x2)
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: FUNCTION (0x3)
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: TABLE (0x4)
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: MEMORY (0x5)
+; CHECK:    Memories [
+; CHECK:      Memory {
+; CHECK:        InitialPages: 1
+; CHECK:      }
+; CHECK:    ]
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: GLOBAL (0x6)
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: EXPORT (0x7)
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: CODE (0xA)
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: DATA (0xB)
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: CUSTOM (0x0)
+; CHECK:    Name: name
+; CHECK:  }
+; CHECK:  Section {
+; CHECK:    Type: CUSTOM (0x0)
+; CHECK:    Name: reloc.CODE
+; CHECK:  }
+; CHECK:]
+
diff --git a/test/MC/X86/pr27884.s b/test/MC/X86/pr27884.s
new file mode 100644
index 00000000000..edd4e8d34a9
--- /dev/null
+++ b/test/MC/X86/pr27884.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s
+
+.intel_syntax
+add rbx, 0B0h
+add rbx, 0b0h
+add rax, 0A0h
+add rax, 0a0h
diff --git a/test/Other/new-pm-defaults.ll b/test/Other/new-pm-defaults.ll
index fc1170c666f..a4a1c1f546c 100644
--- a/test/Other/new-pm-defaults.ll
+++ b/test/Other/new-pm-defaults.ll
@@ -43,10 +43,6 @@
 ; CHECK-O-NEXT: Running pass: EarlyCSEPass
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass
-; CHECK-O-NEXT: Running pass: GVNHoistPass
-; CHECK-O-NEXT: Running analysis: AAManager
-; CHECK-O-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: IPSCCPPass
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
@@ -69,6 +65,7 @@
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
+; CHECK-O-NEXT: Running analysis: AAManager
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
@@ -108,13 +105,18 @@
 ; CHECK-O-NEXT: Finished Loop pass manager run.
 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Os-NEXT: Running pass: GVN
+; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
 ; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Oz-NEXT: Running pass: GVN
+; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
 ; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O2-NEXT: Running pass: GVN
+; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
 ; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O3-NEXT: Running pass: GVN
+; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
 ; CHECK-O-NEXT: Running pass: MemCpyOptPass
+; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis
 ; CHECK-O-NEXT: Running pass: SCCPPass
 ; CHECK-O-NEXT: Running pass: BDCEPass
 ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
diff --git a/test/TableGen/GlobalISelEmitter.td b/test/TableGen/GlobalISelEmitter.td
index 8d59036100a..9f89602ae4a 100644
--- a/test/TableGen/GlobalISelEmitter.td
+++ b/test/TableGen/GlobalISelEmitter.td
@@ -32,22 +32,18 @@ def m1Z : OperandWithDefaultOps <i32, (ops (i32 -1), R0)>;
 
 def HasA : Predicate<"Subtarget->hasA()">;
 def HasB : Predicate<"Subtarget->hasB()">;
+def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; }
 
 //===- Test the function boilerplate. -------------------------------------===//
 
 // CHECK-LABEL: enum SubtargetFeatureBits : uint8_t {
 // CHECK-NEXT:    Feature_HasABit = 0,
 // CHECK-NEXT:    Feature_HasBBit = 1,
-// CHECK-NEXT:  };
-
-// CHECK-LABEL: static const char *SubtargetFeatureNames[] = {
-// CHECK-NEXT:    "Feature_HasA",
-// CHECK-NEXT:    "Feature_HasB",
-// CHECK-NEXT:    nullptr
+// CHECK-NEXT:    Feature_HasCBit = 2,
 // CHECK-NEXT:  };
 
 // CHECK-LABEL: PredicateBitset MyTargetInstructionSelector::
-// CHECK-NEXT:  computeAvailableFeatures(const MachineFunction *MF, const MyTargetSubtarget *Subtarget) const {
+// CHECK-NEXT:  computeAvailableModuleFeatures(const MyTargetSubtarget *Subtarget) const {
 // CHECK-NEXT:    PredicateBitset Features;
 // CHECK-NEXT:    if (Subtarget->hasA())
 // CHECK-NEXT:      Features[Feature_HasABit] = 1;
@@ -56,6 +52,14 @@ def HasB : Predicate<"Subtarget->hasB()">;
 // CHECK-NEXT:    return Features;
 // CHECK-NEXT:  }
 
+// CHECK-LABEL: PredicateBitset MyTargetInstructionSelector::
+// CHECK-NEXT:  computeAvailableFunctionFeatures(const MyTargetSubtarget *Subtarget, const MachineFunction *MF) const {
+// CHECK-NEXT:    PredicateBitset Features;
+// CHECK-NEXT:    if (Subtarget->hasC())
+// CHECK-NEXT:      Features[Feature_HasCBit] = 1;
+// CHECK-NEXT:    return Features;
+// CHECK-NEXT:  }
+
 // CHECK: bool MyTargetInstructionSelector::selectImpl(MachineInstr &I) const {
 // CHECK: MachineFunction &MF = *I.getParent()->getParent();
 // CHECK: const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -216,7 +220,7 @@ def MULADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3),
 //===- Test another simple pattern with regclass operands. ----------------===//
 
 // CHECK-LABEL: if ([&]() {
-// CHECK-NEXT:    PredicateBitset ExpectedFeatures = {Feature_HasABit, Feature_HasBBit};
+// CHECK-NEXT:    PredicateBitset ExpectedFeatures = {Feature_HasABit, Feature_HasBBit, Feature_HasCBit};
 // CHECK-NEXT:    if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures)
 // CHECK-NEXT:      return false;
 // CHECK-NEXT:    MachineInstr &MI0 = I;
@@ -247,7 +251,7 @@ def MULADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3),
 
 def MUL : I<(outs GPR32:$dst), (ins GPR32:$src2, GPR32:$src1),
              [(set GPR32:$dst, (mul GPR32:$src1, GPR32:$src2))]>,
-          Requires<[HasA, HasB]>;
+          Requires<[HasA, HasB, HasC]>;
 
 //===- Test a pattern with ComplexPattern operands. -----------------------===//
 //
diff --git a/test/ThinLTO/X86/debuginfo-cu-import.ll b/test/ThinLTO/X86/debuginfo-cu-import.ll
index d4ce830578c..42a75119186 100644
--- a/test/ThinLTO/X86/debuginfo-cu-import.ll
+++ b/test/ThinLTO/X86/debuginfo-cu-import.ll
@@ -43,7 +43,7 @@ entry:
 !1 = !DIFile(filename: "a2.cc", directory: "")
 !2 = !{!3}
 !3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "enum1", scope: !4, file: !1, line: 50, size: 32, elements: !5, identifier: "_ZTSN9__gnu_cxx12_Lock_policyE")
-!4 = !DINamespace(name: "A", scope: null, file: !1, line: 1)
+!4 = !DINamespace(name: "A", scope: null)
 !5 = !{}
 !6 = !{!7}
 !7 = !DICompositeType(tag: DW_TAG_structure_type, name: "Base", file: !1, line: 1, size: 32, align: 32, elements: !5, identifier: "_ZTS4Base")
diff --git a/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll b/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
index e37b7e63645..425e9697359 100644
--- a/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
+++ b/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck %s
 
 ; This test checks to make sure that CodeExtractor updates
 ;  the exit branch probabilities for multiple exit blocks.
@@ -22,7 +22,7 @@ ret i32 %val
 
 ; CHECK-LABEL: @dummyCaller
 ; CHECK: call
-; CHECK-NEXT: br i1 {{.*}}!prof [[COUNT1:![0-9]+]]
+; CHECK-NEXT: br i1 {{.*}}return.i{{.*}}return.2{{.*}}!prof [[COUNT1:![0-9]+]]
 }
 
 !llvm.module.flags = !{!0}
@@ -31,4 +31,4 @@ ret i32 %val
 !2 = !{!"branch_weights", i32 5, i32 5}
 !3 = !{!"branch_weights", i32 4, i32 1}
 
-; CHECK: [[COUNT1]] = !{!"branch_weights", i32 8, i32 31}
+; CHECK: [[COUNT1]] = !{!"branch_weights", i32 31, i32 8}
diff --git a/test/Transforms/CodeExtractor/PartialInlineAnd.ll b/test/Transforms/CodeExtractor/PartialInlineAnd.ll
new file mode 100644
index 00000000000..e981a5ba581
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAnd.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck  --check-prefix=LIMIT %s
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb5
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @channels() #2
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb1
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb1, %bb
+  %tmp6 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp6
+}
+
+declare i32 @channels(...) local_unnamed_addr #1
+
+declare void @foo(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.1_
+; LIMIT-LABEL: @dummy_caller
+; LIMIT: br i1
+; LIMIT-NOT: br
+; LIMIT: call void @bar.1_
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineAndOr.ll b/test/Transforms/CodeExtractor/PartialInlineAndOr.ll
new file mode 100644
index 00000000000..485e06ce102
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineAndOr.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -S | FileCheck  --check-prefix=LIMIT %s
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb1, label %bb4
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @n() #2
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb7, label %bb4
+
+bb4:                                              ; preds = %bb1, %bb
+  %tmp5 = tail call i32 (...) @m() #2
+  %tmp6 = icmp slt i32 %tmp5, %arg
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb4, %bb1
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb4
+  %tmp9 = phi i32 [ 0, %bb7 ], [ 1, %bb4 ]
+  ret i32 %tmp9
+}
+
+declare i32 @n(...) local_unnamed_addr #1
+
+declare i32 @m(...) local_unnamed_addr #1
+
+declare void @foo(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.1_
+; LIMIT-LABEL: @dummy_caller
+; LIMIT-NOT: br i1
+; LIMIT: call i32 @bar
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind } 
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+
diff --git a/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll b/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
index 3ba03843046..c8808182f71 100644
--- a/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
+++ b/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
@@ -7,6 +7,8 @@
 ; RUN: opt -S -passes=partial-inliner  -pass-remarks=partial-inlining  --disable-partial-inlining < %s 2>&1 | FileCheck  --check-prefix=LIMIT %s
 ; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining   -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT  %s
 ; RUN: opt -S -passes=partial-inliner  -pass-remarks=partial-inlining  -max-partial-inlining=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT  %s
+; RUN: opt -S -partial-inliner -pass-remarks=partial-inlining   -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT  %s
+; RUN: opt -S -passes=partial-inliner  -pass-remarks=partial-inlining  -inline-threshold=0 < %s 2>&1 | FileCheck --check-prefix=LIMIT  %s
 
 define i32 @bar(i32 %arg) local_unnamed_addr #0 !dbg !5 {
 bb:
@@ -30,6 +32,38 @@ bb2:                                              ; preds = %bb1, %bb
   ret i32 %tmp3, !dbg !19
 }
 
+define i32 @bar_noinline(i32 %arg) local_unnamed_addr #1 !dbg !5 {
+bb:
+  %tmp = icmp slt i32 %arg, 0, !dbg !7
+  br i1 %tmp, label %bb1, label %bb2, !dbg !8
+
+bb1:                                              ; preds = %bb
+  tail call void (...) @foo() #0, !dbg !9
+  tail call void (...) @foo() #0, !dbg !10
+  tail call void (...) @foo() #0, !dbg !11
+  br label %bb2, !dbg !18
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp3, !dbg !19
+}
+
+define i32 @bar_alwaysinline(i32 %arg) local_unnamed_addr #2 !dbg !5 {
+bb:
+  %tmp = icmp slt i32 %arg, 0, !dbg !7
+  br i1 %tmp, label %bb1, label %bb2, !dbg !8
+
+bb1:                                              ; preds = %bb
+  tail call void (...) @foo() #0, !dbg !9
+  tail call void (...) @foo() #0, !dbg !10
+  tail call void (...) @foo() #0, !dbg !11
+  br label %bb2, !dbg !18
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp3, !dbg !19
+}
+
 ; Function Attrs: nounwind
 declare void @foo(...) local_unnamed_addr #0
 
@@ -37,12 +71,18 @@ declare void @foo(...) local_unnamed_addr #0
 define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 !dbg !20 {
 bb:
 ; CHECK:remark{{.*}}bar partially inlined into dummy_caller
+; CHECK-NOT:remark{{.*}}bar_noinline partially inlined into dummy_caller
+; CHECK-NOT:remark{{.*}}bar_alwaysinline partially inlined into dummy_caller
 ; LIMIT-NOT:remark{{.*}}bar partially inlined into dummy_caller
   %tmp = tail call i32 @bar(i32 %arg), !dbg !21
+  %tmp2 = tail call i32 @bar_noinline(i32 %arg), !dbg !21
+  %tmp3 = tail call i32 @bar_alwaysinline(i32 %arg), !dbg !21
   ret i32 %tmp, !dbg !22
 }
 
 attributes #0 = { nounwind }
+attributes #1 = { noinline nounwind }
+attributes #2 = { alwaysinline nounwind }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3}
diff --git a/test/Transforms/CodeExtractor/PartialInlineOr.ll b/test/Transforms/CodeExtractor/PartialInlineOr.ll
new file mode 100644
index 00000000000..5408b4faaf7
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineOr.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck  --check-prefix=LIMIT %s
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb4, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @channels() #1
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb1, %bb
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb1
+  %.0 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ]
+  ret i32 %.0
+}
+
+declare i32 @channels(...) local_unnamed_addr
+
+declare void @foo(...) local_unnamed_addr
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.2_
+; LIMIT-LABEL: @dummy_caller
+; LIMIT-NOT: br
+; LIMIT: call i32 @bar(
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+define i32 @bar_multi_ret(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb4, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @channels() #1
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb1, %bb
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  tail call void (...) @foo() #1
+  %tmp4 = icmp slt i32 %arg, 10
+  br i1 %tmp4, label %bb6, label %bb5
+bb6:
+  tail call void (...) @foo() #1
+  %tmp5 = icmp slt i32 %arg, 3
+  br i1 %tmp5, label %bb7, label %bb5
+bb7:
+  tail call void (...) @foo() #1
+  br label %bb8
+bb8:
+  ret i32 0 
+
+bb5:                                              ; preds = %bb4, %bb1
+  %.0 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ], [0, %bb6]
+  ret i32 %.0
+}
+
+define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 {
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call {{.*}} @bar_multi_ret.1_
+  %tmp = tail call i32 @bar_multi_ret(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 300576)"}
diff --git a/test/Transforms/CodeExtractor/PartialInlineOrAnd.ll b/test/Transforms/CodeExtractor/PartialInlineOrAnd.ll
new file mode 100644
index 00000000000..282d300fadb
--- /dev/null
+++ b/test/Transforms/CodeExtractor/PartialInlineOrAnd.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -partial-inliner -S | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=3 -S | FileCheck --check-prefix=LIMIT3 %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=3 -S | FileCheck  --check-prefix=LIMIT3 %s
+; RUN: opt < %s -partial-inliner -max-num-inline-blocks=2 -S | FileCheck --check-prefix=LIMIT2 %s
+; RUN: opt < %s -passes=partial-inliner -max-num-inline-blocks=2 -S | FileCheck  --check-prefix=LIMIT2 %s
+
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %arg) local_unnamed_addr #0 {
+bb:
+  %tmp = icmp slt i32 %arg, 0
+  br i1 %tmp, label %bb4, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 (...) @n() #2
+  %tmp3 = icmp slt i32 %tmp2, %arg
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb1, %bb
+  %tmp5 = tail call i32 (...) @m() #2
+  %tmp6 = icmp sgt i32 %tmp5, %arg
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb4
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  tail call void (...) @foo() #2
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb4, %bb1
+  %tmp9 = phi i32 [ 0, %bb7 ], [ 1, %bb4 ], [ 1, %bb1 ]
+  ret i32 %tmp9
+}
+
+declare i32 @n(...) local_unnamed_addr #1
+
+declare i32 @m(...) local_unnamed_addr #1
+
+declare void @foo(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 {
+bb:
+; CHECK-LABEL: @dummy_caller
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: br i1
+; CHECK: call void @bar.1_
+; LIMIT3-LABEL: @dummy_caller
+; LIMIT3: br i1
+; LIMIT3: br i1
+; LIMIT3-NOT: br i1
+; LIMIT3: call void @bar.1_
+; LIMIT2-LABEL: @dummy_caller
+; LIMIT2-NOT: br i1
+; LIMIT2: call i32 @bar(
+  %tmp = tail call i32 @bar(i32 %arg)
+  ret i32 %tmp
+}
+
+attributes #0 = { nounwind } 
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+
diff --git a/test/Transforms/CodeExtractor/SingleCondition.ll b/test/Transforms/CodeExtractor/SingleCondition.ll
new file mode 100644
index 00000000000..90cda889a21
--- /dev/null
+++ b/test/Transforms/CodeExtractor/SingleCondition.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -partial-inliner -S  | FileCheck %s
+; RUN: opt < %s -passes=partial-inliner -S  | FileCheck %s
+
+define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) {
+entry:
+  br i1 %cond, label %if.then, label %return
+if.then:
+  ; Dummy store to have more than 0 uses
+  store i32 10, i32* %align.val, align 4
+  br label %return
+return:             ; preds = %entry
+  ret i32 0
+}
+
+define internal i32 @dummyCaller(i1 %cond, i32* align 2 %align.val) {
+entry:
+; CHECK-LABEL: @dummyCaller
+; CHECK: br
+; CHECK: call void @inlinedFunc.1_ 
+  %val = call i32 @inlinedFunc(i1 %cond, i32* %align.val)
+  ret i32 %val
+}
+
diff --git a/test/Transforms/CodeExtractor/unreachable-block.ll b/test/Transforms/CodeExtractor/unreachable-block.ll
index d20a35718e6..09f41f6bd2f 100644
--- a/test/Transforms/CodeExtractor/unreachable-block.ll
+++ b/test/Transforms/CodeExtractor/unreachable-block.ll
@@ -9,13 +9,11 @@
 ; CHECK-LABEL: define internal void @tinkywinky.1_ontrue() {
 ; CHECK-NEXT: newFuncRoot:
 ; CHECK-NEXT:   br label %ontrue
-; CHECK: .exitStub:
+; CHECK: onfalse{{.*}}:
 ; CHECK-NEXT:   ret void
 ; CHECK: ontrue:
 ; CHECK-NEXT:   call void @patatino()
-; CHECK-NEXT:   br label %onfalse
-; CHECK: onfalse:
-; CHECK-NEXT:   br label %.exitStub
+; CHECK-NEXT:   br label %onfalse{{.*}}
 ; CHECK-NEXT: }
 
 declare void @patatino()
diff --git a/test/Transforms/EarlyCSE/guards.ll b/test/Transforms/EarlyCSE/guards.ll
index 55185f9fe56..de43264db6f 100644
--- a/test/Transforms/EarlyCSE/guards.ll
+++ b/test/Transforms/EarlyCSE/guards.ll
@@ -3,6 +3,8 @@
 
 declare void @llvm.experimental.guard(i1,...)
 
+declare void @llvm.assume(i1)
+
 define i32 @test0(i32* %ptr, i1 %cond) {
 ; We can do store to load forwarding over a guard, since it does not
 ; clobber memory
@@ -180,3 +182,347 @@ define void @test6(i1 %c, i32* %ptr) {
   store i32 600, i32* %ptr
   ret void
 }
+
+define void @test07(i32 %a, i32 %b) {
+; Check that we are able to remove the guards on the same condition even if the
+; condition is not being recalculated.
+
+; CHECK-LABEL: @test07(
+; CHECK-NEXT:  %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+; CHECK-NEXT:  ret void
+
+  %cmp = icmp eq i32 %a, %b
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  ret void
+}
+
+define void @test08(i32 %a, i32 %b, i32* %ptr) {
+; Check that we deal correctly with stores when removing guards in the same
+; block in case when the condition is not recalculated.
+
+; CHECK-LABEL: @test08(
+; CHECK-NEXT:  %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:  store i32 100, i32* %ptr
+; CHECK-NEXT:  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+; CHECK-NEXT:  store i32 400, i32* %ptr
+; CHECK-NEXT:  ret void
+
+  %cmp = icmp eq i32 %a, %b
+  store i32 100, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 200, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 300, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 400, i32* %ptr
+  ret void
+}
+
+define void @test09(i32 %a, i32 %b, i1 %c, i32* %ptr) {
+; Similar to test08, but with more control flow.
+; TODO: Can we get rid of the store in the end of entry given that it is
+; post-dominated by other stores?
+
+; CHECK-LABEL: @test09(
+; CHECK:       entry:
+; CHECK-NEXT:    %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:    store i32 100, i32* %ptr
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+; CHECK-NEXT:    store i32 400, i32* %ptr
+; CHECK-NEXT:    br i1 %c, label %if.true, label %if.false
+; CHECK:       if.true:
+; CHECK-NEXT:    store i32 500, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       if.false:
+; CHECK-NEXT:    store i32 600, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    ret void
+
+entry:
+  %cmp = icmp eq i32 %a, %b
+  store i32 100, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 200, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 300, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 400, i32* %ptr
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 500, i32* %ptr
+  br label %merge
+
+if.false:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 600, i32* %ptr
+  br label %merge
+
+merge:
+  ret void
+}
+
+define void @test10(i32 %a, i32 %b, i1 %c, i32* %ptr) {
+; Make sure that non-dominating guards do not cause other guards removal.
+
+; CHECK-LABEL: @test10(
+; CHECK:       entry:
+; CHECK-NEXT:    %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:    br i1 %c, label %if.true, label %if.false
+; CHECK:       if.true:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+; CHECK-NEXT:    store i32 100, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       if.false:
+; CHECK-NEXT:    store i32 200, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    store i32 300, i32* %ptr
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+; CHECK-NEXT:    store i32 400, i32* %ptr
+; CHECK-NEXT:    ret void
+
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 100, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  br label %merge
+
+if.false:
+  store i32 200, i32* %ptr
+  br label %merge
+
+merge:
+  store i32 300, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 400, i32* %ptr
+  ret void
+}
+
+define void @test11(i32 %a, i32 %b, i32* %ptr) {
+; Make sure that branching condition is applied to guards.
+
+; CHECK-LABEL: @test11(
+; CHECK:       entry:
+; CHECK-NEXT:    %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:    br i1 %cmp, label %if.true, label %if.false
+; CHECK:       if.true:
+; CHECK-NEXT:    br label %merge
+; CHECK:       if.false:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    ret void
+
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.true, label %if.false
+
+if.true:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  br label %merge
+
+if.false:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  br label %merge
+
+merge:
+  ret void
+}
+
+define void @test12(i32 %a, i32 %b) {
+; Check that the assume marks its condition as being true (and thus allows to
+; eliminate the dominated guards).
+
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:  call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:  ret void
+
+  %cmp = icmp eq i32 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  ret void
+}
+
+define void @test13(i32 %a, i32 %b, i32* %ptr) {
+; Check that we deal correctly with stores when removing guards due to assume.
+
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:  %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:  call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:  store i32 400, i32* %ptr
+; CHECK-NEXT:  ret void
+
+  %cmp = icmp eq i32 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  store i32 100, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 200, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 300, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 400, i32* %ptr
+  ret void
+}
+
+define void @test14(i32 %a, i32 %b, i1 %c, i32* %ptr) {
+; Similar to test13, but with more control flow.
+; TODO: Can we get rid of the store in the end of entry given that it is
+; post-dominated by other stores?
+
+; CHECK-LABEL: @test14(
+; CHECK:       entry:
+; CHECK-NEXT:    %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:    call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:    store i32 400, i32* %ptr
+; CHECK-NEXT:    br i1 %c, label %if.true, label %if.false
+; CHECK:       if.true:
+; CHECK-NEXT:    store i32 500, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       if.false:
+; CHECK-NEXT:    store i32 600, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    ret void
+
+entry:
+  %cmp = icmp eq i32 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  store i32 100, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 200, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 300, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 400, i32* %ptr
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 500, i32* %ptr
+  br label %merge
+
+if.false:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 600, i32* %ptr
+  br label %merge
+
+merge:
+  ret void
+}
+
+define void @test15(i32 %a, i32 %b, i1 %c, i32* %ptr) {
+; Make sure that non-dominating assumes do not cause guards removal.
+
+; CHECK-LABEL: @test15(
+; CHECK:       entry:
+; CHECK-NEXT:    %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:    br i1 %c, label %if.true, label %if.false
+; CHECK:       if.true:
+; CHECK-NEXT:    call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:    store i32 100, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       if.false:
+; CHECK-NEXT:    store i32 200, i32* %ptr
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    store i32 300, i32* %ptr
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+; CHECK-NEXT:    store i32 400, i32* %ptr
+; CHECK-NEXT:    ret void
+
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  call void @llvm.assume(i1 %cmp)
+  store i32 100, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  br label %merge
+
+if.false:
+  store i32 200, i32* %ptr
+  br label %merge
+
+merge:
+  store i32 300, i32* %ptr
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  store i32 400, i32* %ptr
+  ret void
+}
+
+define void @test16(i32 %a, i32 %b) {
+; Check that we don't bother to do anything with assumes even if we know the
+; condition being true.
+
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:    call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:    call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:    ret void
+
+  %cmp = icmp eq i32 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+define void @test17(i32 %a, i32 %b, i1 %c, i32* %ptr) {
+; Check that we don't bother to do anything with assumes even if we know the
+; condition being true or false (includes come control flow).
+
+; CHECK-LABEL: @test17(
+; CHECK:       entry:
+; CHECK-NEXT:    %cmp = icmp eq i32 %a, %b
+; CHECK-NEXT:    br i1 %c, label %if.true, label %if.false
+; CHECK:       if.true:
+; CHECK-NEXT:    call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:    br label %merge
+; CHECK:       if.false:
+; CHECK-NEXT:    call void @llvm.assume(i1 %cmp)
+; CHECK-NEXT:    br label %merge
+; CHECK:       merge:
+; CHECK-NEXT:    ret void
+
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %c, label %if.true, label %if.false
+
+if.true:
+  call void @llvm.assume(i1 %cmp)
+  br label %merge
+
+if.false:
+  call void @llvm.assume(i1 %cmp)
+  br label %merge
+
+merge:
+  ret void
+}
+
+define void @test18(i1 %c) {
+; Check that we don't bother to do anything with assumes even if we know the
+; condition being true and not being an instruction.
+
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    call void @llvm.assume(i1 %c)
+; CHECK-NEXT:    call void @llvm.assume(i1 %c)
+; CHECK-NEXT:    ret void
+
+  call void @llvm.assume(i1 %c)
+  call void @llvm.assume(i1 %c)
+  ret void
+}
diff --git a/test/Transforms/GVNHoist/hoist-inline.ll b/test/Transforms/GVNHoist/hoist-inline.ll
index 7d761486ab1..56378d136de 100644
--- a/test/Transforms/GVNHoist/hoist-inline.ll
+++ b/test/Transforms/GVNHoist/hoist-inline.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -O2 < %s | FileCheck %s
+; RUN: opt -S -O2 -enable-gvn-hoist < %s | FileCheck %s
 
 ; Check that the inlined loads are hoisted.
 ; CHECK-LABEL: define i32 @fun(
diff --git a/test/Transforms/GlobalOpt/localize-constexpr-debuginfo.ll b/test/Transforms/GlobalOpt/localize-constexpr-debuginfo.ll
new file mode 100644
index 00000000000..9cdf603cb24
--- /dev/null
+++ b/test/Transforms/GlobalOpt/localize-constexpr-debuginfo.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S < %s -globalopt | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@_ZL1x = internal global [200 x i8]* null, align 8, !dbg !0
+
+define i32 @main(i32 %argc, i8** %argv) norecurse !dbg !18 {
+; CHECK: define i32 @main
+; Make sure we localized the global.
+; CHECK: alloca [200 x i8]*
+; Make sure the metadata is sane. Currently, we just drop the metadata,
+; so it points to nothing.
+; CHECK: call void @llvm.dbg.value(metadata !2,
+; CHECK: !2 = !{}
+entry:
+  call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !22, metadata !23), !dbg !24
+  call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !25, metadata !23), !dbg !26
+  %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 0, !dbg !27
+  %0 = load i8*, i8** %arrayidx, align 8, !dbg !27
+  %1 = bitcast i8* %0 to [200 x i8]*, !dbg !28
+  store [200 x i8]* %1, [200 x i8]** @_ZL1x, align 8, !dbg !29
+  call void @llvm.dbg.value(metadata i8** bitcast ([200 x i8]** @_ZL1x to i8**), i64 0, metadata !30, metadata !23), !dbg !31
+  %2 = load i8*, i8** bitcast ([200 x i8]** @_ZL1x to i8**), align 8, !dbg !32
+  %3 = load i8, i8* %2, align 1, !dbg !33
+  %conv = sext i8 %3 to i32, !dbg !33
+  ret i32 %conv, !dbg !34
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1)
+!1 = distinct !DIGlobalVariable(name: "x", linkageName: "_ZL1x", scope: !2, file: !14, line: 1, type: !6, isLocal: true, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !13)
+!3 = !DIFile(filename: "-", directory: "/")
+!4 = !{}
+!5 = !{!6, !11}
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64)
+!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 1600, elements: !9)
+!8 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!9 = !{!10}
+!10 = !DISubrange(count: 200)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
+!13 = !{!0}
+!14 = !DIFile(filename: "<stdin>", directory: "/")
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang"}
+!18 = distinct !DISubprogram(name: "main", scope: !14, file: !14, line: 2, type: !19, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!21, !21, !11}
+!21 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!22 = !DILocalVariable(name: "argc", arg: 1, scope: !18, file: !14, line: 2, type: !21)
+!23 = !DIExpression()
+!24 = !DILocation(line: 2, column: 14, scope: !18)
+!25 = !DILocalVariable(name: "argv", arg: 2, scope: !18, file: !14, line: 2, type: !11)
+!26 = !DILocation(line: 2, column: 26, scope: !18)
+!27 = !DILocation(line: 2, column: 52, scope: !18)
+!28 = !DILocation(line: 2, column: 38, scope: !18)
+!29 = !DILocation(line: 2, column: 36, scope: !18)
+!30 = !DILocalVariable(name: "y", scope: !18, file: !14, line: 2, type: !11)
+!31 = !DILocation(line: 2, column: 68, scope: !18)
+!32 = !DILocation(line: 2, column: 92, scope: !18)
+!33 = !DILocation(line: 2, column: 91, scope: !18)
+!34 = !DILocation(line: 2, column: 84, scope: !18)
diff --git a/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll b/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
new file mode 100644
index 00000000000..73829e86527
--- /dev/null
+++ b/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; Test that pure addrspacecast instructions not directly connected to
+; a memory operation are inferred.
+
+; CHECK-LABEL: @addrspacecast_gep_addrspacecast(
+; CHECK: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
+; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
+; CHECK-NEXT: ret void
+define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
+  %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
+  %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+  store i32 8, i32 addrspace(3)* %asc1, align 8
+  ret void
+}
+
+; CHECK-LABEL: @addrspacecast_to_memory(
+; CHECK: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
+; CHECK-NEXT: store volatile i32 addrspace(3)* %gep0, i32 addrspace(3)* addrspace(1)* undef
+; CHECK-NEXT: ret void
+define void @addrspacecast_to_memory(i32 addrspace(3)* %ptr) {
+  %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
+  %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+  store volatile i32 addrspace(3)* %asc1, i32 addrspace(3)* addrspace(1)* undef
+  ret void
+}
+
+; CHECK-LABEL: @multiuse_addrspacecast_gep_addrspacecast(
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+; CHECK-NEXT: store volatile i32 addrspace(4)* %1, i32 addrspace(4)* addrspace(1)* undef
+; CHECK-NEXT: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
+; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
+; CHECK-NEXT: ret void
+define void @multiuse_addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
+  %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  store volatile i32 addrspace(4)* %asc0, i32 addrspace(4)* addrspace(1)* undef
+  %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
+  %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+  store i32 8, i32 addrspace(3)* %asc1, align 8
+  ret void
+}
diff --git a/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
index 6b94a74da35..e2c255dcb3e 100644
--- a/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
+++ b/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
@@ -5,10 +5,19 @@
 
 @lds = internal unnamed_addr addrspace(3) global [648 x double] undef, align 8
 
+; CHECK-LABEL: @simplified_constexpr_gep_addrspacecast(
+; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
+define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
+  %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0
+  %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+  store double 1.000000e+00, double addrspace(3)* %asc, align 8
+  ret void
+}
+
 ; CHECK-LABEL: @constexpr_gep_addrspacecast(
-; CHECK: %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0
-; CHECK-NEXT: %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
-; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %asc
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
 define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
   %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
   %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
@@ -18,10 +27,8 @@ define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
 
 ; CHECK-LABEL: @constexpr_gep_gep_addrspacecast(
 ; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
-; CHECK-NEXT: %1 = addrspacecast double addrspace(3)* %gep0 to double addrspace(4)*
-; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(4)* %1, i64 %idx1
-; CHECK-NEXT: %asc = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
-; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %asc, align 8
+; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* %gep0, i64 %idx1
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
 define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) {
   %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
   %gep1 = getelementptr inbounds double, double addrspace(4)* %gep0, i64 %idx1
@@ -46,3 +53,21 @@ define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) n
   store i32 99, i32 addrspace(4)* %p3
   ret void
 }
+
+; CHECK-LABEL: @repeated_constexpr_gep_addrspacecast(
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
+; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx1
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
+; CHECK-NEXT: ret void
+define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
+  %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
+  %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+  store double 1.0, double addrspace(3)* %asc0, align 8
+
+  %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1
+  %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+  store double 1.0, double addrspace(3)* %asc1, align 8
+
+  ret void
+}
diff --git a/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll b/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll
index b2d8ddb1956..b4e05b2e429 100644
--- a/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll
+++ b/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll
@@ -10,7 +10,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 ; CHECK: %tmp = load float*, float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1), align 8
 ; CHECK: %tmp1 = load float, float* %tmp, align 4
 ; CHECK: store float %conv1, float* %tmp, align 4
-; CHECK: store i32 32, i32 addrspace(3)* addrspacecast (i32* bitcast (float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1) to i32*) to i32 addrspace(3)*), align 4
+; CHECK: store i32 32, i32 addrspace(3)* bitcast (float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1) to i32 addrspace(3)*), align 4
 define void @bug31948(float %a, float* nocapture readnone %x, float* nocapture readnone %y) local_unnamed_addr #0 {
 entry:
   %tmp = load float*, float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1), align 8
diff --git a/test/Transforms/Inline/AArch64/switch.ll b/test/Transforms/Inline/AArch64/switch.ll
new file mode 100644
index 00000000000..96d6bf2db68
--- /dev/null
+++ b/test/Transforms/Inline/AArch64/switch.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -inline -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s
+
+define i32 @callee_range(i32 %a, i32* %P) {
+  switch i32 %a, label %sw.default [
+    i32 0, label %sw.bb0
+    i32 1000, label %sw.bb1
+    i32 2000, label %sw.bb1
+    i32 3000, label %sw.bb1
+    i32 4000, label %sw.bb1
+    i32 5000, label %sw.bb1
+    i32 6000, label %sw.bb1
+    i32 7000, label %sw.bb1
+    i32 8000, label %sw.bb1
+    i32 9000, label %sw.bb1
+  ]
+
+sw.default:
+  store volatile i32 %a, i32* %P
+  br label %return
+sw.bb0:
+  store volatile i32 %a, i32* %P
+  br label %return
+sw.bb1:
+  store volatile i32 %a, i32* %P
+  br label %return
+return:
+  ret i32 42
+}
+
+define i32 @caller_range(i32 %a, i32* %P) {
+; CHECK-LABEL: @caller_range(
+; CHECK: call i32 @callee_range
+  %r = call i32 @callee_range(i32 %a, i32* %P)
+  ret i32 %r
+}
+
+define i32 @callee_bittest(i32 %a, i32* %P) {
+  switch i32 %a, label %sw.default [
+    i32 0, label %sw.bb0
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb0
+    i32 4, label %sw.bb1
+    i32 5, label %sw.bb2
+    i32 6, label %sw.bb0
+    i32 7, label %sw.bb1
+    i32 8, label %sw.bb2
+  ]
+
+sw.default:
+  store volatile i32 %a, i32* %P
+  br label %return
+
+sw.bb0:
+  store volatile i32 %a, i32* %P
+  br label %return
+
+sw.bb1:
+  store volatile i32 %a, i32* %P
+  br label %return
+
+sw.bb2:
+  br label %return
+
+return:
+  ret i32 42
+}
+
+
+define i32 @caller_bittest(i32 %a, i32* %P) {
+; CHECK-LABEL: @caller_bittest(
+; CHECK-NOT: call i32 @callee_bittest
+  %r= call i32 @callee_bittest(i32 %a, i32* %P)
+  ret i32 %r
+}
+
+define i32 @callee_jumptable(i32 %a, i32* %P) {
+  switch i32 %a, label %sw.default [
+    i32 1001, label %sw.bb101
+    i32 1002, label %sw.bb102
+    i32 1003, label %sw.bb103
+    i32 1004, label %sw.bb104
+    i32 1005, label %sw.bb101
+    i32 1006, label %sw.bb102
+    i32 1007, label %sw.bb103
+    i32 1008, label %sw.bb104
+    i32 1009, label %sw.bb101
+    i32 1010, label %sw.bb102
+    i32 1011, label %sw.bb103
+    i32 1012, label %sw.bb104
+ ]
+
+sw.default:
+  br label %return
+
+sw.bb101:
+  store volatile i32 %a, i32* %P
+  br label %return
+
+sw.bb102:
+  store volatile i32 %a, i32* %P
+  br label %return
+
+sw.bb103:
+  store volatile i32 %a, i32* %P
+  br label %return
+
+sw.bb104:
+  store volatile i32 %a, i32* %P
+  br label %return
+
+return:
+  ret i32 42
+}
+
+define i32 @caller_jumptable(i32 %a, i32 %b, i32* %P) {
+; CHECK-LABEL: @caller_jumptable(
+; CHECK: call i32 @callee_jumptable
+  %r = call i32 @callee_jumptable(i32 %b, i32* %P)
+  ret i32 %r
+}
+
diff --git a/test/Transforms/InstCombine/amdgcn-intrinsics.ll b/test/Transforms/InstCombine/amdgcn-intrinsics.ll
index 357085fd31f..1901997c552 100644
--- a/test/Transforms/InstCombine/amdgcn-intrinsics.ll
+++ b/test/Transforms/InstCombine/amdgcn-intrinsics.ll
@@ -1259,7 +1259,7 @@ define i64 @icmp_constant_inputs_false() {
 }
 
 ; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
 define i64 @icmp_constant_inputs_true() {
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
   ret i64 %result
@@ -1524,7 +1524,7 @@ define i64 @fcmp_constant_inputs_false() {
 }
 
 ; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
 define i64 @fcmp_constant_inputs_true() {
   %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
   ret i64 %result
@@ -1537,4 +1537,4 @@ define i64 @fcmp_constant_to_rhs_olt(float %x) {
   ret i64 %result
 }
 
-; CHECK: attributes #4 = { convergent }
+; CHECK: attributes #5 = { convergent }
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index a2e8a10735f..a8e32bd77f7 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -496,31 +496,23 @@ define i32 @xor_to_xor12(float %fa, float %fb) {
   ret i32 %xor
 }
 
-; ~(~(a | b) | (a & b)) --> (a | b) & ~(a & b) -> a ^ b
+; https://bugs.llvm.org/show_bug.cgi?id=32830
+; Make sure we're matching operands correctly and not folding things wrongly.
 
-define i32 @demorgan_plus_and_to_xor(i32 %a, i32 %b) {
-; CHECK-LABEL: @demorgan_plus_and_to_xor(
-; CHECK-NEXT:    [[NOT:%.*]] = xor i32 %b, %a
-; CHECK-NEXT:    ret i32 [[NOT]]
+define i64 @PR32830(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @PR32830(
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i64 %a, -1
+; CHECK-NEXT:    [[NOTB:%.*]] = xor i64 %b, -1
+; CHECK-NEXT:    [[OR1:%.*]] = or i64 [[NOTB]], %a
+; CHECK-NEXT:    [[OR2:%.*]] = or i64 [[NOTA]], %c
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[OR1]], [[OR2]]
+; CHECK-NEXT:    ret i64 [[AND]]
 ;
-  %or = or i32 %b, %a
-  %notor = xor i32 %or, -1
-  %and = and i32 %b, %a
-  %or2 = or i32 %and, %notor
-  %not = xor i32 %or2, -1
-  ret i32 %not
-}
-
-define <4 x i32> @demorgan_plus_and_to_xor_vec(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @demorgan_plus_and_to_xor_vec(
-; CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i32> %a, %b
-; CHECK-NEXT:    ret <4 x i32> [[NOT]]
-;
-  %or = or <4 x i32> %a, %b
-  %notor = xor <4 x i32> %or, < i32 -1, i32 -1, i32 -1, i32 -1 >
-  %and = and <4 x i32> %a, %b
-  %or2 = or <4 x i32> %and, %notor
-  %not = xor <4 x i32> %or2, < i32 -1, i32 -1, i32 -1, i32 -1 >
-  ret <4 x i32> %not
+  %nota = xor i64 %a, -1
+  %notb = xor i64 %b, -1
+  %or1 = or i64 %notb, %a
+  %or2 = or i64 %nota, %c
+  %and = and i64 %or1, %or2
+  ret i64 %and
 }
 
diff --git a/test/Transforms/InstCombine/and.ll b/test/Transforms/InstCombine/and.ll
index a2715c12fa8..8ef7870891f 100644
--- a/test/Transforms/InstCombine/and.ll
+++ b/test/Transforms/InstCombine/and.ll
@@ -172,19 +172,6 @@ define i8 @test16(i8 %A) {
   ret i8 %C
 }
 
-;; ~(~X & Y) --> (X | ~Y)
-define i8 @test17(i8 %X, i8 %Y) {
-; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[Y_NOT:%.*]] = xor i8 %Y, -1
-; CHECK-NEXT:    [[D:%.*]] = or i8 [[Y_NOT]], %X
-; CHECK-NEXT:    ret i8 [[D]]
-;
-  %B = xor i8 %X, -1
-  %C = and i8 %B, %Y
-  %D = xor i8 %C, -1
-  ret i8 %D
-}
-
 define i1 @test18(i32 %A) {
 ; CHECK-LABEL: @test18(
 ; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 %A, 127
diff --git a/test/Transforms/InstCombine/apint-and.ll b/test/Transforms/InstCombine/apint-and.ll
new file mode 100644
index 00000000000..f0381dfc284
--- /dev/null
+++ b/test/Transforms/InstCombine/apint-and.ll
@@ -0,0 +1,126 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; FIXME: Some of these tests belong in InstSimplify.
+
+; Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+define i39 @test0(i39 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT:    ret i39 0
+;
+  %B = and i39 %A, 0 ; zero result
+  ret i39 %B
+}
+
+define i15 @test2(i15 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i15 %x
+;
+  %tmp.2 = and i15 %x, -1 ; noop
+  ret i15 %tmp.2
+}
+
+define i23 @test3(i23 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret i23 0
+;
+  %tmp.0 = and i23 %x, 127
+  %tmp.2 = and i23 %tmp.0, 128
+  ret i23 %tmp.2
+}
+
+define i1 @test4(i37 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[B:%.*]] = icmp ugt i37 %x, 2147483647
+; CHECK-NEXT:    ret i1 [[B]]
+;
+  %A = and i37 %x, -2147483648
+  %B = icmp ne i37 %A, 0
+  ret i1 %B
+}
+
+define i7 @test5(i7 %A, i7* %P) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[B:%.*]] = or i7 %A, 3
+; CHECK-NEXT:    [[C:%.*]] = xor i7 [[B]], 12
+; CHECK-NEXT:    store i7 [[C]], i7* %P, align 1
+; CHECK-NEXT:    ret i7 3
+;
+  %B = or i7 %A, 3
+  %C = xor i7 %B, 12
+  store i7 %C, i7* %P
+  %r = and i7 %C, 3
+  ret i7 %r
+}
+
+define i47 @test7(i47 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i47 %A, 39
+; CHECK-NEXT:    ret i47 [[TMP1]]
+;
+  %X = ashr i47 %A, 39 ;; sign extend
+  %C1 = and i47 %X, 255
+  ret i47 %C1
+}
+
+; Integer BitWidth > 64 && BitWidth <= 1024.
+
+define i999 @test8(i999 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    ret i999 0
+;
+  %B = and i999 %A, 0 ; zero result
+  ret i999 %B
+}
+
+define i1005 @test9(i1005 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    ret i1005 %x
+;
+  %tmp.2 = and i1005 %x, -1 ; noop
+  ret i1005 %tmp.2
+}
+
+define i123 @test10(i123 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    ret i123 0
+;
+  %tmp.0 = and i123 %x, 127
+  %tmp.2 = and i123 %tmp.0, 128
+  ret i123 %tmp.2
+}
+
+define i1 @test11(i737 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[B:%.*]] = icmp ugt i737 %x, 2147483647
+; CHECK-NEXT:    ret i1 [[B]]
+;
+  %A = and i737 %x, -2147483648
+  %B = icmp ne i737 %A, 0
+  ret i1 %B
+}
+
+define i117 @test12(i117 %A, i117* %P) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[B:%.*]] = or i117 %A, 3
+; CHECK-NEXT:    [[C:%.*]] = xor i117 [[B]], 12
+; CHECK-NEXT:    store i117 [[C]], i117* %P, align 4
+; CHECK-NEXT:    ret i117 3
+;
+  %B = or i117 %A, 3
+  %C = xor i117 %B, 12
+  store i117 %C, i117* %P
+  %r = and i117 %C, 3
+  ret i117 %r
+}
+
+define i1024 @test13(i1024 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i1024 %A, 1016
+; CHECK-NEXT:    ret i1024 [[TMP1]]
+;
+  %X = ashr i1024 %A, 1016 ;; sign extend
+  %C1 = and i1024 %X, 255
+  ret i1024 %C1
+}
+
diff --git a/test/Transforms/InstCombine/apint-and1.ll b/test/Transforms/InstCombine/apint-and1.ll
deleted file mode 100644
index fcd2dcd23ad..00000000000
--- a/test/Transforms/InstCombine/apint-and1.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; This test makes sure that and instructions are properly eliminated.
-; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
-
-; RUN: opt < %s -instcombine -S | not grep "and "
-; END.
-
-define i39 @test0(i39 %A) {
-        %B = and i39 %A, 0 ; zero result
-        ret i39 %B
-}
-
-define i47 @test1(i47 %A, i47 %B) {
-        ;; (~A & ~B) == (~(A | B)) - De Morgan's Law
-        %NotA = xor i47 %A, -1
-        %NotB = xor i47 %B, -1
-        %C1 = and i47 %NotA, %NotB
-        ret i47 %C1
-}
-
-define i15 @test2(i15 %x) {
-        %tmp.2 = and i15 %x, -1 ; noop
-        ret i15 %tmp.2
-}
-
-define i23 @test3(i23 %x) {
-        %tmp.0 = and i23 %x, 127
-        %tmp.2 = and i23 %tmp.0, 128
-        ret i23 %tmp.2
-}
-
-define i1 @test4(i37 %x) {
-        %A = and i37 %x, -2147483648
-        %B = icmp ne i37 %A, 0
-        ret i1 %B
-}
-
-define i7 @test5(i7 %A, i7* %P) {
-        %B = or i7 %A, 3
-        %C = xor i7 %B, 12
-        store i7 %C, i7* %P
-        %r = and i7 %C, 3
-        ret i7 %r
-}
-
-define i7 @test6(i7 %A, i7 %B) {
-        ;; ~(~X & Y) --> (X | ~Y)
-        %t0 = xor i7 %A, -1
-        %t1 = and i7 %t0, %B
-        %r = xor i7 %t1, -1
-        ret i7 %r
-}
-
-define i47 @test7(i47 %A) {
-        %X = ashr i47 %A, 39 ;; sign extend
-        %C1 = and i47 %X, 255
-        ret i47 %C1
-}
diff --git a/test/Transforms/InstCombine/apint-and2.ll b/test/Transforms/InstCombine/apint-and2.ll
deleted file mode 100644
index 78dc8f990d0..00000000000
--- a/test/Transforms/InstCombine/apint-and2.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; This test makes sure that and instructions are properly eliminated.
-; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-
-; RUN: opt < %s -instcombine -S | not grep "and "
-; END.
-
-
-define i999 @test0(i999 %A) {
-        %B = and i999 %A, 0 ; zero result
-        ret i999 %B
-}
-
-define i477 @test1(i477 %A, i477 %B) {
-        ;; (~A & ~B) == (~(A | B)) - De Morgan's Law
-        %NotA = xor i477 %A, -1
-        %NotB = xor i477 %B, -1
-        %C1 = and i477 %NotA, %NotB
-        ret i477 %C1
-}
-
-define i129 @tst(i129 %A, i129 %B) {
-        ;; (~A & ~B) == (~(A | B)) - De Morgan's Law
-        %NotA = xor i129 %A, -1
-        %NotB = xor i129 %B, -1
-        %C1 = and i129 %NotA, %NotB
-        ret i129 %C1
-}
-
-define i65 @test(i65 %A, i65 %B) {
-        ;; (~A & ~B) == (~(A | B)) - De Morgan's Law
-        %NotA = xor i65 %A, -1
-        %NotB = xor i65 -1, %B
-        %C1 = and i65 %NotA, %NotB
-        ret i65 %C1
-}
-
-define i66 @tes(i66 %A, i66 %B) {
-        ;; (~A & ~B) == (~(A | B)) - De Morgan's Law
-        %NotA = xor i66 %A, -1
-        %NotB = xor i66 %B, -1
-        %C1 = and i66 %NotA, %NotB
-        ret i66 %C1
-}
-
-define i1005 @test2(i1005 %x) {
-        %tmp.2 = and i1005 %x, -1 ; noop
-        ret i1005 %tmp.2
-}
-
-define i123 @test3(i123 %x) {
-        %tmp.0 = and i123 %x, 127
-        %tmp.2 = and i123 %tmp.0, 128
-        ret i123 %tmp.2
-}
-
-define i1 @test4(i737 %x) {
-        %A = and i737 %x, -2147483648
-        %B = icmp ne i737 %A, 0
-        ret i1 %B
-}
-
-define i117 @test5(i117 %A, i117* %P) {
-        %B = or i117 %A, 3
-        %C = xor i117 %B, 12
-        store i117 %C, i117* %P
-        %r = and i117 %C, 3
-        ret i117 %r
-}
-
-define i117 @test6(i117 %A, i117 %B) {
-        ;; ~(~X & Y) --> (X | ~Y)
-        %t0 = xor i117 %A, -1
-        %t1 = and i117 %t0, %B
-        %r = xor i117 %t1, -1
-        ret i117 %r
-}
-
-define i1024 @test7(i1024 %A) {
-        %X = ashr i1024 %A, 1016 ;; sign extend
-        %C1 = and i1024 %X, 255
-        ret i1024 %C1
-}
diff --git a/test/Transforms/InstCombine/apint-not.ll b/test/Transforms/InstCombine/apint-not.ll
index 488b7f2c98e..c5b12fd5dee 100644
--- a/test/Transforms/InstCombine/apint-not.ll
+++ b/test/Transforms/InstCombine/apint-not.ll
@@ -15,28 +15,3 @@ define i1 @test2(i52 %A, i52 %B) {
 	ret i1 %Ret
 }
 
-; Test that demorgans law can be instcombined
-define i47 @test3(i47 %A, i47 %B) {
-	%a = xor i47 %A, -1
-	%b = xor i47 %B, -1
-	%c = and i47 %a, %b
-	%d = xor i47 %c, -1
-	ret i47 %d
-}
-
-; Test that demorgens law can work with constants
-define i61 @test4(i61 %A, i61 %B) {
-	%a = xor i61 %A, -1
-	%c = and i61 %a, 5    ; 5 = ~c2
-	%d = xor i61 %c, -1
-	ret i61 %d
-}
-
-; test the mirror of demorgans law...
-define i71 @test5(i71 %A, i71 %B) {
-	%a = xor i71 %A, -1
-	%b = xor i71 %B, -1
-	%c = or i71 %a, %b
-	%d = xor i71 %c, -1
-	ret i71 %d
-}
diff --git a/test/Transforms/InstCombine/apint-or.ll b/test/Transforms/InstCombine/apint-or.ll
index e2312b61f2b..33304bf8ad6 100644
--- a/test/Transforms/InstCombine/apint-or.ll
+++ b/test/Transforms/InstCombine/apint-or.ll
@@ -27,18 +27,6 @@ define i39 @test2(i39 %V, i39 %M) {
 ; CHECK-NEXT: ret i39 %A
 }
 
-define i43 @test3(i43 %A, i43 %B) {
-    ;; (~A | ~B) == (~(A & B)) - De Morgan's Law
-    %NotA = xor i43 %A, -1
-    %NotB = xor i43 %B, -1
-    %C1 = or i43 %NotA, %NotB
-    ret i43 %C1
-; CHECK-LABEL: @test3
-; CHECK-NEXT: %C1.demorgan = and i43 %A, %B
-; CHECK-NEXT: %C1 = xor i43 %C1.demorgan, -1
-; CHECK-NEXT: ret i43 %C1
-}
-
 ; These tests are for Integer BitWidth > 64 && BitWidth <= 1024.
 define i1023 @test4(i1023 %A) {
     ;; A | ~A == -1
@@ -66,14 +54,3 @@ define i399 @test5(i399 %V, i399 %M) {
 ; CHECK-NEXT: ret i399 %A
 }
 
-define i129 @test6(i129 %A, i129 %B) {
-    ;; (~A | ~B) == (~(A & B)) - De Morgan's Law
-    %NotA = xor i129 %A, -1
-    %NotB = xor i129 %B, -1
-    %C1 = or i129 %NotA, %NotB
-    ret i129 %C1
-; CHECK-LABEL: @test6
-; CHECK-NEXT: %C1.demorgan = and i129 %A, %B
-; CHECK-NEXT: %C1 = xor i129 %C1.demorgan, -1
-; CHECK-NEXT: ret i129 %C1
-}
diff --git a/test/Transforms/InstCombine/assume2.ll b/test/Transforms/InstCombine/assume2.ll
index e8fbc049f41..8dc8831fffa 100644
--- a/test/Transforms/InstCombine/assume2.ll
+++ b/test/Transforms/InstCombine/assume2.ll
@@ -21,8 +21,8 @@ define i32 @test1(i32 %a) #0 {
 
 define i32 @test2(i32 %a) #0 {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[A_NOT:%.*]] = or i32 [[A:%.*]], -16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A_NOT]], -6
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 10
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT:    ret i32 2
 ;
@@ -50,8 +50,8 @@ define i32 @test3(i32 %a) #0 {
 
 define i32 @test4(i32 %a) #0 {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[A_NOT:%.*]] = and i32 [[A:%.*]], 15
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A_NOT]], 10
+; CHECK-NEXT:    [[V:%.*]] = or i32 [[A:%.*]], -16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], -6
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT:    ret i32 2
 ;
diff --git a/test/Transforms/InstCombine/demorgan-zext.ll b/test/Transforms/InstCombine/demorgan-zext.ll
deleted file mode 100644
index d537730ba9a..00000000000
--- a/test/Transforms/InstCombine/demorgan-zext.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; PR22723: Recognize De Morgan's Laws when obfuscated by zexts.
-
-define i32 @demorgan_or(i1 %X, i1 %Y) {
-; CHECK-LABEL: @demorgan_or(
-; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and i1 %X, %Y
-; CHECK-NEXT:    [[OR1:%.*]] = xor i1 [[OR1_DEMORGAN]], true
-; CHECK-NEXT:    [[OR:%.*]] = zext i1 [[OR:%.*]]1 to i32
-; CHECK-NEXT:    ret i32 [[OR]]
-;
-  %zextX = zext i1 %X to i32
-  %zextY = zext i1 %Y to i32
-  %notX  = xor i32 %zextX, 1
-  %notY  = xor i32 %zextY, 1
-  %or    = or i32 %notX, %notY
-  ret i32 %or
-}
-
-define i32 @demorgan_and(i1 %X, i1 %Y) {
-; CHECK-LABEL: @demorgan_and(
-; CHECK-NEXT:    [[AND1_DEMORGAN:%.*]] = or i1 %X, %Y
-; CHECK-NEXT:    [[AND1:%.*]] = xor i1 [[AND1_DEMORGAN]], true
-; CHECK-NEXT:    [[AND:%.*]] = zext i1 [[AND:%.*]]1 to i32
-; CHECK-NEXT:    ret i32 [[AND]]
-;
-  %zextX = zext i1 %X to i32
-  %zextY = zext i1 %Y to i32
-  %notX  = xor i32 %zextX, 1
-  %notY  = xor i32 %zextY, 1
-  %and   = and i32 %notX, %notY
-  ret i32 %and
-}
-
-define <2 x i32> @demorgan_or_vec(<2 x i1> %X, <2 x i1> %Y) {
-; CHECK-LABEL: @demorgan_or_vec(
-; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and <2 x i1> %X, %Y
-; CHECK-NEXT:    [[OR1:%.*]] = xor <2 x i1> [[OR1_DEMORGAN]], <i1 true, i1 true>
-; CHECK-NEXT:    [[OR:%.*]] = zext <2 x i1> [[OR:%.*]]1 to <2 x i32>
-; CHECK-NEXT:    ret <2 x i32> [[OR]]
-;
-  %zextX = zext <2 x i1> %X to <2 x i32>
-  %zextY = zext <2 x i1> %Y to <2 x i32>
-  %notX  = xor <2 x i32> %zextX, <i32 1, i32 1>
-  %notY  = xor <2 x i32> %zextY, <i32 1, i32 1>
-  %or    = or <2 x i32> %notX, %notY
-  ret <2 x i32> %or
-}
-
-define <2 x i32> @demorgan_and_vec(<2 x i1> %X, <2 x i1> %Y) {
-; CHECK-LABEL: @demorgan_and_vec(
-; CHECK-NEXT:    [[AND1_DEMORGAN:%.*]] = or <2 x i1> %X, %Y
-; CHECK-NEXT:    [[AND1:%.*]] = xor <2 x i1> [[AND1_DEMORGAN]], <i1 true, i1 true>
-; CHECK-NEXT:    [[AND:%.*]] = zext <2 x i1> [[AND:%.*]]1 to <2 x i32>
-; CHECK-NEXT:    ret <2 x i32> [[AND]]
-;
-  %zextX = zext <2 x i1> %X to <2 x i32>
-  %zextY = zext <2 x i1> %Y to <2 x i32>
-  %notX  = xor <2 x i32> %zextX, <i32 1, i32 1>
-  %notY  = xor <2 x i32> %zextY, <i32 1, i32 1>
-  %and   = and <2 x i32> %notX, %notY
-  ret <2 x i32> %and
-}
-
-define i32 @PR28476(i32 %x, i32 %y) {
-; CHECK-LABEL: @PR28476(
-; CHECK-NEXT:    [[NOTLHS:%.*]] = icmp eq i32 %x, 0
-; CHECK-NEXT:    [[NOTRHS:%.*]] = icmp eq i32 %y, 0
-; CHECK-NEXT:    [[TMP1:%.*]] = or i1 [[NOTRHS]], [[NOTLHS]]
-; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[TMP1]] to i32
-; CHECK-NEXT:    ret i32 [[COND]]
-;
-  %cmp0 = icmp ne i32 %x, 0
-  %cmp1 = icmp ne i32 %y, 0
-  %and = and i1 %cmp0, %cmp1
-  %zext = zext i1 %and to i32
-  %cond = xor i32 %zext, 1
-  ret i32 %cond
-}
-
diff --git a/test/Transforms/InstCombine/demorgan.ll b/test/Transforms/InstCombine/demorgan.ll
new file mode 100644
index 00000000000..26c2270a3fd
--- /dev/null
+++ b/test/Transforms/InstCombine/demorgan.ll
@@ -0,0 +1,501 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (~A | ~B) == ~(A & B)
+
+define i43 @demorgan_or_apint1(i43 %A, i43 %B) {
+; CHECK-LABEL: @demorgan_or_apint1(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = and i43 %A, %B
+; CHECK-NEXT:    [[C:%.*]] = xor i43 [[C_DEMORGAN]], -1
+; CHECK-NEXT:    ret i43 [[C]]
+;
+  %NotA = xor i43 %A, -1
+  %NotB = xor i43 %B, -1
+  %C = or i43 %NotA, %NotB
+  ret i43 %C
+}
+
+; (~A | ~B) == ~(A & B)
+
+define i129 @demorgan_or_apint2(i129 %A, i129 %B) {
+; CHECK-LABEL: @demorgan_or_apint2(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = and i129 %A, %B
+; CHECK-NEXT:    [[C:%.*]] = xor i129 [[C_DEMORGAN]], -1
+; CHECK-NEXT:    ret i129 [[C]]
+;
+  %NotA = xor i129 %A, -1
+  %NotB = xor i129 %B, -1
+  %C = or i129 %NotA, %NotB
+  ret i129 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i477 @demorgan_and_apint1(i477 %A, i477 %B) {
+; CHECK-LABEL: @demorgan_and_apint1(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i477 %A, %B
+; CHECK-NEXT:    [[C:%.*]] = xor i477 [[C_DEMORGAN]], -1
+; CHECK-NEXT:    ret i477 [[C]]
+;
+  %NotA = xor i477 %A, -1
+  %NotB = xor i477 %B, -1
+  %C = and i477 %NotA, %NotB
+  ret i477 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i129 @demorgan_and_apint2(i129 %A, i129 %B) {
+; CHECK-LABEL: @demorgan_and_apint2(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i129 %A, %B
+; CHECK-NEXT:    [[C:%.*]] = xor i129 [[C_DEMORGAN]], -1
+; CHECK-NEXT:    ret i129 [[C]]
+;
+  %NotA = xor i129 %A, -1
+  %NotB = xor i129 %B, -1
+  %C = and i129 %NotA, %NotB
+  ret i129 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i65 @demorgan_and_apint3(i65 %A, i65 %B) {
+; CHECK-LABEL: @demorgan_and_apint3(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i65 %A, %B
+; CHECK-NEXT:    [[C:%.*]] = xor i65 [[C_DEMORGAN]], -1
+; CHECK-NEXT:    ret i65 [[C]]
+;
+  %NotA = xor i65 %A, -1
+  %NotB = xor i65 -1, %B
+  %C = and i65 %NotA, %NotB
+  ret i65 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i66 @demorgan_and_apint4(i66 %A, i66 %B) {
+; CHECK-LABEL: @demorgan_and_apint4(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i66 %A, %B
+; CHECK-NEXT:    [[C:%.*]] = xor i66 [[C_DEMORGAN]], -1
+; CHECK-NEXT:    ret i66 [[C]]
+;
+  %NotA = xor i66 %A, -1
+  %NotB = xor i66 %B, -1
+  %C = and i66 %NotA, %NotB
+  ret i66 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i47 @demorgan_and_apint5(i47 %A, i47 %B) {
+; CHECK-LABEL: @demorgan_and_apint5(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i47 %A, %B
+; CHECK-NEXT:    [[C:%.*]] = xor i47 [[C_DEMORGAN]], -1
+; CHECK-NEXT:    ret i47 [[C]]
+;
+  %NotA = xor i47 %A, -1
+  %NotB = xor i47 %B, -1
+  %C = and i47 %NotA, %NotB
+  ret i47 %C
+}
+
+; This is confirming that 2 transforms work together:
+; ~(~A & ~B) --> A | B
+
+define i32 @test3(i32 %A, i32 %B) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i32 %A, %B
+; CHECK-NEXT:    ret i32 [[C_DEMORGAN]]
+;
+  %nota = xor i32 %A, -1
+  %notb = xor i32 %B, -1
+  %c = and i32 %nota, %notb
+  %notc = xor i32 %c, -1
+  ret i32 %notc
+}
+
+; Invert a constant if needed:
+; ~(~A & 5) --> A | ~5
+
+define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[NOTC1:%.*]] = or i32 %A, -6
+; CHECK-NEXT:    ret i32 [[NOTC1]]
+;
+  %nota = xor i32 %A, -1
+  %c = and i32 %nota, 5
+  %notc = xor i32 %c, -1
+  ret i32 %notc
+}
+
+; Test the mirror of DeMorgan's law with an extra 'not'.
+; ~(~A | ~B) --> A & B
+
+define i32 @test5(i32 %A, i32 %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = and i32 %A, %B
+; CHECK-NEXT:    ret i32 [[C_DEMORGAN]]
+;
+  %nota = xor i32 %A, -1
+  %notb = xor i32 %B, -1
+  %c = or i32 %nota, %notb
+  %notc = xor i32 %c, -1
+  ret i32 %notc
+}
+
+; Repeat with weird types for extra coverage.
+; ~(~A & ~B) --> A | B
+
+define i47 @test3_apint(i47 %A, i47 %B) {
+; CHECK-LABEL: @test3_apint(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i47 %A, %B
+; CHECK-NEXT:    ret i47 [[C_DEMORGAN]]
+;
+  %nota = xor i47 %A, -1
+  %notb = xor i47 %B, -1
+  %c = and i47 %nota, %notb
+  %notc = xor i47 %c, -1
+  ret i47 %notc
+}
+
+; ~(~A & 5) --> A | ~5
+
+define i61 @test4_apint(i61 %A) {
+; CHECK-LABEL: @test4_apint(
+; CHECK-NEXT:    [[NOTA:%.*]] = and i61 %A, 5
+; CHECK-NEXT:    [[C:%.*]] = xor i61 [[NOTA]], 5
+; CHECK-NEXT:    ret i61 [[C]]
+;
+  %nota = xor i61 %A, -1
+  %c = and i61 %nota, 5    ; 5 = ~c2
+  %notc = xor i61 %c, -1
+  ret i61 %c
+}
+
+; ~(~A | ~B) --> A & B
+
+define i71 @test5_apint(i71 %A, i71 %B) {
+; CHECK-LABEL: @test5_apint(
+; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = and i71 %A, %B
+; CHECK-NEXT:    ret i71 [[C_DEMORGAN]]
+;
+  %nota = xor i71 %A, -1
+  %notb = xor i71 %B, -1
+  %c = or i71 %nota, %notb
+  %notc = xor i71 %c, -1
+  ret i71 %notc
+}
+
+; ~(~A & B) --> (A | ~B)
+
+define i8 @demorgan_nand(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nand(
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT:    [[NOTC:%.*]] = or i8 [[B_NOT]], %A
+; CHECK-NEXT:    ret i8 [[NOTC]]
+;
+  %notx = xor i8 %A, -1
+  %c = and i8 %notx, %B
+  %notc = xor i8 %c, -1
+  ret i8 %notc
+}
+
+; ~(~A & B) --> (A | ~B)
+
+define i7 @demorgan_nand_apint1(i7 %A, i7 %B) {
+; CHECK-LABEL: @demorgan_nand_apint1(
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i7 %B, -1
+; CHECK-NEXT:    [[NOTC:%.*]] = or i7 [[B_NOT]], %A
+; CHECK-NEXT:    ret i7 [[NOTC]]
+;
+  %nota = xor i7 %A, -1
+  %c = and i7 %nota, %B
+  %notc = xor i7 %c, -1
+  ret i7 %notc
+}
+
+; ~(~A & B) --> (A | ~B)
+
+define i117 @demorgan_nand_apint2(i117 %A, i117 %B) {
+; CHECK-LABEL: @demorgan_nand_apint2(
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i117 %B, -1
+; CHECK-NEXT:    [[NOTC:%.*]] = or i117 [[B_NOT]], %A
+; CHECK-NEXT:    ret i117 [[NOTC]]
+;
+  %nota = xor i117 %A, -1
+  %c = and i117 %nota, %B
+  %notc = xor i117 %c, -1
+  ret i117 %notc
+}
+
+; ~(~A | B) --> (A & ~B)
+
+define i8 @demorgan_nor(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor(
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT:    [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT:    ret i8 [[NOTC]]
+;
+  %notx = xor i8 %A, -1
+  %c = or i8 %notx, %B
+  %notc = xor i8 %c, -1
+  ret i8 %notc
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use one of the intermediate results?
+
+define i8 @demorgan_nor_use2a(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2a(
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT:    [[USE2A:%.*]] = mul i8 [[NOTA]], 23
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT:    [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[NOTC]], [[USE2A]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nota = xor i8 %A, -1
+  %use2a = mul i8 %nota, 23
+  %c = or i8 %nota, %B
+  %notc = xor i8 %c, -1
+  %r = sdiv i8 %notc, %use2a
+  ret i8 %r
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use one of the intermediate results?
+
+define i8 @demorgan_nor_use2b(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2b(
+; CHECK-NEXT:    [[USE2B:%.*]] = mul i8 %B, 23
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT:    [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[NOTC]], [[USE2B]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %use2b = mul i8 %B, 23
+  %nota = xor i8 %A, -1
+  %c = or i8 %nota, %B
+  %notc = xor i8 %c, -1
+  %r = sdiv i8 %notc, %use2b
+  ret i8 %r
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use one of the intermediate results?
+
+define i8 @demorgan_nor_use2c(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2c(
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT:    [[C:%.*]] = or i8 [[NOTA]], %B
+; CHECK-NEXT:    [[USE2C:%.*]] = mul i8 [[C]], 23
+; CHECK-NEXT:    [[NOTC:%.*]] = xor i8 [[C]], -1
+; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[NOTC]], [[USE2C]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %nota = xor i8 %A, -1
+  %c = or i8 %nota, %B
+  %use2c = mul i8 %c, 23
+  %notc = xor i8 %c, -1
+  %r = sdiv i8 %notc, %use2c
+  ret i8 %r
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use two of the intermediate results?
+
+define i8 @demorgan_nor_use2ab(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2ab(
+; CHECK-NEXT:    [[USE2B:%.*]] = mul i8 %B, 23
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT:    [[USE2A:%.*]] = mul i8 [[NOTA]], 17
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT:    [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT:    [[R1:%.*]] = sdiv i8 [[NOTC]], [[USE2B]]
+; CHECK-NEXT:    [[R2:%.*]] = sdiv i8 [[R1]], [[USE2A]]
+; CHECK-NEXT:    ret i8 [[R2]]
+;
+  %use2b = mul i8 %B, 23
+  %nota = xor i8 %A, -1
+  %use2a = mul i8 %nota, 17
+  %c = or i8 %nota, %B
+  %notc = xor i8 %c, -1
+  %r1 = sdiv i8 %notc, %use2b
+  %r2 = sdiv i8 %r1, %use2a
+  ret i8 %r2
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use two of the intermediate results?
+
+define i8 @demorgan_nor_use2ac(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2ac(
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT:    [[USE2A:%.*]] = mul i8 [[NOTA]], 17
+; CHECK-NEXT:    [[C:%.*]] = or i8 [[NOTA]], %B
+; CHECK-NEXT:    [[USE2C:%.*]] = mul i8 [[C]], 23
+; CHECK-NEXT:    [[NOTC:%.*]] = xor i8 [[C]], -1
+; CHECK-NEXT:    [[R1:%.*]] = sdiv i8 [[NOTC]], [[USE2C]]
+; CHECK-NEXT:    [[R2:%.*]] = sdiv i8 [[R1]], [[USE2A]]
+; CHECK-NEXT:    ret i8 [[R2]]
+;
+  %nota = xor i8 %A, -1
+  %use2a = mul i8 %nota, 17
+  %c = or i8 %nota, %B
+  %use2c = mul i8 %c, 23
+  %notc = xor i8 %c, -1
+  %r1 = sdiv i8 %notc, %use2c
+  %r2 = sdiv i8 %r1, %use2a
+  ret i8 %r2
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use two of the intermediate results?
+
+define i8 @demorgan_nor_use2bc(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2bc(
+; CHECK-NEXT:    [[USE2B:%.*]] = mul i8 %B, 23
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT:    [[C:%.*]] = or i8 [[NOTA]], %B
+; CHECK-NEXT:    [[USE2C:%.*]] = mul i8 [[C]], 23
+; CHECK-NEXT:    [[NOTC:%.*]] = xor i8 [[C]], -1
+; CHECK-NEXT:    [[R1:%.*]] = sdiv i8 [[NOTC]], [[USE2C]]
+; CHECK-NEXT:    [[R2:%.*]] = sdiv i8 [[R1]], [[USE2B]]
+; CHECK-NEXT:    ret i8 [[R2]]
+;
+  %use2b = mul i8 %B, 23
+  %nota = xor i8 %A, -1
+  %c = or i8 %nota, %B
+  %use2c = mul i8 %c, 23
+  %notc = xor i8 %c, -1
+  %r1 = sdiv i8 %notc, %use2c
+  %r2 = sdiv i8 %r1, %use2b
+  ret i8 %r2
+}
+
+; Do not apply DeMorgan's Law to constants. We prefer 'not' ops.
+
+define i32 @demorganize_constant1(i32 %a) {
+; CHECK-LABEL: @demorganize_constant1(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 %a, 15
+; CHECK-NEXT:    [[AND1:%.*]] = xor i32 [[AND]], -1
+; CHECK-NEXT:    ret i32 [[AND1]]
+;
+  %and = and i32 %a, 15
+  %and1 = xor i32 %and, -1
+  ret i32 %and1
+}
+
+; Do not apply DeMorgan's Law to constants. We prefer 'not' ops.
+
+define i32 @demorganize_constant2(i32 %a) {
+; CHECK-LABEL: @demorganize_constant2(
+; CHECK-NEXT:    [[AND:%.*]] = or i32 %a, 15
+; CHECK-NEXT:    [[AND1:%.*]] = xor i32 [[AND]], -1
+; CHECK-NEXT:    ret i32 [[AND1]]
+;
+  %and = or i32 %a, 15
+  %and1 = xor i32 %and, -1
+  ret i32 %and1
+}
+
+; PR22723: Recognize DeMorgan's Laws when obfuscated by zexts.
+
+define i32 @demorgan_or_zext(i1 %X, i1 %Y) {
+; CHECK-LABEL: @demorgan_or_zext(
+; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and i1 %X, %Y
+; CHECK-NEXT:    [[OR1:%.*]] = xor i1 [[OR1_DEMORGAN]], true
+; CHECK-NEXT:    [[OR:%.*]] = zext i1 [[OR:%.*]]1 to i32
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %zextX = zext i1 %X to i32
+  %zextY = zext i1 %Y to i32
+  %notX  = xor i32 %zextX, 1
+  %notY  = xor i32 %zextY, 1
+  %or    = or i32 %notX, %notY
+  ret i32 %or
+}
+
+define i32 @demorgan_and_zext(i1 %X, i1 %Y) {
+; CHECK-LABEL: @demorgan_and_zext(
+; CHECK-NEXT:    [[AND1_DEMORGAN:%.*]] = or i1 %X, %Y
+; CHECK-NEXT:    [[AND1:%.*]] = xor i1 [[AND1_DEMORGAN]], true
+; CHECK-NEXT:    [[AND:%.*]] = zext i1 [[AND:%.*]]1 to i32
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %zextX = zext i1 %X to i32
+  %zextY = zext i1 %Y to i32
+  %notX  = xor i32 %zextX, 1
+  %notY  = xor i32 %zextY, 1
+  %and   = and i32 %notX, %notY
+  ret i32 %and
+}
+
+define <2 x i32> @demorgan_or_zext_vec(<2 x i1> %X, <2 x i1> %Y) {
+; CHECK-LABEL: @demorgan_or_zext_vec(
+; CHECK-NEXT:    [[OR1_DEMORGAN:%.*]] = and <2 x i1> %X, %Y
+; CHECK-NEXT:    [[OR1:%.*]] = xor <2 x i1> [[OR1_DEMORGAN]], <i1 true, i1 true>
+; CHECK-NEXT:    [[OR:%.*]] = zext <2 x i1> [[OR:%.*]]1 to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[OR]]
+;
+  %zextX = zext <2 x i1> %X to <2 x i32>
+  %zextY = zext <2 x i1> %Y to <2 x i32>
+  %notX  = xor <2 x i32> %zextX, <i32 1, i32 1>
+  %notY  = xor <2 x i32> %zextY, <i32 1, i32 1>
+  %or    = or <2 x i32> %notX, %notY
+  ret <2 x i32> %or
+}
+
+define <2 x i32> @demorgan_and_zext_vec(<2 x i1> %X, <2 x i1> %Y) {
+; CHECK-LABEL: @demorgan_and_zext_vec(
+; CHECK-NEXT:    [[AND1_DEMORGAN:%.*]] = or <2 x i1> %X, %Y
+; CHECK-NEXT:    [[AND1:%.*]] = xor <2 x i1> [[AND1_DEMORGAN]], <i1 true, i1 true>
+; CHECK-NEXT:    [[AND:%.*]] = zext <2 x i1> [[AND:%.*]]1 to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[AND]]
+;
+  %zextX = zext <2 x i1> %X to <2 x i32>
+  %zextY = zext <2 x i1> %Y to <2 x i32>
+  %notX  = xor <2 x i32> %zextX, <i32 1, i32 1>
+  %notY  = xor <2 x i32> %zextY, <i32 1, i32 1>
+  %and   = and <2 x i32> %notX, %notY
+  ret <2 x i32> %and
+}
+
+define i32 @PR28476(i32 %x, i32 %y) {
+; CHECK-LABEL: @PR28476(
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 %y, 0
+; CHECK-NEXT:    [[TMP1:%.*]] = or i1 [[CMP1]], [[CMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp0 = icmp ne i32 %x, 0
+  %cmp1 = icmp ne i32 %y, 0
+  %and = and i1 %cmp0, %cmp1
+  %zext = zext i1 %and to i32
+  %cond = xor i32 %zext, 1
+  ret i32 %cond
+}
+
+; ~(~(a | b) | (a & b)) --> (a | b) & ~(a & b) -> a ^ b
+
+define i32 @demorgan_plus_and_to_xor(i32 %a, i32 %b) {
+; CHECK-LABEL: @demorgan_plus_and_to_xor(
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 %b, %a
+; CHECK-NEXT:    ret i32 [[NOT]]
+;
+  %or = or i32 %b, %a
+  %notor = xor i32 %or, -1
+  %and = and i32 %b, %a
+  %or2 = or i32 %and, %notor
+  %not = xor i32 %or2, -1
+  ret i32 %not
+}
+
+define <4 x i32> @demorgan_plus_and_to_xor_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @demorgan_plus_and_to_xor_vec(
+; CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i32> %a, %b
+; CHECK-NEXT:    ret <4 x i32> [[NOT]]
+;
+  %or = or <4 x i32> %a, %b
+  %notor = xor <4 x i32> %or, < i32 -1, i32 -1, i32 -1, i32 -1 >
+  %and = and <4 x i32> %a, %b
+  %or2 = or <4 x i32> %and, %notor
+  %not = xor <4 x i32> %or2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+  ret <4 x i32> %not
+}
+
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index d0c242f6555..2760d4ae044 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -31,45 +31,8 @@ define i1 @invert_fcmp(float %X, float %Y) {
   ret i1 %not
 }
 
-; Test that De Morgan's law can be instcombined.
-define i32 @test3(i32 %A, i32 %B) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = or i32 %A, %B
-; CHECK-NEXT:    ret i32 [[C_DEMORGAN]]
-;
-  %a = xor i32 %A, -1
-  %b = xor i32 %B, -1
-  %c = and i32 %a, %b
-  %d = xor i32 %c, -1
-  ret i32 %d
-}
-
-; Test that De Morgan's law can work with constants.
-define i32 @test4(i32 %A, i32 %B) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[D1:%.*]] = or i32 %A, -6
-; CHECK-NEXT:    ret i32 [[D1]]
-;
-  %a = xor i32 %A, -1
-  %c = and i32 %a, 5
-  %d = xor i32 %c, -1
-  ret i32 %d
-}
-
-; Test the mirror of De Morgan's law.
-define i32 @test5(i32 %A, i32 %B) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[C_DEMORGAN:%.*]] = and i32 %A, %B
-; CHECK-NEXT:    ret i32 [[C_DEMORGAN]]
-;
-  %a = xor i32 %A, -1
-  %b = xor i32 %B, -1
-  %c = or i32 %a, %b
-  %d = xor i32 %c, -1
-  ret i32 %d
-}
-
 ; PR2298
+
 define zeroext i8 @test6(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 %b, %a
diff --git a/test/Transforms/InstSimplify/shufflevector.ll b/test/Transforms/InstSimplify/shufflevector.ll
index e03916c5b90..6af0db8e5a4 100644
--- a/test/Transforms/InstSimplify/shufflevector.ll
+++ b/test/Transforms/InstSimplify/shufflevector.ll
@@ -118,6 +118,14 @@ define <4 x i32> @undef_mask(<4 x i32> %x) {
   ret <4 x i32> %shuf
 }
 
+define <4 x i32> @undef_mask_1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @undef_mask_1(
+; CHECK-NEXT:    ret <4 x i32> undef
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> undef
+  ret <4 x i32> %shuf
+}
+
 define <4 x i32> @identity_mask_0(<4 x i32> %x) {
 ; CHECK-LABEL: @identity_mask_0(
 ; CHECK-NEXT:    ret <4 x i32> [[X:%.*]]
diff --git a/test/Transforms/JumpThreading/fold-not-thread.ll b/test/Transforms/JumpThreading/fold-not-thread.ll
index 75deca62f75..06ddc10e02b 100644
--- a/test/Transforms/JumpThreading/fold-not-thread.ll
+++ b/test/Transforms/JumpThreading/fold-not-thread.ll
@@ -133,3 +133,114 @@ L3:
   ret void
 }
 
+; Make sure we can do the RAUW for %add...
+;
+; CHECK-LABEL: @rauw_if_possible(
+; CHECK: call void @f4(i32 96) 
+define void @rauw_if_possible(i32 %value) nounwind {
+entry:
+  %cmp = icmp eq i32 %value, 32
+  br i1 %cmp, label %L0, label %L3 
+L0:
+  call i32 @f2()
+  call i32 @f2()
+  %add = add i32 %value, 64
+  switch i32 %add, label %L3 [
+    i32 32, label %L1
+    i32 96, label %L2
+    ]
+
+L1:
+  call void @f3()
+  ret void
+L2:
+  call void @f4(i32 %add)
+  ret void
+L3:
+  call void @f3()
+  ret void
+}
+
+; Make sure we can NOT do the RAUW for %add...
+;
+; CHECK-LABEL: @rauw_if_possible2(
+; CHECK: call void @f4(i32 %add) 
+define void @rauw_if_possible2(i32 %value) nounwind {
+entry:
+  %cmp = icmp eq i32 %value, 32
+  %add = add i32 %value, 64
+  br i1 %cmp, label %L0, label %L2 
+L0:
+  call i32 @f2()
+  call i32 @f2()
+  switch i32 %add, label %L3 [
+    i32 32, label %L1
+    i32 96, label %L2
+    ]
+
+L1:
+  call void @f3()
+  ret void
+L2:
+  call void @f4(i32 %add)
+  ret void
+L3:
+  call void @f3()
+  ret void
+}
+
+; Make sure we can fold this branch ... We will not be able to thread it as
+; L0 is too big to duplicate.
+; We do not attempt to rewrite the indirectbr target here, but we still take
+; its target after L0 into account and that enables us to fold.
+;
+; L2 is the unreachable block here.
+; 
+; CHECK-LABEL: @test_br_folding_not_threading_indirect_branch(
+; CHECK: L1:
+; CHECK: call i32 @f2()
+; CHECK: call void @f3()
+; CHECK-NEXT: ret void
+; CHECK-NOT: br
+; CHECK: L3:
+define void @test_br_folding_not_threading_indirect_branch(i1 %condx, i1 %cond) nounwind {
+entry:
+  br i1 %condx, label %X0, label %X1
+
+X0:
+  br i1 %cond, label %L0, label %L3
+
+X1:
+  br i1 %cond, label %XX1, label %L3
+
+XX1:
+  indirectbr i8* blockaddress(@test_br_folding_not_threading_indirect_branch, %L0), [label %L0]
+
+L0:
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  call i32 @f2()
+  br i1 %cond, label %L1, label %L2
+
+L1:
+  call void @f3()
+  ret void
+
+L2:
+  call void @f3()
+  ret void
+
+L3:
+  call void @f3()
+  ret void
+}
diff --git a/test/Transforms/LoopUnswitch/pr32818.ll b/test/Transforms/LoopUnswitch/pr32818.ll
new file mode 100644
index 00000000000..cda66c9dd36
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/pr32818.ll
@@ -0,0 +1,19 @@
+; Check that the call doesn't get removed even if
+; it has no uses. It could have side-effects.
+; RUN: opt -loop-unswitch -S %s | FileCheck %s
+
+; CHECK-LABEL: @tinky
+define i32 @tinkywinky(i8 %patatino) {
+  %cmp1 = icmp slt i8 %patatino, 5
+  br label %body
+body:
+  %i = select i1 %cmp1, i8 6, i8 undef
+  br i1 true, label %body, label %end
+end:
+  %split = phi i8 [ %i, %body ]
+  %conv4 = sext i8 %split to i32
+; CHECK: tail call fastcc i32 @fn5(
+  %call = tail call fastcc i32 @fn5(i32 %conv4)
+  ret i32 0
+}
+declare fastcc i32 @fn5(i32 returned) unnamed_addr
diff --git a/test/Transforms/NewGVN/pr32852.ll b/test/Transforms/NewGVN/pr32852.ll
new file mode 100644
index 00000000000..1441d17361b
--- /dev/null
+++ b/test/Transforms/NewGVN/pr32852.ll
@@ -0,0 +1,24 @@
+; Make sure GVN doesn't incorrectly think the branch terminating
+; bb2 has a constant condition.
+; RUN: opt -S -newgvn %s | FileCheck %s
+
+@a = common global i32 0
+@patatino = private unnamed_addr constant [3 x i8] c"0\0A\00"
+
+define void @tinkywinky() {
+bb:
+  %tmp = load i32, i32* @a
+  %tmp1 = icmp sge i32 %tmp, 0
+  br i1 %tmp1, label %bb2, label %bb7
+bb2:
+  %tmp4 = icmp sgt i32 %tmp, 0
+; CHECK: br i1 %tmp4, label %bb5, label %bb7
+  br i1 %tmp4, label %bb5, label %bb7
+bb5:
+  %tmp6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @patatino, i32 0, i32 0))
+  br label %bb7
+bb7:
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index 85a16127c6d..e99ba92dc45 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -291,4 +291,29 @@ define {}* @test24(i8* %p) {
   ret {}* %s
 }
 
+declare i8* @first_test25();
+declare i8* @second_test25(i8*);
+declare void @somecall_test25();
+
+; ARC optimizer used to move the last release between the call to second_test25
+; and the call to objc_retainAutoreleasedReturnValue, causing %second to be
+; released prematurely when %first and %second were pointing to the same object.
+
+; CHECK-LABEL: define void @test25(
+; CHECK: %[[CALL1:.*]] = call i8* @second_test25(
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %[[CALL1]])
+
+define void @test25() {
+  %first = call i8* @first_test25()
+  %v0 = call i8* @objc_retain(i8* %first)
+  call void @somecall_test25()
+  %second = call i8* @second_test25(i8* %first)
+  %call2 = call i8* @objc_retainAutoreleasedReturnValue(i8* %second)
+  call void @objc_release(i8* %second), !clang.imprecise_release !0
+  call void @objc_release(i8* %first), !clang.imprecise_release !0
+  ret void
+}
+
+!0 = !{}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/PGOProfile/memop_size_opt_zero.ll b/test/Transforms/PGOProfile/memop_size_opt_zero.ll
new file mode 100644
index 00000000000..ede34f0de80
--- /dev/null
+++ b/test/Transforms/PGOProfile/memop_size_opt_zero.ll
@@ -0,0 +1,19 @@
+; Test to ensure the pgo memop optimization pass doesn't try to scale
+; up a value profile with a 0 count, which would lead to divide by 0.
+; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i8* %dst, i8* %src, i64 %conv) !prof !0 {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false), !prof !1
+  ret void
+}
+
+; MEMOP_OPT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false), !prof !1
+
+!0 = !{!"function_entry_count", i64 1}
+!1 = !{!"VP", i32 1, i64 0, i64 1, i64 0, i64 2, i64 0, i64 3, i64 0, i64 9, i64 0, i64 4, i64 0, i64 5, i64 0, i64 6, i64 0, i64 7, i64 0, i64 8, i64 0}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll b/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll
new file mode 100644
index 00000000000..3b22687ca91
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+	%struct.BLEND_MAP = type { i16, i16, i16, i32, %struct.BLEND_MAP_ENTRY* }
+	%struct.BLEND_MAP_ENTRY = type { float, i8, { [5 x float], [4 x i8] } }
+	%struct.TPATTERN = type { i16, i16, i16, i32, float, float, float, %struct.WARP*, %struct.TPATTERN*, %struct.BLEND_MAP*, { %struct.anon, [4 x i8] } }
+	%struct.TURB = type { i16, %struct.WARP*, [3 x double], i32, float, float }
+	%struct.WARP = type { i16, %struct.WARP* }
+	%struct.anon = type { float, [3 x double] }
+
+define void @Parse_Pattern() {
+entry:
+	br label %bb1096.outer20
+bb671:		; preds = %cond_true1099
+	br label %bb1096.outer23
+bb1096.outer20.loopexit:		; preds = %cond_true1099
+	%Local_Turb.0.ph24.lcssa = phi %struct.TURB* [ %Local_Turb.0.ph24, %cond_true1099 ]		; <%struct.TURB*> [#uses=1]
+	br label %bb1096.outer20
+bb1096.outer20:		; preds = %bb1096.outer20.loopexit, %entry
+	%Local_Turb.0.ph22 = phi %struct.TURB* [ undef, %entry ], [ %Local_Turb.0.ph24.lcssa, %bb1096.outer20.loopexit ]		; <%struct.TURB*> [#uses=1]
+	%tmp1098 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br label %bb1096.outer23
+bb1096.outer23:		; preds = %bb1096.outer20, %bb671
+	%Local_Turb.0.ph24 = phi %struct.TURB* [ %Local_Turb.0.ph22, %bb1096.outer20 ], [ null, %bb671 ]		; <%struct.TURB*> [#uses=2]
+	br label %bb1096
+bb1096:		; preds = %cond_true1099, %bb1096.outer23
+	br i1 %tmp1098, label %cond_true1099, label %bb1102
+cond_true1099:		; preds = %bb1096
+	switch i32 0, label %bb1096.outer20.loopexit [
+		 i32 161, label %bb671
+		 i32 359, label %bb1096
+	]
+bb1102:		; preds = %bb1096
+	%Local_Turb.0.ph24.lcssa1 = phi %struct.TURB* [ %Local_Turb.0.ph24, %bb1096 ]		; <%struct.TURB*> [#uses=0]
+	ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll b/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll
new file mode 100644
index 00000000000..04067eb05c8
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+define void @init_caller_save() {
+entry:
+	br label %cond_true78
+cond_next20:		; preds = %cond_true64
+	br label %bb31
+bb31:		; preds = %cond_true64, %cond_true64, %cond_next20
+	%iftmp.29.1 = phi i32 [ 0, %cond_next20 ], [ 0, %cond_true64 ], [ 0, %cond_true64 ]		; <i32> [#uses=0]
+	br label %bb54
+bb54:		; preds = %cond_true78, %bb31
+	br i1 false, label %bb75, label %cond_true64
+cond_true64:		; preds = %bb54
+	switch i32 %i.0.0, label %cond_next20 [
+		 i32 17, label %bb31
+		 i32 18, label %bb31
+	]
+bb75:		; preds = %bb54
+	%tmp74.0 = add i32 %i.0.0, 1		; <i32> [#uses=1]
+	br label %cond_true78
+cond_true78:		; preds = %bb75, %entry
+	%i.0.0 = phi i32 [ 0, %entry ], [ %tmp74.0, %bb75 ]		; <i32> [#uses=2]
+	br label %bb54
+}
+
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll b/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll
new file mode 100644
index 00000000000..4bbcc80ff3b
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll
@@ -0,0 +1,28 @@
+; PR1333
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+	%struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
+	%struct.ada__tags__dispatch_table = type { [1 x i8*] }
+	%struct.quotes__T173s = type { i8, %struct.quotes__T173s__T174s, [2 x [1 x double]], [2 x i16], i64, i8 }
+	%struct.quotes__T173s__T174s = type { i8, i8, i8, i16, i16, [2 x [1 x double]] }
+
+define void @quotes__write_quote() {
+entry:
+	%tmp606.i = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br label %bb
+bb:		; preds = %cond_next73, %bb, %entry
+	br i1 false, label %bb51, label %bb
+bb51:		; preds = %cond_next73, %bb
+	br i1 %tmp606.i, label %quotes__bid_ask_depth_offset_matrices__get_price.exit, label %cond_true.i
+cond_true.i:		; preds = %bb51
+	unreachable
+quotes__bid_ask_depth_offset_matrices__get_price.exit:		; preds = %bb51
+	br i1 false, label %cond_next73, label %cond_true72
+cond_true72:		; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+	unreachable
+cond_next73:		; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+	br i1 false, label %bb, label %bb51
+}
+
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll b/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll
new file mode 100644
index 00000000000..835715264e1
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll
@@ -0,0 +1,95 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; PR1333
+
+define void @pp_cxx_expression() {
+entry:
+	%tmp6 = lshr i32 0, 24		; <i32> [#uses=1]
+	br label %tailrecurse
+
+tailrecurse:		; preds = %tailrecurse, %tailrecurse, %entry
+	switch i32 %tmp6, label %bb96 [
+		 i32 24, label %bb10
+		 i32 25, label %bb10
+		 i32 28, label %bb10
+		 i32 29, label %bb48
+		 i32 31, label %bb48
+		 i32 32, label %bb48
+		 i32 33, label %bb48
+		 i32 34, label %bb48
+		 i32 36, label %bb15
+		 i32 51, label %bb89
+		 i32 52, label %bb89
+		 i32 54, label %bb83
+		 i32 57, label %bb59
+		 i32 63, label %bb80
+		 i32 64, label %bb80
+		 i32 68, label %bb80
+		 i32 169, label %bb75
+		 i32 170, label %bb19
+		 i32 171, label %bb63
+		 i32 172, label %bb63
+		 i32 173, label %bb67
+		 i32 174, label %bb67
+		 i32 175, label %bb19
+		 i32 176, label %bb75
+		 i32 178, label %bb59
+		 i32 179, label %bb89
+		 i32 180, label %bb59
+		 i32 182, label %bb48
+		 i32 183, label %bb48
+		 i32 184, label %bb48
+		 i32 185, label %bb48
+		 i32 186, label %bb48
+		 i32 195, label %bb48
+		 i32 196, label %bb59
+		 i32 197, label %bb89
+		 i32 198, label %bb70
+		 i32 199, label %bb59
+		 i32 200, label %bb59
+		 i32 201, label %bb59
+		 i32 202, label %bb59
+		 i32 203, label %bb75
+		 i32 204, label %bb59
+		 i32 205, label %tailrecurse
+		 i32 210, label %tailrecurse
+	]
+
+bb10:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb15:		; preds = %tailrecurse
+	ret void
+
+bb19:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb48:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb59:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb63:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb67:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb70:		; preds = %tailrecurse
+	ret void
+
+bb75:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb80:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb83:		; preds = %tailrecurse
+	ret void
+
+bb89:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb96:		; preds = %tailrecurse
+	ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll b/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll
new file mode 100644
index 00000000000..2a73a429c6a
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output
+
+@str3 = external constant [3 x i8]		; <[3 x i8]*> [#uses=1]
+
+define i32 @stringSearch_Clib(i32 %count) {
+entry:
+	%ttmp25 = icmp sgt i32 %count, 0		; <i1> [#uses=1]
+	br i1 %ttmp25, label %bb36.preheader, label %bb44
+
+bb36.preheader:		; preds = %entry
+	%ttmp33 = icmp slt i32 0, 250		; <i1> [#uses=1]
+	br label %bb36.outer
+
+bb36.outer:		; preds = %bb41, %bb36.preheader
+	br i1 %ttmp33, label %bb.nph, label %bb41
+
+bb.nph:		; preds = %bb36.outer
+	%ttmp8 = icmp eq i8* null, null		; <i1> [#uses=1]
+	%ttmp6 = icmp eq i8* null, null		; <i1> [#uses=1]
+	%tmp31 = call i32 @strcspn( i8* null, i8* getelementptr ([3 x i8], [3 x i8]* @str3, i64 0, i64 0) )		; <i32> [#uses=1]
+	br i1 %ttmp8, label %cond_next, label %cond_true
+
+cond_true:		; preds = %bb.nph
+	ret i32 0
+
+cond_next:		; preds = %bb.nph
+	br i1 %ttmp6, label %cond_next28, label %cond_true20
+
+cond_true20:		; preds = %cond_next
+	ret i32 0
+
+cond_next28:		; preds = %cond_next
+	%tmp33 = add i32 %tmp31, 0		; <i32> [#uses=1]
+	br label %bb41
+
+bb41:		; preds = %cond_next28, %bb36.outer
+	%c.2.lcssa = phi i32 [ 0, %bb36.outer ], [ %tmp33, %cond_next28 ]		; <i32> [#uses=1]
+	br i1 false, label %bb36.outer, label %bb44
+
+bb44:		; preds = %bb41, %entry
+	%c.01.1 = phi i32 [ 0, %entry ], [ %c.2.lcssa, %bb41 ]		; <i32> [#uses=1]
+	ret i32 %c.01.1
+}
+
+declare i32 @strcspn(i8*, i8*)
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll b/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll
new file mode 100644
index 00000000000..7299b2b1908
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+	%tmp1785365 = icmp ult i32 0, 100		; <i1> [#uses=1]
+	br label %bb
+
+bb:		; preds = %cond_true, %entry
+	br i1 false, label %cond_true, label %cond_next
+
+cond_true:		; preds = %bb
+	br i1 %tmp1785365, label %bb, label %bb1788
+
+cond_next:		; preds = %bb
+	%iftmp.1.0 = select i1 false, i32 0, i32 0		; <i32> [#uses=1]
+	br i1 false, label %cond_true47, label %cond_next74
+
+cond_true47:		; preds = %cond_next
+	%tmp53 = urem i32 %iftmp.1.0, 0		; <i32> [#uses=0]
+	ret i32 0
+
+cond_next74:		; preds = %cond_next
+	ret i32 0
+
+bb1788:		; preds = %cond_true
+	ret i32 0
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll b/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll
new file mode 100644
index 00000000000..e1ef5064a8f
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; PR1559
+
+target triple = "i686-pc-linux-gnu"
+	%struct.re_pattern_buffer = type { i8*, i32, i32, i32, i8*, i8*, i32, i8 }
+
+define fastcc i32 @byte_regex_compile(i8* %pattern, i32 %size, i32 %syntax, %struct.re_pattern_buffer* %bufp) {
+entry:
+        br i1 false, label %bb147, label %cond_next123
+
+cond_next123:           ; preds = %entry
+        ret i32 0
+
+bb147:          ; preds = %entry
+        switch i32 0, label %normal_char [
+                 i32 91, label %bb1734
+                 i32 92, label %bb5700
+        ]
+
+bb1734:         ; preds = %bb147
+        br label %bb1855.outer.outer
+
+cond_true1831:          ; preds = %bb1855.outer
+        br i1 %tmp1837, label %cond_next1844, label %cond_true1840
+
+cond_true1840:          ; preds = %cond_true1831
+        ret i32 0
+
+cond_next1844:          ; preds = %cond_true1831
+        br i1 false, label %bb1855.outer, label %cond_true1849
+
+cond_true1849:          ; preds = %cond_next1844
+        br label %bb1855.outer.outer
+
+bb1855.outer.outer:             ; preds = %cond_true1849, %bb1734
+        %b.10.ph.ph = phi i8* [ null, %cond_true1849 ], [ null, %bb1734 ]               ; <i8*> [#uses=1]
+        br label %bb1855.outer
+
+bb1855.outer:           ; preds = %bb1855.outer.outer, %cond_next1844
+        %b.10.ph = phi i8* [ null, %cond_next1844 ], [ %b.10.ph.ph, %bb1855.outer.outer ]               ; <i8*> [#uses=1]
+        %tmp1837 = icmp eq i8* null, null               ; <i1> [#uses=2]
+        br i1 false, label %cond_true1831, label %cond_next1915
+
+cond_next1915:          ; preds = %cond_next1961, %bb1855.outer
+        store i8* null, i8** null
+        br i1 %tmp1837, label %cond_next1929, label %cond_true1923
+
+cond_true1923:          ; preds = %cond_next1915
+        ret i32 0
+
+cond_next1929:          ; preds = %cond_next1915
+        br i1 false, label %cond_next1961, label %cond_next2009
+
+cond_next1961:          ; preds = %cond_next1929
+        %tmp1992 = getelementptr i8, i8* %b.10.ph, i32 0            ; <i8*> [#uses=0]
+        br label %cond_next1915
+
+cond_next2009:          ; preds = %cond_next1929
+        ret i32 0
+
+bb5700:         ; preds = %bb147
+        ret i32 0
+
+normal_char:            ; preds = %bb147
+        ret i32 0
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll b/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll
new file mode 100644
index 00000000000..52794891c56
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -licm -simple-loop-unswitch -disable-output 
+; PR 1589
+
+      	%struct.QBasicAtomic = type { i32 }
+
+define void @_ZNK5QDate9addMonthsEi(%struct.QBasicAtomic* sret  %agg.result, %struct.QBasicAtomic* %this, i32 %nmonths) {
+entry:
+	br label %cond_true90
+
+bb16:		; preds = %cond_true90
+	br i1 false, label %bb93, label %cond_true90
+
+bb45:		; preds = %cond_true90
+	br i1 false, label %bb53, label %bb58
+
+bb53:		; preds = %bb45
+	br i1 false, label %bb93, label %cond_true90
+
+bb58:		; preds = %bb45
+	store i32 0, i32* null, align 4
+	br i1 false, label %cond_true90, label %bb93
+
+cond_true90:		; preds = %bb58, %bb53, %bb16, %entry
+	%nmonths_addr.016.1 = phi i32 [ %nmonths, %entry ], [ 0, %bb16 ], [ 0, %bb53 ], [ %nmonths_addr.016.1, %bb58 ]		; <i32> [#uses=2]
+	%tmp14 = icmp slt i32 %nmonths_addr.016.1, -11		; <i1> [#uses=1]
+	br i1 %tmp14, label %bb16, label %bb45
+
+bb93:		; preds = %bb58, %bb53, %bb16
+	ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll b/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
new file mode 100644
index 00000000000..cb65d2fb644
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output
+	%struct.ClassDef = type { %struct.QByteArray, %struct.QByteArray, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", i8, i8, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", i32, i32 }
+	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+	%struct.Generator = type { %struct.FILE*, %struct.ClassDef*, %"struct.QList<ArgumentDef>", %struct.QByteArray, %"struct.QList<ArgumentDef>" }
+	%struct.QBasicAtomic = type { i32 }
+	%struct.QByteArray = type { %"struct.QByteArray::Data"* }
+	%"struct.QByteArray::Data" = type { %struct.QBasicAtomic, i32, i32, i8*, [1 x i8] }
+	%"struct.QList<ArgumentDef>" = type { %"struct.QList<ArgumentDef>::._19" }
+	%"struct.QList<ArgumentDef>::._19" = type { %struct.QListData }
+	%struct.QListData = type { %"struct.QListData::Data"* }
+	%"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
+	%"struct.QMap<QByteArray,QByteArray>" = type { %"struct.QMap<QByteArray,QByteArray>::._56" }
+	%"struct.QMap<QByteArray,QByteArray>::._56" = type { %struct.QMapData* }
+	%struct.QMapData = type { %struct.QMapData*, [12 x %struct.QMapData*], %struct.QBasicAtomic, i32, i32, i32, i8 }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+@.str9 = external constant [1 x i8]		; <[1 x i8]*> [#uses=1]
+
+declare i32 @strcmp(i8*, i8*)
+
+define i32 @_ZN9Generator6strregEPKc(%struct.Generator* %this, i8* %s) {
+entry:
+	%s_addr.0 = select i1 false, i8* getelementptr ([1 x i8], [1 x i8]* @.str9, i32 0, i32 0), i8* %s		; <i8*> [#uses=2]
+	%tmp122 = icmp eq i8* %s_addr.0, null		; <i1> [#uses=1]
+	br label %bb184
+
+bb55:		; preds = %bb184
+	ret i32 0
+
+bb88:		; preds = %bb184
+	br i1 %tmp122, label %bb154, label %bb128
+
+bb128:		; preds = %bb88
+	%tmp138 = call i32 @strcmp( i8* null, i8* %s_addr.0 )		; <i32> [#uses=1]
+	%iftmp.37.0.in4 = icmp eq i32 %tmp138, 0		; <i1> [#uses=1]
+	br i1 %iftmp.37.0.in4, label %bb250, label %bb166
+
+bb154:		; preds = %bb88
+	br i1 false, label %bb250, label %bb166
+
+bb166:		; preds = %bb154, %bb128
+	%tmp175 = add i32 %idx.0, 1		; <i32> [#uses=1]
+	%tmp177 = add i32 %tmp175, 0		; <i32> [#uses=1]
+	%tmp181 = add i32 %tmp177, 0		; <i32> [#uses=1]
+	%tmp183 = add i32 %i33.0, 1		; <i32> [#uses=1]
+	br label %bb184
+
+bb184:		; preds = %bb166, %entry
+	%i33.0 = phi i32 [ 0, %entry ], [ %tmp183, %bb166 ]		; <i32> [#uses=2]
+	%idx.0 = phi i32 [ 0, %entry ], [ %tmp181, %bb166 ]		; <i32> [#uses=2]
+	%tmp49 = icmp slt i32 %i33.0, 0		; <i1> [#uses=1]
+	br i1 %tmp49, label %bb88, label %bb55
+
+bb250:		; preds = %bb154, %bb128
+	ret i32 %idx.0
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll b/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll
new file mode 100644
index 00000000000..efbb7619591
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -licm -loop-unroll -disable-output
+
+@resonant = external global i32		; <i32*> [#uses=2]
+
+define void @weightadj() {
+entry:
+	br label %bb
+
+bb:		; preds = %bb158, %entry
+	store i32 0, i32* @resonant, align 4
+	br i1 false, label %g.exit, label %bb158
+
+g.exit:		; preds = %bb68, %bb
+	br i1 false, label %bb68, label %cond_true
+
+cond_true:		; preds = %g.exit
+	store i32 1, i32* @resonant, align 4
+	br label %bb68
+
+bb68:		; preds = %cond_true, %g.exit
+	%tmp71 = icmp slt i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp71, label %g.exit, label %bb158
+
+bb158:		; preds = %bb68, %bb
+	br i1 false, label %bb, label %return
+
+return:		; preds = %bb158
+	ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll b/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll
new file mode 100644
index 00000000000..5b3ca9c187d
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -gvn -disable-output
+; PR2372
+target triple = "i386-pc-linux-gnu"
+
+define i32 @func_3(i16 signext  %p_5, i16 signext  %p_6) nounwind  {
+entry:
+	%tmp3 = icmp eq i16 %p_5, 0		; <i1> [#uses=1]
+	%tmp1314 = sext i16 %p_6 to i32		; <i32> [#uses=1]
+	%tmp28 = icmp ugt i32 %tmp1314, 3		; <i1> [#uses=1]
+	%bothcond = or i1 %tmp28, false		; <i1> [#uses=1]
+	br label %bb
+bb:		; preds = %bb54, %entry
+	br i1 %tmp3, label %bb54, label %bb5
+bb5:		; preds = %bb
+	br i1 %bothcond, label %bb54, label %bb31
+bb31:		; preds = %bb5
+	br label %bb54
+bb54:		; preds = %bb31, %bb5, %bb
+	br i1 false, label %bb64, label %bb
+bb64:		; preds = %bb54
+	%tmp6566 = sext i16 %p_6 to i32		; <i32> [#uses=1]
+	%tmp68 = tail call i32 (...) @func_18( i32 1, i32 %tmp6566, i32 1 ) nounwind 		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @func_18(...)
diff --git a/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll b/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll
new file mode 100644
index 00000000000..e309d60a3e4
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -licm -simple-loop-unswitch -disable-output
+@g_56 = external global i16		; <i16*> [#uses=2]
+
+define i32 @func_67(i32 %p_68, i8 signext  %p_69, i8 signext  %p_71) nounwind  {
+entry:
+	br label %bb
+bb:		; preds = %bb44, %entry
+	br label %bb3
+bb3:		; preds = %bb36, %bb
+	%bothcond = or i1 false, false		; <i1> [#uses=1]
+	br i1 %bothcond, label %bb29, label %bb19
+bb19:		; preds = %bb3
+	br i1 false, label %bb36, label %bb29
+bb29:		; preds = %bb19, %bb3
+	ret i32 0
+bb36:		; preds = %bb19
+	store i16 0, i16* @g_56, align 2
+	br i1 false, label %bb44, label %bb3
+bb44:		; preds = %bb44, %bb36
+	%tmp46 = load i16, i16* @g_56, align 2		; <i16> [#uses=0]
+	br i1 false, label %bb, label %bb44
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll b/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll
new file mode 100644
index 00000000000..9f71417df8e
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simple-loop-unswitch
+; PR8622
+@g_38 = external global i32, align 4
+
+define void @func_67(i32 %p_68.coerce) nounwind {
+entry:
+  br i1 true, label %for.end12, label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %g_38.promoted = load i32, i32* @g_38
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %bb.nph
+  %tobool.i = icmp eq i32 %p_68.coerce, 1
+  %xor4.i = xor i32 %p_68.coerce, 1
+  %call1 = select i1 %tobool.i, i32 0, i32 %xor4.i
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body
+  br i1 true, label %for.cond.for.end12_crit_edge, label %for.body
+
+for.cond.for.end12_crit_edge:                     ; preds = %for.cond
+  store i32 %call1, i32* @g_38
+  br label %for.end12
+
+for.end12:                                        ; preds = %for.cond.for.end12_crit_edge, %entry
+  ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll b/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll
new file mode 100644
index 00000000000..8c13e1854c9
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll
@@ -0,0 +1,28 @@
+; RUN: opt -simple-loop-unswitch -disable-output < %s
+; PR10031
+
+define i32 @test(i32 %command) {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then14, %tailrecurse, %entry
+  br i1 undef, label %if.then, label %tailrecurse
+
+if.then:                                          ; preds = %tailrecurse
+  switch i32 %command, label %sw.bb [
+    i32 2, label %land.lhs.true
+    i32 0, label %land.lhs.true
+  ]
+
+land.lhs.true:                                    ; preds = %if.then, %if.then
+  br i1 undef, label %sw.bb, label %if.then14
+
+if.then14:                                        ; preds = %land.lhs.true
+  switch i32 %command, label %tailrecurse [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %if.then14
+  unreachable
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll b/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll
new file mode 100644
index 00000000000..6b7d9faf70f
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -sroa -simple-loop-unswitch -disable-output
+; PR11016
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.2"
+
+%class.MyContainer.1.3.19.29 = type { [6 x %class.MyMemVarClass.0.2.18.28*] }
+%class.MyMemVarClass.0.2.18.28 = type { i32 }
+
+define void @_ZN11MyContainer1fEi(%class.MyContainer.1.3.19.29* %this, i32 %doit) uwtable ssp align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %inc1 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  %conv = sext i32 %inc1 to i64
+  %cmp = icmp ult i64 %conv, 6
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tobool = icmp ne i32 %doit, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %idxprom = sext i32 %inc1 to i64
+  %array_ = getelementptr inbounds %class.MyContainer.1.3.19.29, %class.MyContainer.1.3.19.29* %this, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [6 x %class.MyMemVarClass.0.2.18.28*], [6 x %class.MyMemVarClass.0.2.18.28*]* %array_, i32 0, i64 %idxprom
+  %tmp4 = load %class.MyMemVarClass.0.2.18.28*, %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8
+  %isnull = icmp eq %class.MyMemVarClass.0.2.18.28* %tmp4, null
+  br i1 %isnull, label %for.inc, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  invoke void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28* %tmp4)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %delete.notnull
+  %0 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8*
+  call void @_ZdlPv(i8* %0) nounwind
+  br label %for.inc
+
+lpad:                                             ; preds = %delete.notnull
+  %1 = landingpad { i8*, i32 }
+          cleanup
+  %2 = extractvalue { i8*, i32 } %1, 0
+  %3 = extractvalue { i8*, i32 } %1, 1
+  %4 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8*
+  call void @_ZdlPv(i8* %4) nounwind
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0
+  %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1
+  resume { i8*, i32 } %lpad.val7
+
+for.inc:                                          ; preds = %invoke.cont, %if.then, %for.body
+  %inc = add nsw i32 %inc1, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv(i8*) nounwind
diff --git a/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll b/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll
new file mode 100644
index 00000000000..133f41744a9
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -S -simple-loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s
+; PR12343: -simple-loop-unswitch crash on indirect branch
+
+; CHECK:       %0 = icmp eq i64 undef, 0
+; CHECK-NEXT:  br i1 %0, label %"5", label %"4"
+
+; CHECK:       "5":                                              ; preds = %entry
+; CHECK-NEXT:  br label %"16"
+
+; CHECK:       "16":                                             ; preds = %"22", %"5"
+; CHECK-NEXT:  indirectbr i8* undef, [label %"22", label %"33"]
+
+; CHECK:       "22":                                             ; preds = %"16"
+; CHECK-NEXT:  br i1 %0, label %"16", label %"26"
+
+; CHECK:       "26":                                             ; preds = %"22"
+; CHECK-NEXT:  unreachable
+
+define void @foo() {
+entry:
+  %0 = icmp eq i64 undef, 0
+  br i1 %0, label %"5", label %"4"
+
+"4":                                              ; preds = %entry
+  unreachable
+
+"5":                                              ; preds = %entry
+  br label %"16"
+
+"16":                                             ; preds = %"22", %"5"
+  indirectbr i8* undef, [label %"22", label %"33"]
+
+"22":                                             ; preds = %"16"
+  br i1 %0, label %"16", label %"26"
+
+"26":                                             ; preds = %"22"
+  unreachable
+
+"33":                                             ; preds = %"16"
+  unreachable
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll b/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
new file mode 100644
index 00000000000..8f05e58b8eb
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -basicaa -instcombine -inline -functionattrs -licm -simple-loop-unswitch -gvn -verify
+; PR12573
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379 = type { %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 }
+%class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376 = type { %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* }
+%class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 = type { %class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377* }
+%class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377 = type { i8 }
+
+define void @_Z23get_reconstruction_pathv() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %c = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, align 8
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.end, %entry
+  invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %for.cond
+  invoke void @_ZN1C3endEv()
+          to label %for.cond3 unwind label %lpad
+
+for.cond3:                                        ; preds = %invoke.cont6, %invoke.cont
+  invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+          to label %invoke.cont4 unwind label %lpad
+
+invoke.cont4:                                     ; preds = %for.cond3
+  invoke void @_ZN1C3endEv()
+          to label %invoke.cont6 unwind label %lpad
+
+invoke.cont6:                                     ; preds = %invoke.cont4
+  br i1 undef, label %for.cond3, label %for.end
+
+lpad:                                             ; preds = %for.end, %invoke.cont4, %for.cond3, %invoke.cont, %for.cond
+  %0 = landingpad { i8*, i32 }
+          cleanup
+  resume { i8*, i32 } undef
+
+for.end:                                          ; preds = %invoke.cont6
+  invoke void @_ZN1C13_M_insert_auxER1D()
+          to label %for.cond unwind label %lpad
+}
+
+define void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this) uwtable ssp align 2 {
+entry:
+  %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8
+  store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8
+  %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr
+  %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0
+  %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8
+  %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null
+  br i1 %tobool, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  call void @_Z10__assert13v() noreturn
+  unreachable
+
+cond.end:                                         ; preds = %entry
+  ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZN1C3endEv()
+
+define void @_ZN1C13_M_insert_auxER1D() uwtable ssp align 2 {
+entry:
+  ret void
+}
+
+define void @_ZN1DD1Ev() unnamed_addr uwtable inlinehint ssp align 2 {
+entry:
+  ret void
+}
+
+define void @_ZN1DD2Ev() unnamed_addr uwtable inlinehint ssp align 2 {
+entry:
+  ret void
+}
+
+define void @_ZN1BD1Ev() unnamed_addr uwtable ssp align 2 {
+entry:
+  ret void
+}
+
+define void @_ZN1BD2Ev() unnamed_addr uwtable ssp align 2 {
+entry:
+  ret void
+}
+
+define void @_ZN1BaSERS_() uwtable ssp align 2 {
+entry:
+  unreachable
+}
+
+declare void @_Z10__assert13v() noreturn
diff --git a/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll b/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll
new file mode 100644
index 00000000000..e5549fe92f9
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; PR12887
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define void @func() noreturn nounwind uwtable {
+entry:
+  %0 = load i32, i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  %1 = load i32, i32* @b, align 4
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %d.0 = phi i8 [ undef, %entry ], [ %conv2, %while.body ]
+  %conv = sext i8 %d.0 to i32
+  %cond = select i1 %tobool, i32 0, i32 %conv
+  %conv11 = zext i8 %d.0 to i32
+  %add = add i32 %1, %conv11
+  %conv2 = trunc i32 %add to i8
+  br label %while.body
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll b/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll
new file mode 100644
index 00000000000..ddb888af8ba
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s
+
+; In cases where two address spaces do not have the same size pointer, the
+; input for the addrspacecast should not be used as a substitute for itself
+; when manipulating the pointer.
+
+target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16"
+
+define void @foo() {
+; CHECK-LABEL: @foo
+entry:
+  %arrayidx.i1 = getelementptr inbounds i16, i16* undef, i16 undef
+  %arrayidx.i = addrspacecast i16* %arrayidx.i1 to i16 addrspace(1)*
+  br i1 undef, label %for.body.i, label %bar.exit
+
+for.body.i:                                       ; preds = %for.body.i, %entry
+; When we call makeLoopInvariant (i.e. trivial LICM) on this load, it 
+; will try to find the base object to prove deferenceability.  If we look
+; through the addrspacecast, we'll fail an assertion about bitwidths matching
+; CHECK-LABEL: for.body.i
+; CHECK:   %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+  %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+  %cmp1.i = icmp eq i16 %0, 0
+  br i1 %cmp1.i, label %bar.exit, label %for.body.i
+
+bar.exit:                                         ; preds = %for.body.i, %entry
+  ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll b/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll
new file mode 100644
index 00000000000..cbee0fef14b
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simple-loop-unswitch -S 2>&1 | FileCheck %s
+
+; This is to test trivial loop unswitch only happens when trivial condition
+; itself is an LIV loop condition (not partial LIV which could occur in and/or).
+
+define i32 @test(i1 %cond1, i32 %var1) {
+entry:
+  br label %loop_begin
+
+loop_begin:
+  %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+  %cond2 = icmp eq i32 %var3, 10
+  %cond.and = and i1 %cond1, %cond2
+  
+; %cond.and only has %cond1 as LIV so no unswitch should happen.
+; CHECK: br i1 %cond.and, label %do_something, label %loop_exit
+  br i1 %cond.and, label %do_something, label %loop_exit 
+
+do_something:
+  %var2 = add i32 %var3, 1
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func() noreturn 
diff --git a/test/Transforms/SimpleLoopUnswitch/basictest.ll b/test/Transforms/SimpleLoopUnswitch/basictest.ll
new file mode 100644
index 00000000000..afca20fe251
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/basictest.ll
@@ -0,0 +1,184 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+define i32 @test(i32* %A, i1 %C) {
+entry:
+	br label %no_exit
+no_exit:		; preds = %no_exit.backedge, %entry
+	%i.0.0 = phi i32 [ 0, %entry ], [ %i.0.0.be, %no_exit.backedge ]		; <i32> [#uses=3]
+	%gep.upgrd.1 = zext i32 %i.0.0 to i64		; <i64> [#uses=1]
+	%tmp.7 = getelementptr i32, i32* %A, i64 %gep.upgrd.1		; <i32*> [#uses=4]
+	%tmp.13 = load i32, i32* %tmp.7		; <i32> [#uses=2]
+	%tmp.14 = add i32 %tmp.13, 1		; <i32> [#uses=1]
+	store i32 %tmp.14, i32* %tmp.7
+	br i1 %C, label %then, label %endif
+then:		; preds = %no_exit
+	%tmp.29 = load i32, i32* %tmp.7		; <i32> [#uses=1]
+	%tmp.30 = add i32 %tmp.29, 2		; <i32> [#uses=1]
+	store i32 %tmp.30, i32* %tmp.7
+	%inc9 = add i32 %i.0.0, 1		; <i32> [#uses=2]
+	%tmp.112 = icmp ult i32 %inc9, 100000		; <i1> [#uses=1]
+	br i1 %tmp.112, label %no_exit.backedge, label %return
+no_exit.backedge:		; preds = %endif, %then
+	%i.0.0.be = phi i32 [ %inc9, %then ], [ %inc, %endif ]		; <i32> [#uses=1]
+	br label %no_exit
+endif:		; preds = %no_exit
+	%inc = add i32 %i.0.0, 1		; <i32> [#uses=2]
+	%tmp.1 = icmp ult i32 %inc, 100000		; <i1> [#uses=1]
+	br i1 %tmp.1, label %no_exit.backedge, label %return
+return:		; preds = %endif, %then
+	ret i32 %tmp.13
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the noduplicate call.
+
+; CHECK-LABEL: @test2(
+define i32 @test2(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+  call void @decf() noreturn nounwind noduplicate
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the convergent call that is not control-dependent on the unswitch condition.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+; CHECK: call void @conv()
+; CHECK-NOT: call void @conv()
+  call void @conv() convergent
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+  call void @decf() noreturn nounwind
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch 0 or 3 out of the loop.
+;
+; CHECK: define void @and_or_i2_as_switch_input(i2
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i2_as_switch_input(i2 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ]
+  %and = and i2 %a, %i 
+  %or = or i2 %and, %i
+  switch i2 %or, label %sw.default [
+    i2 0, label %sw.bb
+    i2 3, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i2 %i, 1
+  %cmp = icmp slt i2 %inc, 3 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch true/false out of the loop.
+;
+; CHECK: define void @and_or_i1_as_branch_input(i1
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i1_as_branch_input(i1 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i1 [ 0, %entry ], [ %inc, %for.inc ]
+  %and = and i1 %a, %i 
+  %or = or i1 %and, %i
+  br i1 %or, label %sw.bb, label %sw.bb1
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i1 %i, 1
+  %cmp = icmp slt i1 %inc, 1 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
+declare void @conv() convergent
diff --git a/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll b/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll
new file mode 100644
index 00000000000..8ab23cb1f61
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -simple-loop-unswitch < %s | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+define void @f(i32 %doit, i1 %x, i1 %y) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %tobool = icmp eq i32 %doit, 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  br i1 %x, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  br i1 %tobool, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  br i1 %y, label %for.inc, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  invoke void @g()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %delete.notnull
+  br label %for.inc
+
+lpad:                                             ; preds = %delete.notnull
+  %cp = cleanuppad within none []
+  cleanupret from %cp unwind to caller
+
+for.inc:                                          ; preds = %invoke.cont, %if.then, %for.body
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NOT: cleanuppad
+
+attributes #0 = { ssp uwtable }
diff --git a/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll b/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll
new file mode 100644
index 00000000000..7085ed8e10f
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s
+
+; This test checks if unswitched condition preserve make.implicit metadata.
+define i32 @test(i1 %cond) {
+; CHECK-LABEL: @test(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit, !make.implicit !0
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond, label %continue, label %loop_exit, !make.implicit !0
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  call void @some_func()
+  br label %loop_begin
+; CHECK:       continue:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret
+}
+
+declare void @some_func()
+
+!0 = !{}
diff --git a/test/Transforms/SimpleLoopUnswitch/crash.ll b/test/Transforms/SimpleLoopUnswitch/crash.ll
new file mode 100644
index 00000000000..a6c64113c08
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/crash.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+define void @test1(i32* %S2) {
+entry:
+	br i1 false, label %list_Length.exit, label %cond_true.i
+cond_true.i:		; preds = %entry
+	ret void
+list_Length.exit:		; preds = %entry
+	br i1 false, label %list_Length.exit9, label %cond_true.i5
+cond_true.i5:		; preds = %list_Length.exit
+	ret void
+list_Length.exit9:		; preds = %list_Length.exit
+	br i1 false, label %bb78, label %return
+bb44:		; preds = %bb78, %cond_next68
+	br i1 %tmp49.not, label %bb62, label %bb62.loopexit
+bb62.loopexit:		; preds = %bb44
+	br label %bb62
+bb62:		; preds = %bb62.loopexit, %bb44
+	br i1 false, label %return.loopexit, label %cond_next68
+cond_next68:		; preds = %bb62
+	br i1 false, label %return.loopexit, label %bb44
+bb78:		; preds = %list_Length.exit9
+	%tmp49.not = icmp eq i32* %S2, null		; <i1> [#uses=1]
+	br label %bb44
+return.loopexit:		; preds = %cond_next68, %bb62
+	%retval.0.ph = phi i32 [ 1, %cond_next68 ], [ 0, %bb62 ]		; <i32> [#uses=1]
+	br label %return
+return:		; preds = %return.loopexit, %list_Length.exit9
+	%retval.0 = phi i32 [ 0, %list_Length.exit9 ], [ %retval.0.ph, %return.loopexit ]		; <i32> [#uses=0]
+	ret void
+}
+
+define void @test2() nounwind {
+entry:
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %and.i13521 = and <4 x i1> undef, undef         ; <<4 x i1>> [#uses=1]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %bb.nph
+  %or.i = select <4 x i1> %and.i13521, <4 x i32> undef, <4 x i32> undef ; <<4 x i32>> [#uses=0]
+  br i1 false, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; PR6879
+define i32* @test3(i32** %p_45, i16 zeroext %p_46, i64 %p_47, i64 %p_48, i16 signext %p_49) nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond4, %entry
+  br i1 false, label %for.cond4, label %for.end88
+
+for.cond4:                                        ; preds = %for.cond
+  %conv46 = trunc i32 0 to i8                     ; <i8> [#uses=2]
+  %cmp60 = icmp sgt i8 %conv46, 124               ; <i1> [#uses=1]
+  %or.cond = and i1 undef, %cmp60                 ; <i1> [#uses=1]
+  %cond = select i1 %or.cond, i8 %conv46, i8 undef ; <i8> [#uses=0]
+  br label %for.cond
+
+for.end88:                                        ; preds = %for.cond
+  ret i32* undef
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll b/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll
new file mode 100644
index 00000000000..52e9aa1acc2
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll
@@ -0,0 +1,51 @@
+; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s
+
+define void @f(i32 %n, i32* %ptr) {
+; CHECK-LABEL: @f(
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+  %iv.inc = add i32 %iv, 1
+  %unswitch_cond_root = icmp ne i32 %iv.inc, 42
+  %us.0 = and i1 %unswitch_cond_root, %unswitch_cond_root
+  %us.1 = and i1 %us.0, %us.0
+  %us.2 = and i1 %us.1, %us.1
+  %us.3 = and i1 %us.2, %us.2
+  %us.4 = and i1 %us.3, %us.3
+  %us.5 = and i1 %us.4, %us.4
+  %us.6 = and i1 %us.5, %us.5
+  %us.7 = and i1 %us.6, %us.6
+  %us.8 = and i1 %us.7, %us.7
+  %us.9 = and i1 %us.8, %us.8
+  %us.10 = and i1 %us.9, %us.9
+  %us.11 = and i1 %us.10, %us.10
+  %us.12 = and i1 %us.11, %us.11
+  %us.13 = and i1 %us.12, %us.12
+  %us.14 = and i1 %us.13, %us.13
+  %us.15 = and i1 %us.14, %us.14
+  %us.16 = and i1 %us.15, %us.15
+  %us.17 = and i1 %us.16, %us.16
+  %us.18 = and i1 %us.17, %us.17
+  %us.19 = and i1 %us.18, %us.18
+  %us.20 = and i1 %us.19, %us.19
+  %us.21 = and i1 %us.20, %us.20
+  %us.22 = and i1 %us.21, %us.21
+  %us.23 = and i1 %us.22, %us.22
+  %us.24 = and i1 %us.23, %us.23
+  %us.25 = and i1 %us.24, %us.24
+  %us.26 = and i1 %us.25, %us.25
+  %us.27 = and i1 %us.26, %us.26
+  %us.28 = and i1 %us.27, %us.27
+  %us.29 = and i1 %us.28, %us.28
+  br i1 %us.29, label %leave, label %be
+
+be:
+  store volatile i32 0, i32* %ptr
+  %becond = icmp ult i32 %iv.inc, %n
+  br i1 %becond, label %leave, label %loop
+
+leave:
+  ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll b/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll
new file mode 100644
index 00000000000..9164fd24783
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll
@@ -0,0 +1,64 @@
+; REQUIRES: asserts
+; RUN: opt -simple-loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s
+; PR5373
+
+; Loop unswitching shouldn't trivially unswitch the true case of condition %a
+; in the code here because it leads to an infinite loop. While this doesn't
+; contain any instructions with side effects, it's still a kind of side effect.
+; It can trivially unswitch on the false cas of condition %a though.
+
+; STATS: 2 simple-loop-unswitch - Number of branches unswitched
+; STATS: 2 simple-loop-unswitch - Number of unswitches that are trivial
+
+; CHECK-LABEL: @func_16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0
+
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1
+
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %for.body
+
+; CHECK: for.body:
+; CHECK-NEXT: br label %cond.end
+
+; CHECK: cond.end:
+; CHECK-NEXT: br label %for.body
+
+; CHECK: abort0:
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
+; CHECK-NEXT: unreachable
+
+; CHECK: abort1:
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
+; CHECK-NEXT: unreachable
+
+; CHECK: }
+
+define void @func_16(i1 %a, i1 %b) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  br i1 %a, label %cond.end, label %abort0
+
+cond.end:
+  br i1 %b, label %for.body, label %abort1
+
+abort0:
+  call void @end0() noreturn nounwind
+  unreachable
+
+abort1:
+  call void @end1() noreturn nounwind
+  unreachable
+}
+
+declare void @end0() noreturn
+declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/SimpleLoopUnswitch/msan.ll b/test/Transforms/SimpleLoopUnswitch/msan.ll
new file mode 100644
index 00000000000..ec1110ac9d6
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/msan.ll
@@ -0,0 +1,141 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @unknown()
+declare void @unknown2()
+
+@y = global i64 0, align 8
+
+; The following is approximately:
+; void f(bool *x) {
+;   for (int i = 0; i < 1; ++i) {
+;     if (*x) {
+;       if (y)
+;         unknown();
+;       else
+;         break;
+;     }
+;   }
+; }
+; With MemorySanitizer, the loop can not be unswitched on "y", because "y" could
+; be uninitialized when x == false.
+; Test that the branch on "y" is inside the loop (after the first unconditional
+; branch).
+
+define void @may_not_execute_trivial(i1* %x) sanitize_memory {
+; CHECK-LABEL: @may_not_execute_trivial(
+entry:
+  %y = load i64, i64* @y, align 8
+  %y.cmp = icmp eq i64 %y, 0
+  br label %for.body
+; CHECK: %[[Y:.*]] = load i64, i64* @y
+; CHECK: %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK-NOT: br i1
+; CHECK: br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %x.load = load i1, i1* %x
+  br i1 %x.load, label %for.inc, label %if.then
+; CHECK: %[[XLOAD:.*]] = load i1, i1* %x
+; CHECK: br i1 %[[XLOAD]]
+
+if.then:
+  br i1 %y.cmp, label %for.end, label %if.then4
+; CHECK: br i1 %[[YCMP]]
+
+if.then4:
+  call void @unknown()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+
+; The same as above, but "y" is a function parameter instead of a global.
+; This shows that it is not enough to suppress hoisting of load instructions,
+; the actual problem is in the speculative branching.
+
+define void @may_not_execute2_trivial(i1* %x, i1 %y) sanitize_memory {
+; CHECK-LABEL: @may_not_execute2_trivial(
+entry:
+  br label %for.body
+; CHECK-NOT: br i1
+; CHECK: br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %x.load = load i1, i1* %x
+  br i1 %x.load, label %for.inc, label %if.then
+; CHECK: %[[XLOAD:.*]] = load i1, i1* %x
+; CHECK: br i1 %[[XLOAD]]
+
+if.then:
+  br i1 %y, label %for.end, label %if.then4
+; CHECK: br i1 %y
+
+if.then4:
+  call void @unknown()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+
+; The following is approximately:
+; void f() {
+;   for (int i = 0; i < 1; ++i) {
+;     if (y)
+;       unknown();
+;     else
+;       break;
+;   }
+; }
+; "if (y)" is guaranteed to execute; the loop can be unswitched.
+
+define void @must_execute_trivial() sanitize_memory {
+; CHECK-LABEL: @must_execute_trivial(
+entry:
+  %y = load i64, i64* @y, align 8
+  %y.cmp = icmp eq i64 %y, 0
+  br label %for.body
+; CHECK:   %[[Y:.*]] = load i64, i64* @y
+; CHECK:   %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK:   br i1 %[[YCMP]], label %[[EXIT_SPLIT:.*]], label %[[PH:.*]]
+;
+; CHECK: [[PH]]:
+; CHECK:   br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  br i1 %y.cmp, label %for.end, label %if.then4
+; CHECK: br label %if.then4
+
+if.then4:
+  call void @unknown()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+; CHECK: for.end:
+; CHECK:   br label %[[EXIT_SPLIT]]
+;
+; CHECK: [[EXIT_SPLIT]]:
+; CHECK:   ret void
+}
diff --git a/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll b/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll
new file mode 100644
index 00000000000..76b41e73ede
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll
@@ -0,0 +1,129 @@
+; RUN: opt -simple-loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s
+
+; Loop unswitch should be able to unswitch these loops and
+; preserve LCSSA and LoopSimplify forms.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-apple-darwin9"
+
+@delim1 = external global i32                     ; <i32*> [#uses=1]
+@delim2 = external global i32                     ; <i32*> [#uses=1]
+
+define i32 @ineqn(i8* %s, i8* %p) nounwind readonly {
+entry:
+  %0 = load i32, i32* @delim1, align 4                 ; <i32> [#uses=1]
+  %1 = load i32, i32* @delim2, align 4                 ; <i32> [#uses=1]
+  br label %bb8.outer
+
+bb:                                               ; preds = %bb8
+  %2 = icmp eq i8* %p_addr.0, %s                  ; <i1> [#uses=1]
+  br i1 %2, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb
+  %3 = getelementptr inbounds i8, i8* %p_addr.0, i32 1 ; <i8*> [#uses=3]
+  switch i32 %ineq.0.ph, label %bb8.backedge [
+    i32 0, label %bb3
+    i32 1, label %bb6
+  ]
+
+bb8.backedge:                                     ; preds = %bb6, %bb5, %bb2
+  br label %bb8
+
+bb3:                                              ; preds = %bb2
+  %4 = icmp eq i32 %8, %0                         ; <i1> [#uses=1]
+  br i1 %4, label %bb8.outer.loopexit, label %bb5
+
+bb5:                                              ; preds = %bb3
+  br i1 %6, label %bb6, label %bb8.backedge
+
+bb6:                                              ; preds = %bb5, %bb2
+  %5 = icmp eq i32 %8, %1                         ; <i1> [#uses=1]
+  br i1 %5, label %bb7, label %bb8.backedge
+
+bb7:                                              ; preds = %bb6
+  %.lcssa1 = phi i8* [ %3, %bb6 ]                 ; <i8*> [#uses=1]
+  br label %bb8.outer.backedge
+
+bb8.outer.backedge:                               ; preds = %bb8.outer.loopexit, %bb7
+  %.lcssa2 = phi i8* [ %.lcssa1, %bb7 ], [ %.lcssa, %bb8.outer.loopexit ] ; <i8*> [#uses=1]
+  %ineq.0.ph.be = phi i32 [ 0, %bb7 ], [ 1, %bb8.outer.loopexit ] ; <i32> [#uses=1]
+  br label %bb8.outer
+
+bb8.outer.loopexit:                               ; preds = %bb3
+  %.lcssa = phi i8* [ %3, %bb3 ]                  ; <i8*> [#uses=1]
+  br label %bb8.outer.backedge
+
+bb8.outer:                                        ; preds = %bb8.outer.backedge, %entry
+  %ineq.0.ph = phi i32 [ 0, %entry ], [ %ineq.0.ph.be, %bb8.outer.backedge ] ; <i32> [#uses=3]
+  %p_addr.0.ph = phi i8* [ %p, %entry ], [ %.lcssa2, %bb8.outer.backedge ] ; <i8*> [#uses=1]
+  %6 = icmp eq i32 %ineq.0.ph, 1                  ; <i1> [#uses=1]
+  br label %bb8
+
+bb8:                                              ; preds = %bb8.outer, %bb8.backedge
+  %p_addr.0 = phi i8* [ %p_addr.0.ph, %bb8.outer ], [ %3, %bb8.backedge ] ; <i8*> [#uses=3]
+  %7 = load i8, i8* %p_addr.0, align 1                ; <i8> [#uses=2]
+  %8 = sext i8 %7 to i32                          ; <i32> [#uses=2]
+  %9 = icmp eq i8 %7, 0                           ; <i1> [#uses=1]
+  br i1 %9, label %bb10, label %bb
+
+bb10:                                             ; preds = %bb8, %bb
+  %.0 = phi i32 [ %ineq.0.ph, %bb ], [ 0, %bb8 ]  ; <i32> [#uses=1]
+  ret i32 %.0
+}
+
+; This is a simplified form of ineqn from above. It triggers some
+; different cases in the loop-unswitch code.
+
+define void @simplified_ineqn() nounwind readonly {
+entry:
+  br label %bb8.outer
+
+bb8.outer:                                        ; preds = %bb6, %bb2, %entry
+  %x = phi i32 [ 0, %entry ], [ 0, %bb6 ], [ 1, %bb2 ] ; <i32> [#uses=1]
+  br i1 undef, label %return, label %bb2
+
+bb2:                                              ; preds = %bb
+  switch i32 %x, label %bb6 [
+    i32 0, label %bb8.outer
+  ]
+
+bb6:                                              ; preds = %bb2
+  br i1 undef, label %bb8.outer, label %bb2
+
+return:                                             ; preds = %bb8, %bb
+  ret void
+}
+
+; This function requires special handling to preserve LCSSA form.
+; PR4934
+
+define void @pnp_check_irq() nounwind noredzone {
+entry:
+  %conv56 = trunc i64 undef to i32                ; <i32> [#uses=1]
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.cond.i.backedge, %entry
+  %call.i25 = call i8* @pci_get_device() nounwind noredzone ; <i8*> [#uses=2]
+  br i1 undef, label %if.then65, label %while.body.i
+
+while.body.i:                                     ; preds = %while.cond.i
+  br i1 undef, label %if.then31.i.i, label %while.cond.i.backedge
+
+while.cond.i.backedge:                            ; preds = %if.then31.i.i, %while.body.i
+  br label %while.cond.i
+
+if.then31.i.i:                                    ; preds = %while.body.i
+  switch i32 %conv56, label %while.cond.i.backedge [
+    i32 14, label %if.then42.i.i
+    i32 15, label %if.then42.i.i
+  ]
+
+if.then42.i.i:                                    ; preds = %if.then31.i.i, %if.then31.i.i
+  %call.i25.lcssa48 = phi i8* [ %call.i25, %if.then31.i.i ], [ %call.i25, %if.then31.i.i ] ; <i8*> [#uses=0]
+  unreachable
+
+if.then65:                                        ; preds = %while.cond.i
+  unreachable
+}
+
+declare i8* @pci_get_device() noredzone
diff --git a/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
new file mode 100644
index 00000000000..42b4f3dea75
--- /dev/null
+++ b/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
@@ -0,0 +1,185 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @some_func() noreturn
+
+; This test contains two trivial unswitch condition in one loop.
+; LoopUnswitch pass should be able to unswitch the second one
+; after unswitching the first one.
+define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test1(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split.split, label %loop_exit
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond1, label %continue, label %loop_exit	; first trivial condition
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  %var_val = load i32, i32* %var
+  br i1 %cond2, label %do_something, label %loop_exit	; second trivial condition
+; CHECK:       continue:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    br label %do_something
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit:
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    ret
+}
+
+; Test for two trivially unswitchable switches.
+define i32 @test3(i32* %var, i32 %cond1, i32 %cond2) {
+; CHECK-LABEL: @test3(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond1, label %entry.split [
+; CHECK-NEXT:      i32 0, label %loop_exit1
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    switch i32 %cond2, label %loop_exit2 [
+; CHECK-NEXT:      i32 42, label %loop_exit2
+; CHECK-NEXT:      i32 0, label %entry.split.split
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  switch i32 %cond1, label %continue [
+    i32 0, label %loop_exit1
+  ]
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  %var_val = load i32, i32* %var
+  switch i32 %cond2, label %loop_exit2 [
+    i32 0, label %do_something
+    i32 42, label %loop_exit2
+  ]
+; CHECK:       continue:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    br label %do_something
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit1:
+  ret i32 0
+; CHECK:       loop_exit1:
+; CHECK-NEXT:    ret
+
+loop_exit2:
+  ret i32 0
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    ret
+;
+; We shouldn't have any unreachable blocks here because the unswitched switches
+; turn into branches instead.
+; CHECK-NOT:     unreachable
+}
+
+; Test for a trivially unswitchable switch with multiple exiting cases and
+; multiple looping cases.
+define i32 @test4(i32* %var, i32 %cond1, i32 %cond2) {
+; CHECK-LABEL: @test4(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond2, label %loop_exit2 [
+; CHECK-NEXT:      i32 13, label %loop_exit1
+; CHECK-NEXT:      i32 42, label %loop_exit3
+; CHECK-NEXT:      i32 0, label %entry.split
+; CHECK-NEXT:      i32 1, label %entry.split
+; CHECK-NEXT:      i32 2, label %entry.split
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %var_val = load i32, i32* %var
+  switch i32 %cond2, label %loop_exit2 [
+    i32 0, label %loop0
+    i32 1, label %loop1
+    i32 13, label %loop_exit1
+    i32 2, label %loop2
+    i32 42, label %loop_exit3
+  ]
+; CHECK:       loop_begin:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    switch i32 %cond2, label %[[UNREACHABLE:.*]] [
+; CHECK-NEXT:      i32 0, label %loop0
+; CHECK-NEXT:      i32 1, label %loop1
+; CHECK-NEXT:      i32 2, label %loop2
+; CHECK-NEXT:    ]
+
+loop0:
+  call void @some_func() noreturn nounwind
+  br label %loop_latch
+; CHECK:       loop0:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_latch
+
+loop1:
+  call void @some_func() noreturn nounwind
+  br label %loop_latch
+; CHECK:       loop1:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_latch
+
+loop2:
+  call void @some_func() noreturn nounwind
+  br label %loop_latch
+; CHECK:       loop2:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_latch
+
+loop_latch:
+  br label %loop_begin
+; CHECK:       loop_latch:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit1:
+  ret i32 0
+; CHECK:       loop_exit1:
+; CHECK-NEXT:    ret
+
+loop_exit2:
+  ret i32 0
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    ret
+
+loop_exit3:
+  ret i32 0
+; CHECK:       loop_exit3:
+; CHECK-NEXT:    ret
+;
+; CHECK:       [[UNREACHABLE]]:
+; CHECK-NEXT:    unreachable
+}
diff --git a/test/Transforms/SimplifyCFG/speculate-call.ll b/test/Transforms/SimplifyCFG/speculate-call.ll
new file mode 100644
index 00000000000..6e9398b6203
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/speculate-call.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+; CHECK-LABEL: @speculatable_attribute
+; CHECK: select
+define i32 @speculatable_attribute(i32 %a) {
+entry:
+  %c = icmp sgt i32 %a, 64
+  br i1 %c, label %end, label %if
+
+if:
+  %val = call i32 @func() #0
+  br label %end
+
+end:
+  %ret = phi i32 [%val, %if], [0, %entry]
+  ret i32 %ret
+}
+
+define i32 @func() #0 {
+  ret i32 1
+}
+attributes #0 = { nounwind readnone speculatable }
+
diff --git a/test/Verifier/DISubprogram.ll b/test/Verifier/DISubprogram.ll
new file mode 100644
index 00000000000..e78220c8bd7
--- /dev/null
+++ b/test/Verifier/DISubprogram.ll
@@ -0,0 +1,22 @@
+; RUN: not opt -S <%s 2>&1| FileCheck %s
+
+define void @f() !dbg !14 {
+  ret void
+}
+
+!0 = !{null}
+!1 = distinct !DICompositeType(tag: DW_TAG_structure_type)
+!2 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
+!3 = !DISubroutineType(types: !0)
+!4 = distinct !DICompositeType(tag: DW_TAG_structure_type)
+!8 = distinct !DICompileUnit(language: DW_LANG_Swift, producer: "clang",
+                             file: !2, emissionKind: 2)
+; CHECK: invalid thrown type
+!13 = !{!14}
+!14 = distinct !DISubprogram(name: "f", scope: !1,
+                            file: !2, line: 1, type: !3, isLocal: true,
+                            isDefinition: true, scopeLine: 2,
+                            unit: !8, thrownTypes: !13)
+!15 = !{i32 1, !"Debug Info Version", i32 3}
+!llvm.module.flags = !{!15}
+!llvm.dbg.cu = !{!8}
diff --git a/test/Verifier/speculatable-callsite-invalid.ll b/test/Verifier/speculatable-callsite-invalid.ll
new file mode 100644
index 00000000000..f9a1adfe947
--- /dev/null
+++ b/test/Verifier/speculatable-callsite-invalid.ll
@@ -0,0 +1,24 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; Make sure that speculatable is not allowed on a call site if the
+; declaration is not also speculatable.
+
+declare i32 @not_speculatable()
+
+; CHECK: speculatable attribute may not apply to call sites
+; CHECK-NEXT: %ret = call i32 @not_speculatable() #0
+define i32 @call_not_speculatable() {
+  %ret = call i32 @not_speculatable() #0
+  ret i32 %ret
+}
+
+@gv = internal unnamed_addr constant i32 0
+
+; CHECK: speculatable attribute may not apply to call sites
+; CHECK-NEXT: %ret = call float bitcast (i32* @gv to float ()*)() #0
+define float @call_bitcast_speculatable() {
+  %ret = call float bitcast (i32* @gv to float()*)() #0
+  ret float %ret
+}
+
+attributes #0 = { speculatable }
diff --git a/test/Verifier/speculatable-callsite.ll b/test/Verifier/speculatable-callsite.ll
new file mode 100644
index 00000000000..fafed831cf9
--- /dev/null
+++ b/test/Verifier/speculatable-callsite.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as %s -o /dev/null
+
+; Make sure speculatable is accepted on a call site if the declaration
+; is also speculatable.
+
+declare i32 @speculatable() #0
+
+; Make sure this the attribute is accepted on the call site if the
+; declaration matches.
+define i32 @call_speculatable() {
+  %ret = call i32 @speculatable() #0
+  ret i32 %ret
+}
+
+define float @call_bitcast_speculatable() {
+  %ret = call float bitcast (i32()* @speculatable to float()*)() #0
+  ret float %ret
+}
+
+attributes #0 = { speculatable }
diff --git a/test/tools/llvm-lto/error.ll b/test/tools/llvm-lto/error.ll
index 4a58fba7b79..96bd56664c2 100644
--- a/test/tools/llvm-lto/error.ll
+++ b/test/tools/llvm-lto/error.ll
@@ -5,4 +5,4 @@
 ; CHECK-LIST: llvm-lto: error loading file '{{.*}}/Inputs/empty.bc': The file was not recognized as a valid object file
 
 ; RUN: not llvm-lto --thinlto %S/Inputs/empty.bc 2>&1 | FileCheck %s --check-prefix=CHECK-THIN
-; CHECK-THIN: llvm-lto: error loading file '{{.*}}/Inputs/empty.bc': The file was not recognized as a valid object file
+; CHECK-THIN: llvm-lto: error loading file '{{.*}}/Inputs/empty.bc': Invalid bitcode signature
diff --git a/test/tools/llvm-pdbdump/raw-stream-data.test b/test/tools/llvm-pdbdump/raw-stream-data.test
new file mode 100644
index 00000000000..d55980632d4
--- /dev/null
+++ b/test/tools/llvm-pdbdump/raw-stream-data.test
@@ -0,0 +1,47 @@
+; RUN: llvm-pdbdump raw -stream-data=8 %p/Inputs/LoadAddressTest.pdb \
+; RUN:   | FileCheck %s -check-prefix=FULL_STREAM
+; RUN: llvm-pdbdump raw -stream-data=8:4 %p/Inputs/LoadAddressTest.pdb \
+; RUN:   | FileCheck %s -check-prefix=OFFSET_STREAM
+; RUN: llvm-pdbdump raw -stream-data=8:4@24 %p/Inputs/LoadAddressTest.pdb \
+; RUN:   | FileCheck %s -check-prefix=OFFSET_AND_LENGTH
+
+FULL_STREAM:      Stream Data {
+FULL_STREAM-NEXT:   Stream {
+FULL_STREAM-NEXT:     Index: 8
+FULL_STREAM-NEXT:     Type: Public Symbol Records
+FULL_STREAM-NEXT:     Size: 40
+FULL_STREAM-NEXT:     Blocks:
+FULL_STREAM-NEXT:     Data (
+FULL_STREAM-NEXT:       0000: 12000E11 02000000 10000000 01005F6D  |.............._m|
+FULL_STREAM-NEXT:       0010: 61696E00 12002511 00000000 88000000  |ain...%.........|
+FULL_STREAM-NEXT:       0020: 01006D61 696E0000                    |..main..|
+FULL_STREAM-NEXT:     )
+FULL_STREAM-NEXT:   }
+FULL_STREAM-NEXT: }
+
+OFFSET_STREAM:      Stream Data {
+OFFSET_STREAM-NEXT:   Stream {
+OFFSET_STREAM-NEXT:    Index: 8
+OFFSET_STREAM-NEXT:    Type: Public Symbol Records
+OFFSET_STREAM-NEXT:    Size: 40
+OFFSET_STREAM-NEXT:    Blocks: 
+OFFSET_STREAM-NEXT:    Data (
+OFFSET_STREAM-NEXT:      0004: 02000000 10000000 01005F6D 61696E00  |.........._main.|
+OFFSET_STREAM-NEXT:      0014: 12002511 00000000 88000000 01006D61  |..%...........ma|
+OFFSET_STREAM-NEXT:      0024: 696E0000                             |in..|
+OFFSET_STREAM-NEXT:    )
+OFFSET_STREAM-NEXT:  }
+OFFSET_STREAM-NEXT:}
+
+OFFSET_AND_LENGTH:      Stream Data {
+OFFSET_AND_LENGTH-NEXT:   Stream {
+OFFSET_AND_LENGTH-NEXT:    Index: 8
+OFFSET_AND_LENGTH-NEXT:    Type: Public Symbol Records
+OFFSET_AND_LENGTH-NEXT:    Size: 40
+OFFSET_AND_LENGTH-NEXT:    Blocks: 
+OFFSET_AND_LENGTH-NEXT:    Data (
+OFFSET_AND_LENGTH-NEXT:      0004: 02000000 10000000 01005F6D 61696E00  |.........._main.|
+OFFSET_AND_LENGTH-NEXT:      0014: 12002511 00000000                    |..%.....|
+OFFSET_AND_LENGTH-NEXT:    )
+OFFSET_AND_LENGTH-NEXT:  }
+OFFSET_AND_LENGTH-NEXT:}
\ No newline at end of file
diff --git a/test/tools/llvm-readobj/Inputs/const-import.lib b/test/tools/llvm-readobj/Inputs/const-import.lib
new file mode 100644
index 00000000000..88a2ac3f836
Binary files /dev/null and b/test/tools/llvm-readobj/Inputs/const-import.lib differ
diff --git a/test/tools/llvm-readobj/coff-const-import.test b/test/tools/llvm-readobj/coff-const-import.test
new file mode 100644
index 00000000000..3c3c48242c6
--- /dev/null
+++ b/test/tools/llvm-readobj/coff-const-import.test
@@ -0,0 +1,7 @@
+RUN: llvm-readobj -coff-exports %S/Inputs/const-import.lib | FileCheck %s
+
+CHECK: Type: const
+CHECK: Name type: noprefix
+CHECK: Symbol: __imp____CFConstantStringClassReference
+CHECK: Symbol: ___CFConstantStringClassReference
+
diff --git a/test/tools/llvm-readobj/reloc-types.test b/test/tools/llvm-readobj/reloc-types.test
index 74148c07015..abdd0e36530 100644
--- a/test/tools/llvm-readobj/reloc-types.test
+++ b/test/tools/llvm-readobj/reloc-types.test
@@ -253,8 +253,8 @@ ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC (559)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD_PREL19 (560)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PREL21 (561)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PAGE21 (562)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD64_LO12_NC (563)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD_LO12_NC (564)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD64_LO12 (563)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD_LO12 (564)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_OFF_G1 (565)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_OFF_G0_NC (566)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_LDR (567)
diff --git a/test/tools/llvm-readobj/relocations.test b/test/tools/llvm-readobj/relocations.test
index 475ac1d7e29..9c7dcf1d659 100644
--- a/test/tools/llvm-readobj/relocations.test
+++ b/test/tools/llvm-readobj/relocations.test
@@ -292,19 +292,18 @@ WASM-NEXT:     Relocation {
 WASM-NEXT:       Type: R_WEBASSEMBLY_TABLE_INDEX_SLEB (1)
 WASM-NEXT:       Offset: 0x6
 WASM-NEXT:       Index: 0x0
-WASM-NEXT:       Addend: 0x0
 WASM-NEXT:     }
 WASM-NEXT:     Relocation {
 WASM-NEXT:       Type: R_WEBASSEMBLY_GLOBAL_ADDR_LEB (3)
 WASM-NEXT:       Offset: 0x15
 WASM-NEXT:       Index: 0x0
-WASM-NEXT:       Addend: 0x0
+WASM-NEXT:       Addend: 0
 WASM-NEXT:     }
 WASM-NEXT:     Relocation {
 WASM-NEXT:       Type: R_WEBASSEMBLY_GLOBAL_ADDR_LEB (3)
 WASM-NEXT:       Offset: 0x24
 WASM-NEXT:       Index: 0x1
-WASM-NEXT:       Addend: 0x0
+WASM-NEXT:       Addend: 0
 WASM-NEXT:     }
 WASM-NEXT:   }
 WASM-NEXT: ]
diff --git a/test/tools/llvm-readobj/resources.test b/test/tools/llvm-readobj/resources.test
new file mode 100644
index 00000000000..46ee8b99a65
--- /dev/null
+++ b/test/tools/llvm-readobj/resources.test
@@ -0,0 +1,19 @@
+RUN: llvm-readobj -coff-resources %p/Inputs/zero-string-table.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix RESOURCE
+
+RESOURCE:      Resources [
+RESOURCE-NEXT:   Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+RESOURCE-NEXT:   .rsrc$01 Data (
+RESOURCE-NEXT:     0000: 00000000 00000000 00000000 00000100  |................|
+RESOURCE-NEXT:     0010: 06000000 18000080 00000000 00000000  |................|
+RESOURCE-NEXT:     0020: 00000000 00000100 01000000 30000080  |............0...|
+RESOURCE-NEXT:     0030: 00000000 00000000 00000000 00000100  |................|
+RESOURCE-NEXT:     0040: 09040000 48000000 00000000 2A000000  |....H.......*...|
+RESOURCE-NEXT:     0050: 00000000 00000000                    |........|
+RESOURCE-NEXT:   )
+RESOURCE-NEXT:   .rsrc$02 Data (
+RESOURCE-NEXT:     0000: 00000500 48006500 6C006C00 6F000000  |....H.e.l.l.o...|
+RESOURCE-NEXT:     0010: 00000000 00000000 00000000 00000000  |................|
+RESOURCE-NEXT:     0020: 00000000 00000000 00000000 00000000  |................|
+RESOURCE-NEXT:   )
+RESOURCE-NEXT: ]
diff --git a/test/tools/llvm-readobj/sections.test b/test/tools/llvm-readobj/sections.test
index 312c47fe4c8..1747ee45d4f 100644
--- a/test/tools/llvm-readobj/sections.test
+++ b/test/tools/llvm-readobj/sections.test
@@ -518,6 +518,11 @@ WASM-NEXT:  Section {
 WASM-NEXT:    Type: MEMORY (0x5)
 WASM-NEXT:    Size: 3
 WASM-NEXT:    Offset: 66
+WASM-NEXT:    Memories [
+WASM-NEXT:      Memory {
+WASM-NEXT:        InitialPages: 0
+WASM-NEXT:      }
+WASM-NEXT:    ]
 WASM-NEXT:  }
 WASM-NEXT:  Section {
 WASM-NEXT:    Type: EXPORT (0x7)
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 84fa0e4d2d9..8ecf1848099 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -78,6 +78,11 @@ static cl::opt<bool>
     SummarizeTypes("summarize-types",
                    cl::desc("Abbreviate the description of type unit entries"));
 
+static cl::opt<bool> Verify("verify", cl::desc("Verify the DWARF debug info"));
+
+static cl::opt<bool> Quiet("quiet",
+                           cl::desc("Use with -verify to not emit to STDOUT."));
+
 static void error(StringRef Filename, std::error_code EC) {
   if (!EC)
     return;
@@ -116,6 +121,46 @@ static void DumpInput(StringRef Filename) {
     }
 }
 
+static bool VerifyObjectFile(ObjectFile &Obj, Twine Filename) {
+  std::unique_ptr<DIContext> DICtx(new DWARFContextInMemory(Obj));
+  
+  // Verify the DWARF and exit with non-zero exit status if verification
+  // fails.
+  raw_ostream &stream = Quiet ? nulls() : outs();
+  stream << "Verifying " << Filename.str() << ":\tfile format "
+  << Obj.getFileFormatName() << "\n";
+  bool Result = DICtx->verify(stream, DumpType);
+  if (Result)
+    stream << "No errors.\n";
+  else
+    stream << "Errors detected.\n";
+  return Result;
+}
+
+static bool VerifyInput(StringRef Filename) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+  MemoryBuffer::getFileOrSTDIN(Filename);
+  error(Filename, BuffOrErr.getError());
+  std::unique_ptr<MemoryBuffer> Buff = std::move(BuffOrErr.get());
+  
+  Expected<std::unique_ptr<Binary>> BinOrErr =
+  object::createBinary(Buff->getMemBufferRef());
+  if (!BinOrErr)
+    error(Filename, errorToErrorCode(BinOrErr.takeError()));
+  
+  bool Result = true;
+  if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get()))
+    Result = VerifyObjectFile(*Obj, Filename);
+  else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get()))
+    for (auto &ObjForArch : Fat->objects()) {
+      auto MachOOrErr = ObjForArch.getAsObjectFile();
+      error(Filename, errorToErrorCode(MachOOrErr.takeError()));
+      if (!VerifyObjectFile(**MachOOrErr, Filename + " (" + ObjForArch.getArchFlagName() + ")"))
+        Result = false;
+    }
+  return Result;
+}
+
 /// If the input path is a .dSYM bundle (as created by the dsymutil tool),
 /// replace it with individual entries for each of the object files inside the
 /// bundle otherwise return the input path.
@@ -168,7 +213,13 @@ int main(int argc, char **argv) {
     Objects.insert(Objects.end(), Objs.begin(), Objs.end());
   }
 
-  std::for_each(Objects.begin(), Objects.end(), DumpInput);
+  if (Verify) {
+    // If we encountered errors during verify, exit with a non-zero exit status.
+    if (!std::all_of(Objects.begin(), Objects.end(), VerifyInput))
+      exit(1);
+  } else {
+    std::for_each(Objects.begin(), Objects.end(), DumpInput);
+  }
 
   return EXIT_SUCCESS;
 }
diff --git a/tools/llvm-link/CMakeLists.txt b/tools/llvm-link/CMakeLists.txt
index 73177922324..051489f94bc 100644
--- a/tools/llvm-link/CMakeLists.txt
+++ b/tools/llvm-link/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  BitReader
   BitWriter
   Core
   IRReader
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index a024b6926d5..27199d53538 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -23,7 +24,6 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Linker/Linker.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp
index 2f005412a3b..27e5c5e122c 100644
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -23,7 +23,6 @@
 #include "llvm/LTO/legacy/LTOCodeGenerator.h"
 #include "llvm/LTO/legacy/LTOModule.h"
 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
@@ -332,12 +331,9 @@ static void createCombinedModuleSummaryIndex() {
   uint64_t NextModuleId = 0;
   for (auto &Filename : InputFilenames) {
     ExitOnError ExitOnErr("llvm-lto: error loading file '" + Filename + "': ");
-    std::unique_ptr<ModuleSummaryIndex> Index =
-        ExitOnErr(llvm::getModuleSummaryIndexForFile(Filename));
-    // Skip files without a module summary.
-    if (!Index)
-      continue;
-    CombinedIndex.mergeFrom(std::move(Index), ++NextModuleId);
+    std::unique_ptr<MemoryBuffer> MB =
+        ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Filename)));
+    ExitOnErr(readModuleSummaryIndex(*MB, CombinedIndex, ++NextModuleId));
   }
   std::error_code EC;
   assert(!OutputFilename.empty());
diff --git a/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp b/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
new file mode 100644
index 00000000000..7c680ebb94c
--- /dev/null
+++ b/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
@@ -0,0 +1,87 @@
+//===- C13DebugFragmentVisitor.cpp -------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "C13DebugFragmentVisitor.h"
+
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/StringTable.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+C13DebugFragmentVisitor::C13DebugFragmentVisitor(PDBFile &F) : F(F) {}
+
+C13DebugFragmentVisitor::~C13DebugFragmentVisitor() {}
+
+Error C13DebugFragmentVisitor::visitUnknown(
+    codeview::ModuleDebugUnknownFragmentRef &Fragment) {
+  return Error::success();
+}
+
+Error C13DebugFragmentVisitor::visitFileChecksums(
+    codeview::ModuleDebugFileChecksumFragmentRef &Checksums) {
+  assert(!this->Checksums.hasValue());
+  this->Checksums = Checksums;
+  return Error::success();
+}
+
+Error C13DebugFragmentVisitor::visitLines(
+    codeview::ModuleDebugLineFragmentRef &Lines) {
+  this->Lines.push_back(Lines);
+  return Error::success();
+}
+
+Error C13DebugFragmentVisitor::visitInlineeLines(
+    codeview::ModuleDebugInlineeLineFragmentRef &Lines) {
+  this->InlineeLines.push_back(Lines);
+  return Error::success();
+}
+
+Error C13DebugFragmentVisitor::finished() {
+  if (!Checksums.hasValue()) {
+    assert(Lines.empty());
+    return Error::success();
+  }
+  if (auto EC = handleFileChecksums())
+    return EC;
+
+  if (auto EC = handleLines())
+    return EC;
+
+  if (auto EC = handleInlineeLines())
+    return EC;
+
+  return Error::success();
+}
+
+Expected<StringRef>
+C13DebugFragmentVisitor::getNameFromStringTable(uint32_t Offset) {
+  auto ST = F.getStringTable();
+  if (!ST)
+    return ST.takeError();
+
+  return ST->getStringForID(Offset);
+}
+
+Expected<StringRef>
+C13DebugFragmentVisitor::getNameFromChecksumsBuffer(uint32_t Offset) {
+  assert(Checksums.hasValue());
+
+  auto Array = Checksums->getArray();
+  auto ChecksumIter = Array.at(Offset);
+  if (ChecksumIter == Array.end())
+    return make_error<RawError>(raw_error_code::invalid_format);
+  const auto &Entry = *ChecksumIter;
+  return getNameFromStringTable(Entry.FileNameOffset);
+}
diff --git a/tools/llvm-pdbdump/C13DebugFragmentVisitor.h b/tools/llvm-pdbdump/C13DebugFragmentVisitor.h
new file mode 100644
index 00000000000..1054b0c9f6e
--- /dev/null
+++ b/tools/llvm-pdbdump/C13DebugFragmentVisitor.h
@@ -0,0 +1,60 @@
+//===- C13DebugFragmentVisitor.h - Visitor for CodeView Info ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMPDBDUMP_C13DEBUGFRAGMENTVISITOR_H
+#define LLVM_TOOLS_LLVMPDBDUMP_C13DEBUGFRAGMENTVISITOR_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h"
+#include "llvm/Support/Error.h"
+
+#include <vector>
+
+namespace llvm {
+
+namespace pdb {
+
+class PDBFile;
+
+class C13DebugFragmentVisitor : public codeview::ModuleDebugFragmentVisitor {
+public:
+  C13DebugFragmentVisitor(PDBFile &F);
+  ~C13DebugFragmentVisitor();
+
+  Error visitUnknown(codeview::ModuleDebugUnknownFragmentRef &Fragment) final;
+
+  Error visitFileChecksums(
+      codeview::ModuleDebugFileChecksumFragmentRef &Checksums) final;
+
+  Error visitLines(codeview::ModuleDebugLineFragmentRef &Lines) final;
+
+  Error
+  visitInlineeLines(codeview::ModuleDebugInlineeLineFragmentRef &Lines) final;
+
+  Error finished() final;
+
+protected:
+  virtual Error handleFileChecksums() { return Error::success(); }
+  virtual Error handleLines() { return Error::success(); }
+  virtual Error handleInlineeLines() { return Error::success(); }
+
+  Expected<StringRef> getNameFromStringTable(uint32_t Offset);
+  Expected<StringRef> getNameFromChecksumsBuffer(uint32_t Offset);
+
+  Optional<codeview::ModuleDebugFileChecksumFragmentRef> Checksums;
+  std::vector<codeview::ModuleDebugInlineeLineFragmentRef> InlineeLines;
+  std::vector<codeview::ModuleDebugLineFragmentRef> Lines;
+
+  PDBFile &F;
+};
+}
+}
+
+#endif
diff --git a/tools/llvm-pdbdump/CMakeLists.txt b/tools/llvm-pdbdump/CMakeLists.txt
index e3d7b2ef275..325e38c15ca 100644
--- a/tools/llvm-pdbdump/CMakeLists.txt
+++ b/tools/llvm-pdbdump/CMakeLists.txt
@@ -8,8 +8,9 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_tool(llvm-pdbdump
   Analyze.cpp
-  Diff.cpp
+  C13DebugFragmentVisitor.cpp
   CompactTypeDumpVisitor.cpp
+  Diff.cpp
   llvm-pdbdump.cpp
   YamlSymbolDumper.cpp
   YamlTypeDumper.cpp
diff --git a/tools/llvm-pdbdump/CompactTypeDumpVisitor.cpp b/tools/llvm-pdbdump/CompactTypeDumpVisitor.cpp
index 1fc8dd5d51f..5ad0bfad26c 100644
--- a/tools/llvm-pdbdump/CompactTypeDumpVisitor.cpp
+++ b/tools/llvm-pdbdump/CompactTypeDumpVisitor.cpp
@@ -31,14 +31,15 @@ static StringRef getLeafName(TypeLeafKind K) {
 
 CompactTypeDumpVisitor::CompactTypeDumpVisitor(TypeDatabase &TypeDB,
                                                ScopedPrinter *W)
-    : W(W), TI(TypeIndex::None()), Offset(0), TypeDB(TypeDB) {}
+    : CompactTypeDumpVisitor(TypeDB, TypeIndex(TypeIndex::FirstNonSimpleIndex),
+                             W) {}
+
+CompactTypeDumpVisitor::CompactTypeDumpVisitor(TypeDatabase &TypeDB,
+                                               TypeIndex FirstTI,
+                                               ScopedPrinter *W)
+    : W(W), TI(FirstTI), Offset(0), TypeDB(TypeDB) {}
 
 Error CompactTypeDumpVisitor::visitTypeBegin(CVType &Record) {
-  if (TI == TypeIndex::None())
-    TI.setIndex(TypeIndex::FirstNonSimpleIndex);
-  else
-    TI.setIndex(TI.getIndex() + 1);
-
   return Error::success();
 }
 
@@ -52,6 +53,7 @@ Error CompactTypeDumpVisitor::visitTypeEnd(CVType &Record) {
           .str());
 
   Offset += Record.length();
+  TI.setIndex(TI.getIndex() + 1);
 
   return Error::success();
 }
diff --git a/tools/llvm-pdbdump/CompactTypeDumpVisitor.h b/tools/llvm-pdbdump/CompactTypeDumpVisitor.h
index 180eea7b8d6..76fafc93e03 100644
--- a/tools/llvm-pdbdump/CompactTypeDumpVisitor.h
+++ b/tools/llvm-pdbdump/CompactTypeDumpVisitor.h
@@ -27,6 +27,8 @@ namespace pdb {
 class CompactTypeDumpVisitor : public codeview::TypeVisitorCallbacks {
 public:
   CompactTypeDumpVisitor(codeview::TypeDatabase &TypeDB, ScopedPrinter *W);
+  CompactTypeDumpVisitor(codeview::TypeDatabase &TypeDB,
+                         codeview::TypeIndex FirstTI, ScopedPrinter *W);
 
   /// Paired begin/end actions for all types. Receives all record data,
   /// including the fixed-length record prefix.
diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
index 8348751703f..f3e28e0b08f 100644
--- a/tools/llvm-pdbdump/LLVMOutputStyle.cpp
+++ b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
@@ -9,6 +9,7 @@
 
 #include "LLVMOutputStyle.h"
 
+#include "C13DebugFragmentVisitor.h"
 #include "CompactTypeDumpVisitor.h"
 #include "StreamUtil.h"
 #include "llvm-pdbdump.h"
@@ -16,20 +17,25 @@
 #include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/EnumTables.h"
-#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h"
+#include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h"
 #include "llvm/DebugInfo/CodeView/SymbolDumper.h"
 #include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
 #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
 #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/EnumTables.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
 #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
-#include "llvm/DebugInfo/PDB/Native/ModStream.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -74,6 +80,127 @@ struct PageStats {
   // Pages which are marked free in the FPM but are used.
   BitVector UseAfterFreePages;
 };
+
+class C13RawVisitor : public C13DebugFragmentVisitor {
+public:
+  C13RawVisitor(ScopedPrinter &P, PDBFile &F, TypeDatabase &IPI)
+      : C13DebugFragmentVisitor(F), P(P), IPI(IPI) {}
+
+  Error handleLines() override {
+    if (Lines.empty())
+      return Error::success();
+
+    DictScope DD(P, "Lines");
+
+    for (const auto &Fragment : Lines) {
+      DictScope DDD(P, "Block");
+      P.printNumber("RelocSegment", Fragment.header()->RelocSegment);
+      P.printNumber("RelocOffset", Fragment.header()->RelocOffset);
+      P.printNumber("CodeSize", Fragment.header()->CodeSize);
+      P.printBoolean("HasColumns", Fragment.hasColumnInfo());
+
+      for (const auto &L : Fragment) {
+        DictScope DDDD(P, "Lines");
+
+        if (auto EC = printFileName("FileName", L.NameIndex))
+          return EC;
+
+        for (const auto &N : L.LineNumbers) {
+          DictScope DDD(P, "Line");
+          LineInfo LI(N.Flags);
+          P.printNumber("Offset", N.Offset);
+          if (LI.isAlwaysStepInto())
+            P.printString("StepInto", StringRef("Always"));
+          else if (LI.isNeverStepInto())
+            P.printString("StepInto", StringRef("Never"));
+          else
+            P.printNumber("LineNumberStart", LI.getStartLine());
+          P.printNumber("EndDelta", LI.getLineDelta());
+          P.printBoolean("IsStatement", LI.isStatement());
+        }
+        for (const auto &C : L.Columns) {
+          DictScope DDD(P, "Column");
+          P.printNumber("Start", C.StartColumn);
+          P.printNumber("End", C.EndColumn);
+        }
+      }
+    }
+
+    return Error::success();
+  }
+
+  Error handleFileChecksums() override {
+    if (!Checksums.hasValue())
+      return Error::success();
+
+    DictScope DD(P, "FileChecksums");
+    for (const auto &CS : *Checksums) {
+      DictScope DDD(P, "Checksum");
+      if (auto Result = getNameFromStringTable(CS.FileNameOffset))
+        P.printString("FileName", *Result);
+      else
+        return Result.takeError();
+      P.printEnum("Kind", uint8_t(CS.Kind), getFileChecksumNames());
+      P.printBinaryBlock("Checksum", CS.Checksum);
+    }
+    return Error::success();
+  }
+
+  Error handleInlineeLines() override {
+    if (InlineeLines.empty())
+      return Error::success();
+
+    DictScope D(P, "InlineeLines");
+    for (const auto &IL : InlineeLines) {
+      P.printBoolean("HasExtraFiles", IL.hasExtraFiles());
+      ListScope LS(P, "Lines");
+      for (const auto &L : IL) {
+        DictScope DDD(P, "Inlinee");
+        if (auto EC = printFileName("FileName", L.Header->FileID))
+          return EC;
+
+        if (auto EC = dumpTypeRecord("Function", IPI, L.Header->Inlinee))
+          return EC;
+        P.printNumber("SourceLine", L.Header->SourceLineNum);
+        if (IL.hasExtraFiles()) {
+          ListScope DDDD(P, "ExtraFiles");
+          for (const auto &EF : L.ExtraFiles) {
+            if (auto EC = printFileName("File", EF))
+              return EC;
+          }
+        }
+      }
+    }
+    return Error::success();
+  }
+
+private:
+  Error dumpTypeRecord(StringRef Label, TypeDatabase &DB, TypeIndex Index) {
+    CompactTypeDumpVisitor CTDV(DB, Index, &P);
+    CVTypeVisitor Visitor(CTDV);
+    DictScope D(P, Label);
+    if (DB.containsTypeIndex(Index)) {
+      CVType &Type = DB.getTypeRecord(Index);
+      if (auto EC = Visitor.visitTypeRecord(Type))
+        return EC;
+    } else {
+      P.printString(
+          llvm::formatv("Index: {0:x} (unknown function)", Index.getIndex())
+              .str());
+    }
+    return Error::success();
+  }
+  Error printFileName(StringRef Label, uint32_t Offset) {
+    if (auto Result = getNameFromChecksumsBuffer(Offset)) {
+      P.printString(Label, *Result);
+      return Error::success();
+    } else
+      return Result.takeError();
+  }
+
+  ScopedPrinter &P;
+  TypeDatabase &IPI;
+};
 }
 
 static void recordKnownUsedPage(PageStats &Stats, uint32_t UsedIndex) {
@@ -316,6 +443,27 @@ Error LLVMOutputStyle::dumpBlockRanges() {
   return Error::success();
 }
 
+static Error parseStreamSpec(StringRef Str, uint32_t &SI, uint32_t &Offset,
+                             uint32_t &Size) {
+  if (Str.consumeInteger(0, SI))
+    return make_error<RawError>(raw_error_code::invalid_format,
+                                "Invalid Stream Specification");
+  if (Str.consume_front(":")) {
+    if (Str.consumeInteger(0, Offset))
+      return make_error<RawError>(raw_error_code::invalid_format,
+                                  "Invalid Stream Specification");
+  }
+  if (Str.consume_front("@")) {
+    if (Str.consumeInteger(0, Size))
+      return make_error<RawError>(raw_error_code::invalid_format,
+                                  "Invalid Stream Specification");
+  }
+  if (!Str.empty())
+    return make_error<RawError>(raw_error_code::invalid_format,
+                                "Invalid Stream Specification");
+  return Error::success();
+}
+
 Error LLVMOutputStyle::dumpStreamBytes() {
   if (opts::raw::DumpStreamData.empty())
     return Error::success();
@@ -324,7 +472,15 @@ Error LLVMOutputStyle::dumpStreamBytes() {
     discoverStreamPurposes(File, StreamPurposes);
 
   DictScope D(P, "Stream Data");
-  for (uint32_t SI : opts::raw::DumpStreamData) {
+  for (auto &Str : opts::raw::DumpStreamData) {
+    uint32_t SI = 0;
+    uint32_t Begin = 0;
+    uint32_t Size = 0;
+    uint32_t End = 0;
+
+    if (auto EC = parseStreamSpec(Str, SI, Begin, Size))
+      return EC;
+
     if (SI >= File.getNumStreams())
       return make_error<RawError>(raw_error_code::no_stream);
 
@@ -333,6 +489,14 @@ Error LLVMOutputStyle::dumpStreamBytes() {
     if (!S)
       continue;
     DictScope DD(P, "Stream");
+    if (Size == 0)
+      End = S->getLength();
+    else {
+      End = Begin + Size;
+      if (End >= S->getLength())
+        return make_error<RawError>(raw_error_code::index_out_of_bounds,
+                                    "Stream is not long enough!");
+    }
 
     P.printNumber("Index", SI);
     P.printString("Type", StreamPurposes[SI]);
@@ -344,7 +508,9 @@ Error LLVMOutputStyle::dumpStreamBytes() {
     ArrayRef<uint8_t> StreamData;
     if (auto EC = R.readBytes(StreamData, S->getLength()))
       return EC;
-    P.printBinaryBlock("Data", StreamData);
+    Size = End - Begin;
+    StreamData = StreamData.slice(Begin, Size);
+    P.printBinaryBlock("Data", StreamData, Begin);
   }
   return Error::success();
 }
@@ -439,11 +605,12 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
     Label = "Type Info Stream (IPI)";
     VerLabel = "IPI Version";
   }
-  if (!DumpRecordBytes && !DumpRecords && !DumpTpiHash &&
-      !opts::raw::DumpModuleSyms)
-    return Error::success();
 
   bool IsSilentDatabaseBuild = !DumpRecordBytes && !DumpRecords && !DumpTpiHash;
+  if (IsSilentDatabaseBuild) {
+    outs().flush();
+    errs() << "Building Type Information For " << Label << "\n";
+  }
 
   auto Tpi = (StreamIdx == StreamTPI) ? File.getPDBTpiStream()
                                       : File.getPDBIpiStream();
@@ -502,6 +669,7 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
 
     if (auto EC = Visitor.visitTypeRecord(Type))
       return EC;
+    T.setIndex(T.getIndex() + 1);
   }
   if (HadError)
     return make_error<RawError>(raw_error_code::corrupt_file,
@@ -584,7 +752,7 @@ Error LLVMOutputStyle::dumpDbiStream() {
       P.printNumber("Num Files", Modi.Info.getNumberOfFiles());
       P.printNumber("Source File Name Idx", Modi.Info.getSourceFileNameIndex());
       P.printNumber("Pdb File Name Idx", Modi.Info.getPdbFilePathNameIndex());
-      P.printNumber("Line Info Byte Size", Modi.Info.getLineInfoByteSize());
+      P.printNumber("Line Info Byte Size", Modi.Info.getC11LineInfoByteSize());
       P.printNumber("C13 Line Info Byte Size",
                     Modi.Info.getC13LineInfoByteSize());
       P.printNumber("Symbol Byte Size", Modi.Info.getSymbolDebugInfoByteSize());
@@ -606,7 +774,7 @@ Error LLVMOutputStyle::dumpDbiStream() {
             File.getMsfLayout(), File.getMsfBuffer(),
             Modi.Info.getModuleStreamIndex());
 
-        ModStream ModS(Modi.Info, std::move(ModStreamData));
+        ModuleDebugStreamRef ModS(Modi.Info, std::move(ModStreamData));
         if (auto EC = ModS.reload())
           return EC;
 
@@ -633,97 +801,11 @@ Error LLVMOutputStyle::dumpDbiStream() {
         }
         if (opts::raw::DumpLineInfo) {
           ListScope SS(P, "LineInfo");
-          bool HadError = false;
-          // Define a locally scoped visitor to print the different
-          // substream types types.
-          class RecordVisitor : public codeview::IModuleSubstreamVisitor {
-          public:
-            RecordVisitor(ScopedPrinter &P, PDBFile &F) : P(P), F(F) {}
-            Error visitUnknown(ModuleSubstreamKind Kind,
-                               BinaryStreamRef Stream) override {
-              DictScope DD(P, "Unknown");
-              ArrayRef<uint8_t> Data;
-              BinaryStreamReader R(Stream);
-              if (auto EC = R.readBytes(Data, R.bytesRemaining())) {
-                return make_error<RawError>(
-                    raw_error_code::corrupt_file,
-                    "DBI stream contained corrupt line info record");
-              }
-              P.printBinaryBlock("Data", Data);
-              return Error::success();
-            }
-            Error
-            visitFileChecksums(BinaryStreamRef Data,
-                               const FileChecksumArray &Checksums) override {
-              DictScope DD(P, "FileChecksums");
-              for (const auto &C : Checksums) {
-                DictScope DDD(P, "Checksum");
-                if (auto Result = getFileNameForOffset(C.FileNameOffset))
-                  P.printString("FileName", Result.get());
-                else
-                  return Result.takeError();
-                P.flush();
-                P.printEnum("Kind", uint8_t(C.Kind), getFileChecksumNames());
-                P.printBinaryBlock("Checksum", C.Checksum);
-              }
-              return Error::success();
-            }
 
-            Error visitLines(BinaryStreamRef Data,
-                             const LineSubstreamHeader *Header,
-                             const LineInfoArray &Lines) override {
-              DictScope DD(P, "Lines");
-              for (const auto &L : Lines) {
-                if (auto Result = getFileNameForOffset2(L.NameIndex))
-                  P.printString("FileName", Result.get());
-                else
-                  return Result.takeError();
-                P.flush();
-                for (const auto &N : L.LineNumbers) {
-                  DictScope DDD(P, "Line");
-                  LineInfo LI(N.Flags);
-                  P.printNumber("Offset", N.Offset);
-                  if (LI.isAlwaysStepInto())
-                    P.printString("StepInto", StringRef("Always"));
-                  else if (LI.isNeverStepInto())
-                    P.printString("StepInto", StringRef("Never"));
-                  else
-                    P.printNumber("LineNumberStart", LI.getStartLine());
-                  P.printNumber("EndDelta", LI.getLineDelta());
-                  P.printBoolean("IsStatement", LI.isStatement());
-                }
-                for (const auto &C : L.Columns) {
-                  DictScope DDD(P, "Column");
-                  P.printNumber("Start", C.StartColumn);
-                  P.printNumber("End", C.EndColumn);
-                }
-              }
-              return Error::success();
-            }
-
-          private:
-            Expected<StringRef> getFileNameForOffset(uint32_t Offset) {
-              auto ST = F.getStringTable();
-              if (!ST)
-                return ST.takeError();
-
-              return ST->getStringForID(Offset);
-            }
-            Expected<StringRef> getFileNameForOffset2(uint32_t Offset) {
-              auto DS = F.getPDBDbiStream();
-              if (!DS)
-                return DS.takeError();
-              return DS->getFileNameForIndex(Offset);
-            }
-            ScopedPrinter &P;
-            PDBFile &F;
-          };
-
-          RecordVisitor V(P, File);
-          for (const auto &L : ModS.lines(&HadError)) {
-            if (auto EC = codeview::visitModuleSubstream(L, V))
-              return EC;
-          }
+          C13RawVisitor V(P, File, ItemDB);
+          if (auto EC = codeview::visitModuleDebugFragments(
+                  ModS.linesAndChecksums(), V))
+            return EC;
         }
       }
     }
diff --git a/tools/llvm-pdbdump/PdbYaml.cpp b/tools/llvm-pdbdump/PdbYaml.cpp
index 65a5a9142d2..d6ba7d64545 100644
--- a/tools/llvm-pdbdump/PdbYaml.cpp
+++ b/tools/llvm-pdbdump/PdbYaml.cpp
@@ -40,6 +40,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbSourceFileChecksumEntry)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbSourceLineEntry)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbSourceColumnEntry)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbSourceLineBlock)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbSourceLineInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbInlineeSite)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbInlineeInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbSymbolRecord)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbTpiRecord)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::StreamBlockList)
@@ -162,8 +165,7 @@ template <> struct ScalarEnumerationTraits<llvm::codeview::FileChecksumKind> {
 
 template <> struct ScalarBitSetTraits<llvm::codeview::LineFlags> {
   static void bitset(IO &io, llvm::codeview::LineFlags &Flags) {
-    io.bitSetCase(Flags, "HasColumnInfo",
-                  llvm::codeview::LineFlags::HaveColumns);
+    io.bitSetCase(Flags, "HasColumnInfo", llvm::codeview::LF_HaveColumns);
     io.enumFallback<Hex16>(Flags);
   }
 };
@@ -311,7 +313,7 @@ void MappingContextTraits<pdb::yaml::PdbSourceColumnEntry,
             pdb::yaml::SerializationContext &Context) {
   IO.mapRequired("StartColumn", Obj.StartColumn);
   IO.mapRequired("EndColumn", Obj.EndColumn);
-};
+}
 
 void MappingContextTraits<pdb::yaml::PdbSourceLineBlock,
                           pdb::yaml::SerializationContext>::
@@ -320,7 +322,7 @@ void MappingContextTraits<pdb::yaml::PdbSourceLineBlock,
   IO.mapRequired("FileName", Obj.FileName);
   IO.mapRequired("Lines", Obj.Lines, Context);
   IO.mapRequired("Columns", Obj.Columns, Context);
-};
+}
 
 void MappingContextTraits<pdb::yaml::PdbSourceFileChecksumEntry,
                           pdb::yaml::SerializationContext>::
@@ -329,26 +331,42 @@ void MappingContextTraits<pdb::yaml::PdbSourceFileChecksumEntry,
   IO.mapRequired("FileName", Obj.FileName);
   IO.mapRequired("Kind", Obj.Kind);
   IO.mapRequired("Checksum", Obj.ChecksumBytes);
-};
+}
 
 void MappingContextTraits<pdb::yaml::PdbSourceLineInfo,
                           pdb::yaml::SerializationContext>::
     mapping(IO &IO, PdbSourceLineInfo &Obj,
             pdb::yaml::SerializationContext &Context) {
   IO.mapRequired("CodeSize", Obj.CodeSize);
+
   IO.mapRequired("Flags", Obj.Flags);
   IO.mapRequired("RelocOffset", Obj.RelocOffset);
   IO.mapRequired("RelocSegment", Obj.RelocSegment);
-  IO.mapRequired("LineInfo", Obj.LineInfo, Context);
-};
+  IO.mapRequired("Blocks", Obj.Blocks, Context);
+}
 
 void MappingContextTraits<pdb::yaml::PdbSourceFileInfo,
                           pdb::yaml::SerializationContext>::
     mapping(IO &IO, PdbSourceFileInfo &Obj,
             pdb::yaml::SerializationContext &Context) {
-  IO.mapOptionalWithContext("Lines", Obj.Lines, Context);
   IO.mapOptionalWithContext("Checksums", Obj.FileChecksums, Context);
-};
+  IO.mapOptionalWithContext("Lines", Obj.LineFragments, Context);
+  IO.mapOptionalWithContext("InlineeLines", Obj.Inlinees, Context);
+}
+
+void MappingContextTraits<PdbInlineeSite, SerializationContext>::mapping(
+    IO &IO, PdbInlineeSite &Obj, SerializationContext &Context) {
+  IO.mapRequired("FileName", Obj.FileName);
+  IO.mapRequired("LineNum", Obj.SourceLineNum);
+  IO.mapRequired("Inlinee", Obj.Inlinee);
+  IO.mapOptional("ExtraFiles", Obj.ExtraFiles);
+}
+
+void MappingContextTraits<PdbInlineeInfo, SerializationContext>::mapping(
+    IO &IO, PdbInlineeInfo &Obj, SerializationContext &Context) {
+  IO.mapRequired("HasExtraFiles", Obj.HasExtraFiles);
+  IO.mapRequired("Sites", Obj.Sites, Context);
+}
 
 void MappingContextTraits<PdbTpiRecord, pdb::yaml::SerializationContext>::
     mapping(IO &IO, pdb::yaml::PdbTpiRecord &Obj,
diff --git a/tools/llvm-pdbdump/PdbYaml.h b/tools/llvm-pdbdump/PdbYaml.h
index 96e0583ca23..423845caeb3 100644
--- a/tools/llvm-pdbdump/PdbYaml.h
+++ b/tools/llvm-pdbdump/PdbYaml.h
@@ -99,12 +99,25 @@ struct PdbSourceLineInfo {
   codeview::LineFlags Flags;
   uint32_t CodeSize;
 
-  std::vector<PdbSourceLineBlock> LineInfo;
+  std::vector<PdbSourceLineBlock> Blocks;
+};
+
+struct PdbInlineeSite {
+  codeview::TypeIndex Inlinee;
+  StringRef FileName;
+  uint32_t SourceLineNum;
+  std::vector<StringRef> ExtraFiles;
+};
+
+struct PdbInlineeInfo {
+  bool HasExtraFiles;
+  std::vector<PdbInlineeSite> Sites;
 };
 
 struct PdbSourceFileInfo {
-  PdbSourceLineInfo Lines;
   std::vector<PdbSourceFileChecksumEntry> FileChecksums;
+  std::vector<PdbSourceLineInfo> LineFragments;
+  std::vector<PdbInlineeInfo> Inlinees;
 };
 
 struct PdbDbiModuleInfo {
@@ -258,6 +271,20 @@ struct MappingContextTraits<pdb::yaml::PdbSourceFileInfo,
                       pdb::yaml::SerializationContext &Context);
 };
 
+template <>
+struct MappingContextTraits<pdb::yaml::PdbInlineeInfo,
+                            pdb::yaml::SerializationContext> {
+  static void mapping(IO &IO, pdb::yaml::PdbInlineeInfo &Obj,
+                      pdb::yaml::SerializationContext &Context);
+};
+
+template <>
+struct MappingContextTraits<pdb::yaml::PdbInlineeSite,
+                            pdb::yaml::SerializationContext> {
+  static void mapping(IO &IO, pdb::yaml::PdbInlineeSite &Obj,
+                      pdb::yaml::SerializationContext &Context);
+};
+
 template <>
 struct MappingContextTraits<pdb::yaml::PdbTpiRecord,
                             pdb::yaml::SerializationContext> {
diff --git a/tools/llvm-pdbdump/StreamUtil.cpp b/tools/llvm-pdbdump/StreamUtil.cpp
index db1e01aa015..6577702adac 100644
--- a/tools/llvm-pdbdump/StreamUtil.cpp
+++ b/tools/llvm-pdbdump/StreamUtil.cpp
@@ -11,9 +11,9 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 
diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/tools/llvm-pdbdump/YAMLOutputStyle.cpp
index b329de265e7..807d7f8b82e 100644
--- a/tools/llvm-pdbdump/YAMLOutputStyle.cpp
+++ b/tools/llvm-pdbdump/YAMLOutputStyle.cpp
@@ -9,21 +9,28 @@
 
 #include "YAMLOutputStyle.h"
 
+#include "C13DebugFragmentVisitor.h"
 #include "PdbYaml.h"
 #include "llvm-pdbdump.h"
 
 #include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFragmentVisitor.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugUnknownFragment.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/ModStream.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 
 using namespace llvm;
+using namespace llvm::codeview;
 using namespace llvm::pdb;
 
 YAMLOutputStyle::YAMLOutputStyle(PDBFile &File)
@@ -46,6 +53,12 @@ Error YAMLOutputStyle::dump() {
   if (opts::pdb2yaml::DbiModuleInfo)
     opts::pdb2yaml::DbiStream = true;
 
+  // Some names from the module source file info get pulled from the string
+  // table, so if we're writing module source info, we have to write the string
+  // table as well.
+  if (opts::pdb2yaml::DbiModuleSourceLineInfo)
+    opts::pdb2yaml::StringTable = true;
+
   if (auto EC = dumpFileHeaders())
     return EC;
 
@@ -75,22 +88,15 @@ Error YAMLOutputStyle::dump() {
 }
 
 namespace {
-class C13SubstreamVisitor : public codeview::IModuleSubstreamVisitor {
+class C13YamlVisitor : public C13DebugFragmentVisitor {
 public:
-  C13SubstreamVisitor(llvm::pdb::yaml::PdbSourceFileInfo &Info, PDBFile &F)
-      : Info(Info), F(F) {}
+  C13YamlVisitor(llvm::pdb::yaml::PdbSourceFileInfo &Info, PDBFile &F)
+      : C13DebugFragmentVisitor(F), Info(Info) {}
 
-  Error visitUnknown(codeview::ModuleSubstreamKind Kind,
-                     BinaryStreamRef Stream) override {
-    return Error::success();
-  }
-
-  Error
-  visitFileChecksums(BinaryStreamRef Data,
-                     const codeview::FileChecksumArray &Checksums) override {
-    for (const auto &C : Checksums) {
+  Error handleFileChecksums() override {
+    for (const auto &C : *Checksums) {
       llvm::pdb::yaml::PdbSourceFileChecksumEntry Entry;
-      if (auto Result = getGlobalString(C.FileNameOffset))
+      if (auto Result = getNameFromStringTable(C.FileNameOffset))
         Entry.FileName = *Result;
       else
         return Result.takeError();
@@ -102,80 +108,94 @@ public:
     return Error::success();
   }
 
-  Error visitLines(BinaryStreamRef Data,
-                   const codeview::LineSubstreamHeader *Header,
-                   const codeview::LineInfoArray &Lines) override {
+  Error handleLines() override {
+    for (const auto &LF : Lines) {
+      Info.LineFragments.emplace_back();
+      auto &Fragment = Info.LineFragments.back();
 
-    Info.Lines.CodeSize = Header->CodeSize;
-    Info.Lines.Flags =
-        static_cast<codeview::LineFlags>(uint16_t(Header->Flags));
-    Info.Lines.RelocOffset = Header->RelocOffset;
-    Info.Lines.RelocSegment = Header->RelocSegment;
+      Fragment.CodeSize = LF.header()->CodeSize;
+      Fragment.Flags =
+          static_cast<codeview::LineFlags>(uint16_t(LF.header()->Flags));
+      Fragment.RelocOffset = LF.header()->RelocOffset;
+      Fragment.RelocSegment = LF.header()->RelocSegment;
 
-    for (const auto &L : Lines) {
-      llvm::pdb::yaml::PdbSourceLineBlock Block;
+      for (const auto &L : LF) {
+        Fragment.Blocks.emplace_back();
+        auto &Block = Fragment.Blocks.back();
 
-      if (auto Result = getDbiFileName(L.NameIndex))
-        Block.FileName = *Result;
-      else
-        return Result.takeError();
+        if (auto Result = getNameFromChecksumsBuffer(L.NameIndex))
+          Block.FileName = *Result;
+        else
+          return Result.takeError();
 
-      for (const auto &N : L.LineNumbers) {
-        llvm::pdb::yaml::PdbSourceLineEntry Line;
-        Line.Offset = N.Offset;
-        codeview::LineInfo LI(N.Flags);
-        Line.LineStart = LI.getStartLine();
-        Line.EndDelta = LI.getEndLine();
-        Line.IsStatement = LI.isStatement();
-        Block.Lines.push_back(Line);
-      }
+        for (const auto &N : L.LineNumbers) {
+          llvm::pdb::yaml::PdbSourceLineEntry Line;
+          Line.Offset = N.Offset;
+          codeview::LineInfo LI(N.Flags);
+          Line.LineStart = LI.getStartLine();
+          Line.EndDelta = LI.getLineDelta();
+          Line.IsStatement = LI.isStatement();
+          Block.Lines.push_back(Line);
+        }
 
-      if (Info.Lines.Flags & codeview::LineFlags::HaveColumns) {
-        for (const auto &C : L.Columns) {
-          llvm::pdb::yaml::PdbSourceColumnEntry Column;
-          Column.StartColumn = C.StartColumn;
-          Column.EndColumn = C.EndColumn;
-          Block.Columns.push_back(Column);
+        if (LF.hasColumnInfo()) {
+          for (const auto &C : L.Columns) {
+            llvm::pdb::yaml::PdbSourceColumnEntry Column;
+            Column.StartColumn = C.StartColumn;
+            Column.EndColumn = C.EndColumn;
+            Block.Columns.push_back(Column);
+          }
         }
       }
+    }
+    return Error::success();
+  }
 
-      Info.Lines.LineInfo.push_back(Block);
+  Error handleInlineeLines() override {
+    for (const auto &ILF : InlineeLines) {
+      Info.Inlinees.emplace_back();
+      auto &Inlinee = Info.Inlinees.back();
+
+      Inlinee.HasExtraFiles = ILF.hasExtraFiles();
+      for (const auto &IL : ILF) {
+        Inlinee.Sites.emplace_back();
+        auto &Site = Inlinee.Sites.back();
+        if (auto Result = getNameFromChecksumsBuffer(IL.Header->FileID))
+          Site.FileName = *Result;
+        else
+          return Result.takeError();
+
+        Site.Inlinee = IL.Header->Inlinee;
+        Site.SourceLineNum = IL.Header->SourceLineNum;
+        if (ILF.hasExtraFiles()) {
+          for (const auto &EF : IL.ExtraFiles) {
+            if (auto Result = getNameFromChecksumsBuffer(EF))
+              Site.ExtraFiles.push_back(*Result);
+            else
+              return Result.takeError();
+          }
+        }
+      }
     }
     return Error::success();
   }
 
 private:
-  Expected<StringRef> getGlobalString(uint32_t Offset) {
-    auto ST = F.getStringTable();
-    if (!ST)
-      return ST.takeError();
-
-    return ST->getStringForID(Offset);
-  }
-  Expected<StringRef> getDbiFileName(uint32_t Offset) {
-    auto DS = F.getPDBDbiStream();
-    if (!DS)
-      return DS.takeError();
-    return DS->getFileNameForIndex(Offset);
-  }
 
   llvm::pdb::yaml::PdbSourceFileInfo &Info;
-  PDBFile &F;
 };
 }
 
 Expected<Optional<llvm::pdb::yaml::PdbSourceFileInfo>>
-YAMLOutputStyle::getFileLineInfo(const pdb::ModStream &ModS) {
+YAMLOutputStyle::getFileLineInfo(const pdb::ModuleDebugStreamRef &ModS) {
   if (!ModS.hasLineInfo())
     return None;
 
   yaml::PdbSourceFileInfo Info;
-  bool Error = false;
-  C13SubstreamVisitor Visitor(Info, File);
-  for (auto &Substream : ModS.lines(&Error)) {
-    if (auto E = codeview::visitModuleSubstream(Substream, Visitor))
-      return std::move(E);
-  }
+  C13YamlVisitor Visitor(Info, File);
+  if (auto EC = codeview::visitModuleDebugFragments(ModS.linesAndChecksums(),
+                                                    Visitor))
+    return std::move(EC);
 
   return Info;
 }
@@ -283,17 +303,22 @@ Error YAMLOutputStyle::dumpDbiStream() {
   Obj.DbiStream->VerHeader = DS.getDbiVersion();
   if (opts::pdb2yaml::DbiModuleInfo) {
     for (const auto &MI : DS.modules()) {
-      yaml::PdbDbiModuleInfo DMI;
+      Obj.DbiStream->ModInfos.emplace_back();
+      yaml::PdbDbiModuleInfo &DMI = Obj.DbiStream->ModInfos.back();
+
       DMI.Mod = MI.Info.getModuleName();
       DMI.Obj = MI.Info.getObjFileName();
       if (opts::pdb2yaml::DbiModuleSourceFileInfo)
         DMI.SourceFiles = MI.SourceFiles;
 
-      auto ModStreamData = msf::MappedBlockStream::createIndexedStream(
-          File.getMsfLayout(), File.getMsfBuffer(),
-          MI.Info.getModuleStreamIndex());
+      uint16_t ModiStream = MI.Info.getModuleStreamIndex();
+      if (ModiStream == kInvalidStreamIndex)
+        continue;
 
-      pdb::ModStream ModS(MI.Info, std::move(ModStreamData));
+      auto ModStreamData = msf::MappedBlockStream::createIndexedStream(
+          File.getMsfLayout(), File.getMsfBuffer(), ModiStream);
+
+      pdb::ModuleDebugStreamRef ModS(MI.Info, std::move(ModStreamData));
       if (auto EC = ModS.reload())
         return EC;
 
@@ -304,8 +329,7 @@ Error YAMLOutputStyle::dumpDbiStream() {
         DMI.FileLineInfo = *ExpectedInfo;
       }
 
-      if (opts::pdb2yaml::DbiModuleSyms &&
-          MI.Info.getModuleStreamIndex() != kInvalidStreamIndex) {
+      if (opts::pdb2yaml::DbiModuleSyms) {
         DMI.Modi.emplace();
 
         DMI.Modi->Signature = ModS.signature();
@@ -315,7 +339,6 @@ Error YAMLOutputStyle::dumpDbiStream() {
           DMI.Modi->Symbols.push_back(Record);
         }
       }
-      Obj.DbiStream->ModInfos.push_back(DMI);
     }
   }
   return Error::success();
diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.h b/tools/llvm-pdbdump/YAMLOutputStyle.h
index 263af776fa0..517c7d86d7a 100644
--- a/tools/llvm-pdbdump/YAMLOutputStyle.h
+++ b/tools/llvm-pdbdump/YAMLOutputStyle.h
@@ -19,7 +19,7 @@
 
 namespace llvm {
 namespace pdb {
-class ModStream;
+class ModuleDebugStreamRef;
 
 class YAMLOutputStyle : public OutputStyle {
 public:
@@ -29,7 +29,7 @@ public:
 
 private:
   Expected<Optional<llvm::pdb::yaml::PdbSourceFileInfo>>
-  getFileLineInfo(const pdb::ModStream &ModS);
+  getFileLineInfo(const pdb::ModuleDebugStreamRef &ModS);
 
   Error dumpStringTable();
   Error dumpFileHeaders();
diff --git a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp b/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
index 38eaf16c65b..14cd222d138 100644
--- a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
+++ b/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
@@ -19,7 +19,7 @@
 #include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Raw/IPDBStreamData.h"
 #include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Raw/ModStream.h"
+#include "llvm/DebugInfo/PDB/Raw/ModuleDebugStream.h"
 #include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Raw/RawSession.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -90,7 +90,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
       consumeError(ModStreamData.takeError());
       return 0;
     }
-    pdb::ModStream ModS(Modi.Info, std::move(*ModStreamData));
+    pdb::ModuleDebugStreamRef ModS(Modi.Info, std::move(*ModStreamData));
     if (auto E = ModS.reload()) {
       consumeError(std::move(E));
       return 0;
diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbdump/llvm-pdbdump.cpp
index 7337b1d2874..642e169613b 100644
--- a/tools/llvm-pdbdump/llvm-pdbdump.cpp
+++ b/tools/llvm-pdbdump/llvm-pdbdump.cpp
@@ -28,18 +28,22 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/PDB/GenericError.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
-#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
@@ -261,9 +265,10 @@ cl::opt<std::string>
                       cl::cat(MsfOptions), cl::sub(RawSubcommand));
 llvm::Optional<BlockRange> DumpBlockRange;
 
-cl::list<uint32_t>
+cl::list<std::string>
     DumpStreamData("stream-data", cl::CommaSeparated, cl::ZeroOrMore,
-                   cl::desc("Dump binary data from specified streams."),
+                   cl::desc("Dump binary data from specified streams.  Format "
+                            "is SN[:Start][@Size]"),
                    cl::cat(MsfOptions), cl::sub(RawSubcommand));
 
 // TYPE OPTIONS
@@ -419,6 +424,21 @@ cl::list<std::string> InputFilename(cl::Positional,
 
 static ExitOnError ExitOnErr;
 
+static uint32_t
+getFileChecksumOffset(StringRef FileName,
+                      ModuleDebugFileChecksumFragment &Checksums,
+                      StringTableBuilder &Strings) {
+  // The offset in the line info record is the offset of the checksum
+  // entry for the corresponding file.  That entry then contains an
+  // offset into the global string table of the file name.  So to
+  // compute the proper offset to write into the line info record, we
+  // must first get its offset in the global string table, then ask the
+  // checksum builder to find the offset in its serialized buffer that
+  // it mapped that filename string table offset to.
+  uint32_t StringOffset = Strings.insert(FileName);
+  return Checksums.mapChecksumOffset(StringOffset);
+}
+
 static void yamlToPdb(StringRef Path) {
   BumpPtrAllocator Allocator;
   ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
@@ -490,6 +510,80 @@ static void yamlToPdb(StringRef Path) {
       for (auto Symbol : ModiStream.Symbols)
         ModiBuilder.addSymbol(Symbol.Record);
     }
+    if (MI.FileLineInfo.hasValue()) {
+      const auto &FLI = *MI.FileLineInfo;
+
+      // File Checksums must be emitted before line information, because line
+      // info records use offsets into the checksum buffer to reference a file's
+      // source file name.
+      auto Checksums = llvm::make_unique<ModuleDebugFileChecksumFragment>();
+      auto &ChecksumRef = *Checksums;
+      if (!FLI.FileChecksums.empty()) {
+        auto &Strings = Builder.getStringTableBuilder();
+        for (auto &FC : FLI.FileChecksums) {
+          uint32_t STOffset = Strings.insert(FC.FileName);
+          Checksums->addChecksum(STOffset, FC.Kind, FC.ChecksumBytes.Bytes);
+        }
+      }
+      ModiBuilder.setC13FileChecksums(std::move(Checksums));
+
+      // FIXME: StringTable / StringTableBuilder should really be in
+      // DebugInfoCodeView.  This would allow us to construct the
+      // ModuleDebugLineFragment with a reference to the string table,
+      // and we could just pass strings around rather than having to
+      // remember how to calculate the right offset.
+      auto &Strings = Builder.getStringTableBuilder();
+
+      for (const auto &Fragment : FLI.LineFragments) {
+        auto Lines = llvm::make_unique<ModuleDebugLineFragment>();
+        Lines->setCodeSize(Fragment.CodeSize);
+        Lines->setRelocationAddress(Fragment.RelocSegment,
+                                    Fragment.RelocOffset);
+        Lines->setFlags(Fragment.Flags);
+        for (const auto &LC : Fragment.Blocks) {
+          uint32_t ChecksumOffset =
+              getFileChecksumOffset(LC.FileName, ChecksumRef, Strings);
+
+          Lines->createBlock(ChecksumOffset);
+          if (Lines->hasColumnInfo()) {
+            for (const auto &Item : zip(LC.Lines, LC.Columns)) {
+              auto &L = std::get<0>(Item);
+              auto &C = std::get<1>(Item);
+              uint32_t LE = L.LineStart + L.EndDelta;
+              Lines->addLineAndColumnInfo(
+                  L.Offset, LineInfo(L.LineStart, LE, L.IsStatement),
+                  C.StartColumn, C.EndColumn);
+            }
+          } else {
+            for (const auto &L : LC.Lines) {
+              uint32_t LE = L.LineStart + L.EndDelta;
+              Lines->addLineInfo(L.Offset,
+                                 LineInfo(L.LineStart, LE, L.IsStatement));
+            }
+          }
+        }
+        ModiBuilder.addC13Fragment(std::move(Lines));
+      }
+
+      for (const auto &Inlinee : FLI.Inlinees) {
+        auto Inlinees = llvm::make_unique<ModuleDebugInlineeLineFragment>(
+            Inlinee.HasExtraFiles);
+        for (const auto &Site : Inlinee.Sites) {
+          uint32_t FileOff =
+              getFileChecksumOffset(Site.FileName, ChecksumRef, Strings);
+
+          Inlinees->addInlineSite(Site.Inlinee, FileOff, Site.SourceLineNum);
+          if (!Inlinee.HasExtraFiles)
+            continue;
+
+          for (auto EF : Site.ExtraFiles) {
+            FileOff = getFileChecksumOffset(EF, ChecksumRef, Strings);
+            Inlinees->addExtraFile(FileOff);
+          }
+        }
+        ModiBuilder.addC13Fragment(std::move(Inlinees));
+      }
+    }
   }
 
   auto &TpiBuilder = Builder.getTpiBuilder();
diff --git a/tools/llvm-pdbdump/llvm-pdbdump.h b/tools/llvm-pdbdump/llvm-pdbdump.h
index f080d6d5525..8b1dde9399b 100644
--- a/tools/llvm-pdbdump/llvm-pdbdump.h
+++ b/tools/llvm-pdbdump/llvm-pdbdump.h
@@ -60,7 +60,7 @@ struct BlockRange {
 };
 
 extern llvm::Optional<BlockRange> DumpBlockRange;
-extern llvm::cl::list<uint32_t> DumpStreamData;
+extern llvm::cl::list<std::string> DumpStreamData;
 
 extern llvm::cl::opt<bool> CompactRecords;
 extern llvm::cl::opt<bool> DumpGlobals;
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 9836c137ed2..a7088c1c741 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -25,6 +25,9 @@
 #include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
 #include "llvm/DebugInfo/CodeView/RecordSerialization.h"
 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
 #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
@@ -38,11 +41,12 @@
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/COFF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/SourceMgr.h"
@@ -78,6 +82,7 @@ public:
   void printCOFFDirectives() override;
   void printCOFFBaseReloc() override;
   void printCOFFDebugDirectory() override;
+  void printCOFFResources() override;
   void printCodeViewDebugInfo() override;
   void mergeCodeViewTypes(llvm::codeview::TypeTableBuilder &CVIDs,
                           llvm::codeview::TypeTableBuilder &CVTypes) override;
@@ -496,19 +501,19 @@ WeakExternalCharacteristics[] = {
 };
 
 static const EnumEntry<uint32_t> SubSectionTypes[] = {
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, Symbols),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, Lines),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, StringTable),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, FileChecksums),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, FrameData),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, InlineeLines),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeImports),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeExports),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, ILLines),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, FuncMDTokenMap),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, TypeMDTokenMap),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, MergedAssemblyInput),
-  LLVM_READOBJ_ENUM_CLASS_ENT(ModuleSubstreamKind, CoffSymbolRVA),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Symbols),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, Lines),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, StringTable),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FileChecksums),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FrameData),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, InlineeLines),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeImports),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CrossScopeExports),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, ILLines),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, FuncMDTokenMap),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, TypeMDTokenMap),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, MergedAssemblyInput),
+    LLVM_READOBJ_ENUM_CLASS_ENT(ModuleDebugFragmentKind, CoffSymbolRVA),
 };
 
 static const EnumEntry<uint32_t> FrameDataFlags[] = {
@@ -730,11 +735,11 @@ void COFFDumper::initializeFileAndStringTables(StringRef Data) {
     error(consume(Data, SubSectionSize));
     if (SubSectionSize > Data.size())
       return error(object_error::parse_failed);
-    switch (ModuleSubstreamKind(SubType)) {
-    case ModuleSubstreamKind::FileChecksums:
+    switch (ModuleDebugFragmentKind(SubType)) {
+    case ModuleDebugFragmentKind::FileChecksums:
       CVFileChecksumTable = Data.substr(0, SubSectionSize);
       break;
-    case ModuleSubstreamKind::StringTable:
+    case ModuleDebugFragmentKind::StringTable:
       CVStringTable = Data.substr(0, SubSectionSize);
       break;
     default:
@@ -800,20 +805,20 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
       printBinaryBlockWithRelocs("SubSectionContents", Section, SectionContents,
                                  Contents);
 
-    switch (ModuleSubstreamKind(SubType)) {
-    case ModuleSubstreamKind::Symbols:
+    switch (ModuleDebugFragmentKind(SubType)) {
+    case ModuleDebugFragmentKind::Symbols:
       printCodeViewSymbolsSubsection(Contents, Section, SectionContents);
       break;
 
-    case ModuleSubstreamKind::InlineeLines:
+    case ModuleDebugFragmentKind::InlineeLines:
       printCodeViewInlineeLines(Contents);
       break;
 
-    case ModuleSubstreamKind::FileChecksums:
+    case ModuleDebugFragmentKind::FileChecksums:
       printCodeViewFileChecksums(Contents);
       break;
 
-    case ModuleSubstreamKind::Lines: {
+    case ModuleDebugFragmentKind::Lines: {
       // Holds a PC to file:line table.  Some data to parse this subsection is
       // stored in the other subsections, so just check sanity and store the
       // pointers for deferred processing.
@@ -839,7 +844,7 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
       FunctionNames.push_back(LinkageName);
       break;
     }
-    case ModuleSubstreamKind::FrameData: {
+    case ModuleDebugFragmentKind::FrameData: {
       // First four bytes is a relocation against the function.
       BinaryByteStream S(Contents, llvm::support::little);
       BinaryStreamReader SR(S);
@@ -890,45 +895,29 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
     ListScope S(W, "FunctionLineTable");
     W.printString("LinkageName", Name);
 
-    DataExtractor DE(FunctionLineTables[Name], true, 4);
-    uint32_t Offset = 6;  // Skip relocations.
-    uint16_t Flags = DE.getU16(&Offset);
-    W.printHex("Flags", Flags);
-    bool HasColumnInformation = Flags & codeview::LineFlags::HaveColumns;
-    uint32_t FunctionSize = DE.getU32(&Offset);
-    W.printHex("CodeSize", FunctionSize);
-    while (DE.isValidOffset(Offset)) {
-      // For each range of lines with the same filename, we have a segment
-      // in the line table.  The filename string is accessed using double
-      // indirection to the string table subsection using the index subsection.
-      uint32_t OffsetInIndex = DE.getU32(&Offset),
-               NumLines = DE.getU32(&Offset),
-               FullSegmentSize = DE.getU32(&Offset);
+    BinaryByteStream LineTableInfo(FunctionLineTables[Name], support::little);
+    BinaryStreamReader Reader(LineTableInfo);
 
-      uint32_t ColumnOffset = Offset + 8 * NumLines;
-      DataExtractor ColumnDE(DE.getData(), true, 4);
+    ModuleDebugLineFragmentRef LineInfo;
+    error(LineInfo.initialize(Reader));
 
-      if (FullSegmentSize !=
-          12 + 8 * NumLines + (HasColumnInformation ? 4 * NumLines : 0)) {
-        error(object_error::parse_failed);
-        return;
-      }
+    W.printHex("Flags", LineInfo.header()->Flags);
+    W.printHex("CodeSize", LineInfo.header()->CodeSize);
+    for (const auto &Entry : LineInfo) {
 
       ListScope S(W, "FilenameSegment");
-      printFileNameForOffset("Filename", OffsetInIndex);
-      for (unsigned LineIdx = 0;
-           LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) {
-        // Then go the (PC, LineNumber) pairs.  The line number is stored in the
-        // least significant 31 bits of the respective word in the table.
-        uint32_t PC = DE.getU32(&Offset), LineData = DE.getU32(&Offset);
-        if (PC >= FunctionSize) {
+      printFileNameForOffset("Filename", Entry.NameIndex);
+      uint32_t ColumnIndex = 0;
+      for (const auto &Line : Entry.LineNumbers) {
+        if (Line.Offset >= LineInfo.header()->CodeSize) {
           error(object_error::parse_failed);
           return;
         }
-        char Buffer[32];
-        format("+0x%X", PC).snprint(Buffer, 32);
-        ListScope PCScope(W, Buffer);
-        LineInfo LI(LineData);
+
+        std::string PC = formatv("+{0:X}", uint32_t(Line.Offset));
+        ListScope PCScope(W, PC);
+        codeview::LineInfo LI(Line.Flags);
+
         if (LI.isAlwaysStepInto())
           W.printString("StepInto", StringRef("Always"));
         else if (LI.isNeverStepInto())
@@ -937,19 +926,10 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
           W.printNumber("LineNumberStart", LI.getStartLine());
         W.printNumber("LineNumberEndDelta", LI.getLineDelta());
         W.printBoolean("IsStatement", LI.isStatement());
-        if (HasColumnInformation &&
-            ColumnDE.isValidOffsetForDataOfSize(ColumnOffset, 4)) {
-          uint16_t ColStart = ColumnDE.getU16(&ColumnOffset);
-          W.printNumber("ColStart", ColStart);
-          uint16_t ColEnd = ColumnDE.getU16(&ColumnOffset);
-          W.printNumber("ColEnd", ColEnd);
-        }
-      }
-      // Skip over the column data.
-      if (HasColumnInformation) {
-        for (unsigned LineIdx = 0;
-             LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) {
-          DE.getU32(&Offset);
+        if (LineInfo.hasColumnInfo()) {
+          W.printNumber("ColStart", Entry.Columns[ColumnIndex].StartColumn);
+          W.printNumber("ColEnd", Entry.Columns[ColumnIndex].EndColumn);
+          ++ColumnIndex;
         }
       }
     }
@@ -985,56 +965,42 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
 void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) {
   BinaryByteStream S(Subsection, llvm::support::little);
   BinaryStreamReader SR(S);
-  while (!SR.empty()) {
+  ModuleDebugFileChecksumFragmentRef Checksums;
+  error(Checksums.initialize(SR));
+
+  for (auto &FC : Checksums) {
     DictScope S(W, "FileChecksum");
-    const FileChecksum *FC;
-    error(SR.readObject(FC));
-    if (FC->FileNameOffset >= CVStringTable.size())
+
+    if (FC.FileNameOffset >= CVStringTable.size())
       error(object_error::parse_failed);
     StringRef Filename =
-        CVStringTable.drop_front(FC->FileNameOffset).split('\0').first;
-    W.printHex("Filename", Filename, FC->FileNameOffset);
-    W.printHex("ChecksumSize", FC->ChecksumSize);
-    W.printEnum("ChecksumKind", uint8_t(FC->ChecksumKind),
+        CVStringTable.drop_front(FC.FileNameOffset).split('\0').first;
+    W.printHex("Filename", Filename, FC.FileNameOffset);
+    W.printHex("ChecksumSize", FC.Checksum.size());
+    W.printEnum("ChecksumKind", uint8_t(FC.Kind),
                 makeArrayRef(FileChecksumKindNames));
-    if (FC->ChecksumSize >= SR.bytesRemaining())
-      error(object_error::parse_failed);
-    ArrayRef<uint8_t> ChecksumBytes;
-    error(SR.readBytes(ChecksumBytes, FC->ChecksumSize));
-    W.printBinary("ChecksumBytes", ChecksumBytes);
-    unsigned PaddedSize = alignTo(FC->ChecksumSize + sizeof(FileChecksum), 4) -
-                          sizeof(FileChecksum);
-    PaddedSize -= ChecksumBytes.size();
-    if (PaddedSize > SR.bytesRemaining())
-      error(object_error::parse_failed);
-    error(SR.skip(PaddedSize));
+
+    W.printBinary("ChecksumBytes", FC.Checksum);
   }
 }
 
 void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) {
   BinaryByteStream S(Subsection, llvm::support::little);
   BinaryStreamReader SR(S);
-  uint32_t Signature;
-  error(SR.readInteger(Signature));
-  bool HasExtraFiles = Signature == unsigned(InlineeLinesSignature::ExtraFiles);
+  ModuleDebugInlineeLineFragmentRef Lines;
+  error(Lines.initialize(SR));
 
-  while (!SR.empty()) {
-    const InlineeSourceLine *ISL;
-    error(SR.readObject(ISL));
+  for (auto &Line : Lines) {
     DictScope S(W, "InlineeSourceLine");
-    printTypeIndex("Inlinee", ISL->Inlinee);
-    printFileNameForOffset("FileID", ISL->FileID);
-    W.printNumber("SourceLineNum", ISL->SourceLineNum);
+    printTypeIndex("Inlinee", Line.Header->Inlinee);
+    printFileNameForOffset("FileID", Line.Header->FileID);
+    W.printNumber("SourceLineNum", Line.Header->SourceLineNum);
 
-    if (HasExtraFiles) {
-      uint32_t ExtraFileCount;
-      error(SR.readInteger(ExtraFileCount));
-      W.printNumber("ExtraFileCount", ExtraFileCount);
+    if (Lines.hasExtraFiles()) {
+      W.printNumber("ExtraFileCount", Line.ExtraFiles.size());
       ListScope ExtraFiles(W, "ExtraFiles");
-      for (unsigned I = 0; I < ExtraFileCount; ++I) {
-        uint32_t FileID;
-        error(SR.readInteger(FileID));
-        printFileNameForOffset("FileID", FileID);
+      for (const auto &FID : Line.ExtraFiles) {
+        printFileNameForOffset("FileID", FID);
       }
     }
   }
@@ -1527,6 +1493,30 @@ void COFFDumper::printCOFFBaseReloc() {
   }
 }
 
+void COFFDumper::printCOFFResources() {
+  ListScope ResourcesD(W, "Resources");
+  for (const SectionRef &S : Obj->sections()) {
+    StringRef Name;
+    error(S.getName(Name));
+    if (!Name.startswith(".rsrc"))
+      continue;
+
+    StringRef Ref;
+    error(S.getContents(Ref));
+
+    if ((Name == ".rsrc") || (Name == ".rsrc$01")) {
+      auto Table =
+          reinterpret_cast<const coff_resource_dir_table *>(Ref.data());
+      char FormattedTime[20];
+      time_t TDS = time_t(Table->TimeDateStamp);
+      strftime(FormattedTime, sizeof(FormattedTime), "%Y-%m-%d %H:%M:%S",
+               gmtime(&TDS));
+      W.printHex("Time/Date Stamp", FormattedTime, Table->TimeDateStamp);
+    }
+    W.printBinaryBlock(Name.str() + " Data", Ref);
+  }
+}
+
 void COFFDumper::printStackMap() const {
   object::SectionRef StackMapSection;
   for (auto Sec : Obj->sections()) {
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 7893eea5d22..2e9e01d9642 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -983,57 +983,6 @@ static const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
   { "AMDGPU_HSA_METADATA",          ELF::STT_AMDGPU_HSA_METADATA }
 };
 
-static const char *getElfSectionType(unsigned Arch, unsigned Type) {
-  switch (Arch) {
-  case ELF::EM_ARM:
-    switch (Type) {
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_EXIDX);
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP);
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES);
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY);
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION);
-    }
-  case ELF::EM_HEXAGON:
-    switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, SHT_HEX_ORDERED); }
-  case ELF::EM_X86_64:
-    switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, SHT_X86_64_UNWIND); }
-  case ELF::EM_MIPS:
-  case ELF::EM_MIPS_RS3_LE:
-    switch (Type) {
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_REGINFO);
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_OPTIONS);
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
-    LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_DWARF);
-    }
-  }
-
-  switch (Type) {
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_NULL              );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_PROGBITS          );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_SYMTAB            );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_STRTAB            );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_RELA              );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_HASH              );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_DYNAMIC           );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_NOTE              );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_NOBITS            );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_REL               );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_SHLIB             );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_DYNSYM            );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_INIT_ARRAY        );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_FINI_ARRAY        );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_PREINIT_ARRAY     );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GROUP             );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX      );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES    );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_HASH          );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_verdef        );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_verneed       );
-  LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_versym        );
-  default: return "";
-  }
-}
-
 static const char *getGroupType(uint32_t Flag) {
   if (Flag & ELF::GRP_COMDAT)
     return "COMDAT";
@@ -3635,9 +3584,10 @@ template <class ELFT> void LLVMStyle<ELFT>::printSections(const ELFO *Obj) {
     DictScope SectionD(W, "Section");
     W.printNumber("Index", SectionIndex);
     W.printNumber("Name", Name, Sec.sh_name);
-    W.printHex("Type",
-               getElfSectionType(Obj->getHeader()->e_machine, Sec.sh_type),
-               Sec.sh_type);
+    W.printHex(
+        "Type",
+        object::getELFSectionTypeName(Obj->getHeader()->e_machine, Sec.sh_type),
+        Sec.sh_type);
     std::vector<EnumEntry<unsigned>> SectionFlags(std::begin(ElfSectionFlags),
                                                   std::end(ElfSectionFlags));
     switch (Obj->getHeader()->e_machine) {
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
index ff780dae578..48f825c527c 100644
--- a/tools/llvm-readobj/ObjDumper.h
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -67,6 +67,7 @@ public:
   virtual void printCOFFDirectives() { }
   virtual void printCOFFBaseReloc() { }
   virtual void printCOFFDebugDirectory() { }
+  virtual void printCOFFResources() {}
   virtual void printCodeViewDebugInfo() { }
   virtual void mergeCodeViewTypes(llvm::codeview::TypeTableBuilder &CVIDs,
                                   llvm::codeview::TypeTableBuilder &CVTypes) {}
diff --git a/tools/llvm-readobj/WasmDumper.cpp b/tools/llvm-readobj/WasmDumper.cpp
index e27da3b96e5..21614297e46 100644
--- a/tools/llvm-readobj/WasmDumper.cpp
+++ b/tools/llvm-readobj/WasmDumper.cpp
@@ -81,17 +81,30 @@ void WasmDumper::printRelocation(const SectionRef &Section,
   Reloc.getTypeName(RelocTypeName);
   const wasm::WasmRelocation &WasmReloc = Obj->getWasmRelocation(Reloc);
 
+  bool HasAddend = false;
+  switch (RelocType) {
+  case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
+  case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
+  case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
+    HasAddend = true;
+    break;
+  default:
+    break;
+  }
   if (opts::ExpandRelocs) {
     DictScope Group(W, "Relocation");
     W.printNumber("Type", RelocTypeName, RelocType);
     W.printHex("Offset", Reloc.getOffset());
     W.printHex("Index", WasmReloc.Index);
-    W.printHex("Addend", WasmReloc.Addend);
+    if (HasAddend)
+      W.printNumber("Addend", WasmReloc.Addend);
   } else {
     raw_ostream& OS = W.startLine();
     OS << W.hex(Reloc.getOffset())
-       << " " << RelocTypeName << "[" << WasmReloc.Index << "]"
-       << " " << W.hex(WasmReloc.Addend) << "\n";
+       << " " << RelocTypeName << "[" << WasmReloc.Index << "]";
+    if (HasAddend)
+      OS << " " << WasmReloc.Addend;
+    OS << "\n";
   }
 }
 
@@ -137,8 +150,20 @@ void WasmDumper::printSections() {
     W.printEnum("Type", WasmSec.Type, makeArrayRef(WasmSectionTypes));
     W.printNumber("Size", (uint64_t)WasmSec.Content.size());
     W.printNumber("Offset", WasmSec.Offset);
-    if (WasmSec.Type == wasm::WASM_SEC_CUSTOM) {
+    switch (WasmSec.Type) {
+    case wasm::WASM_SEC_CUSTOM:
       W.printString("Name", WasmSec.Name);
+      break;
+    case wasm::WASM_SEC_MEMORY:
+      ListScope Group(W, "Memories");
+      for (const wasm::WasmLimits &Memory : Obj->memories()) {
+        DictScope Group(W, "Memory");
+        W.printNumber("InitialPages", Memory.Initial);
+        if (Memory.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) {
+          W.printNumber("MaxPages", WasmSec.Offset);
+        }
+      }
+      break;
     }
 
     if (opts::SectionRelocations) {
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index bc2a62e799a..8a9d7bc720c 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -214,6 +214,10 @@ namespace opts {
   COFFDebugDirectory("coff-debug-directory",
                      cl::desc("Display the PE/COFF debug directory"));
 
+  // -coff-resources
+  cl::opt<bool> COFFResources("coff-resources",
+                              cl::desc("Display the PE/COFF .rsrc section"));
+
   // -macho-data-in-code
   cl::opt<bool>
   MachODataInCode("macho-data-in-code",
@@ -445,6 +449,8 @@ static void dumpObject(const ObjectFile *Obj) {
       Dumper->printCOFFBaseReloc();
     if (opts::COFFDebugDirectory)
       Dumper->printCOFFDebugDirectory();
+    if (opts::COFFResources)
+      Dumper->printCOFFResources();
     if (opts::CodeView)
       Dumper->printCodeViewDebugInfo();
     if (opts::CodeViewMergedTypes)
diff --git a/tools/opt/BreakpointPrinter.cpp b/tools/opt/BreakpointPrinter.cpp
index 33b3edcd123..e5614ed061e 100644
--- a/tools/opt/BreakpointPrinter.cpp
+++ b/tools/opt/BreakpointPrinter.cpp
@@ -51,7 +51,7 @@ struct BreakpointPrinter : public ModulePass {
         if (!SP)
           continue;
         getContextName(SP->getScope().resolve(), Name);
-        Name = Name + SP->getDisplayName().str();
+        Name = Name + SP->getName().str();
         if (!Name.empty() && Processed.insert(Name).second) {
           Out << Name << "\n";
         }
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 2235f271658..bb6cf35fe9e 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -1723,21 +1723,21 @@ TEST(APIntTest, getLowBitsSet) {
 }
 
 TEST(APIntTest, getBitsSet) {
-  APInt i64hi1lo1 = APInt::getBitsSet(64, 63, 1);
-  EXPECT_EQ(1u, i64hi1lo1.countLeadingOnes());
-  EXPECT_EQ(0u, i64hi1lo1.countLeadingZeros());
-  EXPECT_EQ(64u, i64hi1lo1.getActiveBits());
-  EXPECT_EQ(0u, i64hi1lo1.countTrailingZeros());
-  EXPECT_EQ(1u, i64hi1lo1.countTrailingOnes());
-  EXPECT_EQ(2u, i64hi1lo1.countPopulation());
+  APInt i64hi1lo1 = APInt::getBitsSet(64, 1, 63);
+  EXPECT_EQ(0u, i64hi1lo1.countLeadingOnes());
+  EXPECT_EQ(1u, i64hi1lo1.countLeadingZeros());
+  EXPECT_EQ(63u, i64hi1lo1.getActiveBits());
+  EXPECT_EQ(1u, i64hi1lo1.countTrailingZeros());
+  EXPECT_EQ(0u, i64hi1lo1.countTrailingOnes());
+  EXPECT_EQ(62u, i64hi1lo1.countPopulation());
 
-  APInt i127hi1lo1 = APInt::getBitsSet(127, 126, 1);
-  EXPECT_EQ(1u, i127hi1lo1.countLeadingOnes());
-  EXPECT_EQ(0u, i127hi1lo1.countLeadingZeros());
-  EXPECT_EQ(127u, i127hi1lo1.getActiveBits());
-  EXPECT_EQ(0u, i127hi1lo1.countTrailingZeros());
-  EXPECT_EQ(1u, i127hi1lo1.countTrailingOnes());
-  EXPECT_EQ(2u, i127hi1lo1.countPopulation());
+  APInt i127hi1lo1 = APInt::getBitsSet(127, 1, 126);
+  EXPECT_EQ(0u, i127hi1lo1.countLeadingOnes());
+  EXPECT_EQ(1u, i127hi1lo1.countLeadingZeros());
+  EXPECT_EQ(126u, i127hi1lo1.getActiveBits());
+  EXPECT_EQ(1u, i127hi1lo1.countTrailingZeros());
+  EXPECT_EQ(0u, i127hi1lo1.countTrailingOnes());
+  EXPECT_EQ(125u, i127hi1lo1.countPopulation());
 }
 
 TEST(APIntTest, getHighBitsSet) {
diff --git a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index 2078e3a96a8..3f0e3dab72b 100644
--- a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -1667,4 +1667,245 @@ TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) {
   EXPECT_EQ(DIEs.find(Val2)->second, AbbrevPtrVal2);
 }
 
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidCURef) {
+  // Create a single compile unit with a single function that has a DW_AT_type
+  // that is CU relative. The CU offset is not valid becuase it is larger than
+  // the compile unit itself.
+
+  const char *yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+      - main
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_yes
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+      - Code:            0x00000002
+        Tag:             DW_TAG_subprogram
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_type
+            Form:            DW_FORM_ref4
+    debug_info:
+      - Length:
+          TotalLength:     22
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000000001
+          - AbbrCode:        0x00000002
+            Values:
+              - Value:           0x000000000000000D
+              - Value:           0x0000000000001234
+          - AbbrCode:        0x00000000
+            Values:
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
+  ASSERT_TRUE((bool)ErrOrSections);
+
+  auto &DebugSections = *ErrOrSections;
+
+  DWARFContextInMemory DwarfContext(DebugSections, 8);
+
+  std::string str;
+  raw_string_ostream strm(str);
+  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
+  const char *err = "error: DW_FORM_ref4 CU offset 0x00001234 is invalid "
+                    "(must be less than CU size of 0x0000001a):";
+  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+}
+
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRefAddr) {
+  // Create a single compile unit with a single function that has an invalid
+  // DW_AT_type with an invalid .debug_info offset in its DW_FORM_ref_addr.
+  const char *yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+      - main
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_yes
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+      - Code:            0x00000002
+        Tag:             DW_TAG_subprogram
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_type
+            Form:            DW_FORM_ref_addr
+    debug_info:
+      - Length:
+          TotalLength:     22
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000000001
+          - AbbrCode:        0x00000002
+            Values:
+              - Value:           0x000000000000000D
+              - Value:           0x0000000000001234
+          - AbbrCode:        0x00000000
+            Values:
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
+  ASSERT_TRUE((bool)ErrOrSections);
+
+  auto &DebugSections = *ErrOrSections;
+
+  DWARFContextInMemory DwarfContext(DebugSections, 8);
+
+  std::string str;
+  raw_string_ostream strm(str);
+  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
+  strm.flush();
+  const char *err = "error: DW_FORM_ref_addr offset beyond .debug_info bounds:";
+  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+}
+
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRanges) {
+  // Create a single compile unit with a DW_AT_ranges whose section offset
+  // isn't valid.
+  const char *yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_ranges
+            Form:            DW_FORM_sec_offset
+    debug_info:
+      - Length:
+          TotalLength:     16
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000000001
+              - Value:           0x0000000000001000
+
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
+  ASSERT_TRUE((bool)ErrOrSections);
+
+  auto &DebugSections = *ErrOrSections;
+
+  DWARFContextInMemory DwarfContext(DebugSections, 8);
+
+  std::string str;
+  raw_string_ostream strm(str);
+  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
+  strm.flush();
+  const char *err = "error: DW_AT_ranges offset is beyond .debug_ranges "
+                    "bounds:";
+  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+}
+
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStmtList) {
+  // Create a single compile unit with a DW_AT_stmt_list whose section offset
+  // isn't valid.
+  const char *yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_stmt_list
+            Form:            DW_FORM_sec_offset
+    debug_info:
+      - Length:
+          TotalLength:     16
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000000001
+              - Value:           0x0000000000001000
+
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
+  ASSERT_TRUE((bool)ErrOrSections);
+
+  auto &DebugSections = *ErrOrSections;
+
+  DWARFContextInMemory DwarfContext(DebugSections, 8);
+
+  std::string str;
+  raw_string_ostream strm(str);
+  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
+  strm.flush();
+  const char *err = "error: DW_AT_stmt_list offset is beyond .debug_line "
+                    "bounds: 0x00001000";
+  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+}
+
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStrp) {
+  // Create a single compile unit with a single function that has an invalid
+  // DW_FORM_strp for the DW_AT_name.
+  const char *yamldata = R"(
+    debug_str:
+      - ''
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+    debug_info:
+      - Length:
+          TotalLength:     12
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000001234
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
+  ASSERT_TRUE((bool)ErrOrSections);
+
+  auto &DebugSections = *ErrOrSections;
+
+  DWARFContextInMemory DwarfContext(DebugSections, 8);
+
+  std::string str;
+  raw_string_ostream strm(str);
+  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
+  strm.flush();
+  const char *err = "error: DW_FORM_strp offset beyond .debug_str bounds:";
+  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+}
+
 } // end anonymous namespace
diff --git a/unittests/IR/IRBuilderTest.cpp b/unittests/IR/IRBuilderTest.cpp
index 830ae958769..5686c3b2b3a 100644
--- a/unittests/IR/IRBuilderTest.cpp
+++ b/unittests/IR/IRBuilderTest.cpp
@@ -356,6 +356,25 @@ TEST_F(IRBuilderTest, RAIIHelpersTest) {
   EXPECT_EQ(BB, Builder.GetInsertBlock());
 }
 
+TEST_F(IRBuilderTest, createFunction) {
+  IRBuilder<> Builder(BB);
+  DIBuilder DIB(*M);
+  auto File = DIB.createFile("error.swift", "/");
+  auto CU =
+      DIB.createCompileUnit(dwarf::DW_LANG_Swift, File, "swiftc", true, "", 0);
+  auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
+  auto NoErr = DIB.createFunction(CU, "noerr", "", File, 1, Type, false, true, 1,
+                               DINode::FlagZero, true);
+  EXPECT_TRUE(!NoErr->getThrownTypes());
+  auto Int = DIB.createBasicType("Int", 64, dwarf::DW_ATE_signed);
+  auto Error = DIB.getOrCreateArray({Int});
+  auto Err =
+      DIB.createFunction(CU, "err", "", File, 1, Type, false, true, 1,
+      DINode::FlagZero, true, nullptr, nullptr, Error.get());
+  EXPECT_TRUE(Err->getThrownTypes().get() == Error.get());
+  DIB.finalize();
+}
+
 TEST_F(IRBuilderTest, DIBuilder) {
   IRBuilder<> Builder(BB);
   DIBuilder DIB(*M);
diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp
index 103ba4c92dd..e61e649df19 100644
--- a/unittests/IR/MetadataTest.cpp
+++ b/unittests/IR/MetadataTest.cpp
@@ -1512,13 +1512,14 @@ TEST_F(DISubprogramTest, get) {
   MDTuple *TemplateParams = getTuple();
   DISubprogram *Declaration = getSubprogram();
   MDTuple *Variables = getTuple();
+  MDTuple *ThrownTypes = getTuple();
   DICompileUnit *Unit = getUnit();
 
-  auto *N = DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
-                              Type, IsLocalToUnit, IsDefinition, ScopeLine,
-                              ContainingType, Virtuality, VirtualIndex,
-                              ThisAdjustment, Flags, IsOptimized, Unit,
-                              TemplateParams, Declaration, Variables);
+  auto *N = DISubprogram::get(
+      Context, Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
+      IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex,
+      ThisAdjustment, Flags, IsOptimized, Unit, TemplateParams, Declaration,
+      Variables, ThrownTypes);
 
   EXPECT_EQ(dwarf::DW_TAG_subprogram, N->getTag());
   EXPECT_EQ(Scope, N->getScope());
@@ -1540,98 +1541,109 @@ TEST_F(DISubprogramTest, get) {
   EXPECT_EQ(TemplateParams, N->getTemplateParams().get());
   EXPECT_EQ(Declaration, N->getDeclaration());
   EXPECT_EQ(Variables, N->getVariables().get());
-  EXPECT_EQ(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
-                                 Type, IsLocalToUnit, IsDefinition, ScopeLine,
-                                 ContainingType, Virtuality, VirtualIndex,
-                                 ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
+  EXPECT_EQ(ThrownTypes, N->getThrownTypes().get());
+  EXPECT_EQ(N, DISubprogram::get(
+                   Context, Scope, Name, LinkageName, File, Line, Type,
+                   IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
+                   Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
 
   EXPECT_NE(N, DISubprogram::get(
                    Context, getCompositeType(), Name, LinkageName, File, Line,
                    Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
                    Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
-                   Unit, TemplateParams, Declaration, Variables));
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(
                    Context, Scope, "other", LinkageName, File, Line, Type,
                    IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
                    Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
-                   Unit, TemplateParams, Declaration, Variables));
-  EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, "other", File, Line,
-                                 Type, IsLocalToUnit, IsDefinition, ScopeLine,
-                                 ContainingType, Virtuality, VirtualIndex,
-                                 ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
+  EXPECT_NE(N, DISubprogram::get(
+                   Context, Scope, Name, "other", File, Line, Type,
+                   IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
+                   Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(
                    Context, Scope, Name, LinkageName, getFile(), Line, Type,
                    IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
                    Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
-                   Unit, TemplateParams, Declaration, Variables));
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(
                    Context, Scope, Name, LinkageName, File, Line + 1, Type,
                    IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
                    Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
-                   Unit, TemplateParams, Declaration, Variables));
-  EXPECT_NE(N,
-            DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
-                              getSubroutineType(), IsLocalToUnit, IsDefinition,
-                              ScopeLine, ContainingType, Virtuality,
-                              VirtualIndex, ThisAdjustment, Flags, IsOptimized,
-                              Unit, TemplateParams, Declaration, Variables));
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
-                                 Type, !IsLocalToUnit, IsDefinition, ScopeLine,
-                                 ContainingType, Virtuality, VirtualIndex,
-                                 ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
-  EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
-                                 Type, IsLocalToUnit, !IsDefinition, ScopeLine,
-                                 ContainingType, Virtuality, VirtualIndex,
-                                 ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
+                                 getSubroutineType(), IsLocalToUnit,
+                                 IsDefinition, ScopeLine, ContainingType,
+                                 Virtuality, VirtualIndex, ThisAdjustment,
+                                 Flags, IsOptimized, Unit, TemplateParams,
+                                 Declaration, Variables, ThrownTypes));
+  EXPECT_NE(N, DISubprogram::get(
+                   Context, Scope, Name, LinkageName, File, Line, Type,
+                   !IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
+                   Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
+  EXPECT_NE(N, DISubprogram::get(
+                   Context, Scope, Name, LinkageName, File, Line, Type,
+                   IsLocalToUnit, !IsDefinition, ScopeLine, ContainingType,
+                   Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(
                    Context, Scope, Name, LinkageName, File, Line, Type,
                    IsLocalToUnit, IsDefinition, ScopeLine + 1, ContainingType,
                    Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
-                   Unit, TemplateParams, Declaration, Variables));
-  EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
-                                 Type, IsLocalToUnit, IsDefinition, ScopeLine,
-                                 getCompositeType(), Virtuality, VirtualIndex,
-                                 ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
+  EXPECT_NE(N, DISubprogram::get(
+                   Context, Scope, Name, LinkageName, File, Line, Type,
+                   IsLocalToUnit, IsDefinition, ScopeLine, getCompositeType(),
+                   Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
+                   Unit, TemplateParams, Declaration, Variables, ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
                                  Type, IsLocalToUnit, IsDefinition, ScopeLine,
                                  ContainingType, Virtuality + 1, VirtualIndex,
                                  ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
+                                 TemplateParams, Declaration, Variables,
+                                 ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
                                  Type, IsLocalToUnit, IsDefinition, ScopeLine,
                                  ContainingType, Virtuality, VirtualIndex + 1,
                                  ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
+                                 TemplateParams, Declaration, Variables,
+                                 ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
                                  Type, IsLocalToUnit, IsDefinition, ScopeLine,
                                  ContainingType, Virtuality, VirtualIndex,
                                  ThisAdjustment, Flags, !IsOptimized, Unit,
-                                 TemplateParams, Declaration, Variables));
+                                 TemplateParams, Declaration, Variables,
+                                 ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
                                  Type, IsLocalToUnit, IsDefinition, ScopeLine,
                                  ContainingType, Virtuality, VirtualIndex,
                                  ThisAdjustment, Flags, IsOptimized, nullptr,
-                                 TemplateParams, Declaration, Variables));
+                                 TemplateParams, Declaration, Variables,
+                                 ThrownTypes));
+  EXPECT_NE(N, DISubprogram::get(
+                   Context, Scope, Name, LinkageName, File, Line, Type,
+                   IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
+                   Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
+                   Unit, getTuple(), Declaration, Variables, ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
                                  Type, IsLocalToUnit, IsDefinition, ScopeLine,
                                  ContainingType, Virtuality, VirtualIndex,
                                  ThisAdjustment, Flags, IsOptimized, Unit,
-                                 getTuple(), Declaration, Variables));
-  EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
-                                 Type, IsLocalToUnit, IsDefinition, ScopeLine,
-                                 ContainingType, Virtuality, VirtualIndex,
-                                 ThisAdjustment, Flags, IsOptimized, Unit,
-                                 TemplateParams, getSubprogram(), Variables));
+                                 TemplateParams, getSubprogram(), Variables,
+                                 ThrownTypes));
   EXPECT_NE(N, DISubprogram::get(Context, Scope, Name, LinkageName, File, Line,
                                  Type, IsLocalToUnit, IsDefinition, ScopeLine,
                                  ContainingType, Virtuality, VirtualIndex,
                                  ThisAdjustment, Flags, IsOptimized, Unit,
                                  TemplateParams, Declaration, getTuple()));
+  EXPECT_NE(N, DISubprogram::get(
+                   Context, Scope, Name, LinkageName, File, Line, Type,
+                   IsLocalToUnit, IsDefinition, ScopeLine, ContainingType,
+                   Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized,
+                   Unit, TemplateParams, Declaration, Variables, getTuple()));
 
   TempDISubprogram Temp = N->clone();
   EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
@@ -1720,31 +1732,18 @@ typedef MetadataTest DINamespaceTest;
 
 TEST_F(DINamespaceTest, get) {
   DIScope *Scope = getFile();
-  DIFile *File = getFile();
   StringRef Name = "namespace";
-  unsigned Line = 5;
   bool ExportSymbols = true;
 
-  auto *N = DINamespace::get(Context, Scope, File, Name, Line, ExportSymbols);
+  auto *N = DINamespace::get(Context, Scope, Name, ExportSymbols);
 
   EXPECT_EQ(dwarf::DW_TAG_namespace, N->getTag());
   EXPECT_EQ(Scope, N->getScope());
-  EXPECT_EQ(File, N->getFile());
   EXPECT_EQ(Name, N->getName());
-  EXPECT_EQ(Line, N->getLine());
-  EXPECT_EQ(N,
-    DINamespace::get(Context, Scope, File, Name, Line, ExportSymbols));
-
-  EXPECT_NE(N,
-    DINamespace::get(Context, getFile(), File, Name, Line, ExportSymbols));
-  EXPECT_NE(N,
-    DINamespace::get(Context, Scope, getFile(), Name, Line, ExportSymbols));
-  EXPECT_NE(N,
-    DINamespace::get(Context, Scope, File, "other", Line, ExportSymbols));
-  EXPECT_NE(N,
-    DINamespace::get(Context, Scope, File, Name, Line + 1, ExportSymbols));
-  EXPECT_NE(N,
-    DINamespace::get(Context, Scope, File, Name, Line, !ExportSymbols));
+  EXPECT_EQ(N, DINamespace::get(Context, Scope, Name, ExportSymbols));
+  EXPECT_NE(N, DINamespace::get(Context, getFile(), Name, ExportSymbols));
+  EXPECT_NE(N, DINamespace::get(Context, Scope, "other", ExportSymbols));
+  EXPECT_NE(N, DINamespace::get(Context, Scope, Name, !ExportSymbols));
 
   TempDINamespace Temp = N->clone();
   EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
diff --git a/unittests/IR/ValueHandleTest.cpp b/unittests/IR/ValueHandleTest.cpp
index 1abc87c2fdc..9a4ce156dc3 100644
--- a/unittests/IR/ValueHandleTest.cpp
+++ b/unittests/IR/ValueHandleTest.cpp
@@ -44,11 +44,29 @@ TEST_F(ValueHandle, WeakVH_BasicOperation) {
   // doesn't matter which method.
   EXPECT_EQ(Type::getInt32Ty(Context), WVH->getType());
   EXPECT_EQ(Type::getInt32Ty(Context), (*WVH).getType());
+
+  WVH = BitcastV.get();
+  BitcastV->replaceAllUsesWith(ConstantV);
+  EXPECT_EQ(WVH, BitcastV.get());
+  BitcastV.reset();
+  EXPECT_EQ(WVH, nullptr);
 }
 
-TEST_F(ValueHandle, WeakVH_Comparisons) {
-  WeakVH BitcastWVH(BitcastV.get());
-  WeakVH ConstantWVH(ConstantV);
+TEST_F(ValueHandle, WeakTrackingVH_BasicOperation) {
+  WeakTrackingVH WVH(BitcastV.get());
+  EXPECT_EQ(BitcastV.get(), WVH);
+  WVH = ConstantV;
+  EXPECT_EQ(ConstantV, WVH);
+
+  // Make sure I can call a method on the underlying Value.  It
+  // doesn't matter which method.
+  EXPECT_EQ(Type::getInt32Ty(Context), WVH->getType());
+  EXPECT_EQ(Type::getInt32Ty(Context), (*WVH).getType());
+}
+
+TEST_F(ValueHandle, WeakTrackingVH_Comparisons) {
+  WeakTrackingVH BitcastWVH(BitcastV.get());
+  WeakTrackingVH ConstantWVH(ConstantV);
 
   EXPECT_TRUE(BitcastWVH == BitcastWVH);
   EXPECT_TRUE(BitcastV.get() == BitcastWVH);
@@ -79,20 +97,20 @@ TEST_F(ValueHandle, WeakVH_Comparisons) {
   EXPECT_EQ(BV >= CV, BitcastWVH >= ConstantV);
 }
 
-TEST_F(ValueHandle, WeakVH_FollowsRAUW) {
-  WeakVH WVH(BitcastV.get());
-  WeakVH WVH_Copy(WVH);
-  WeakVH WVH_Recreated(BitcastV.get());
+TEST_F(ValueHandle, WeakTrackingVH_FollowsRAUW) {
+  WeakTrackingVH WVH(BitcastV.get());
+  WeakTrackingVH WVH_Copy(WVH);
+  WeakTrackingVH WVH_Recreated(BitcastV.get());
   BitcastV->replaceAllUsesWith(ConstantV);
   EXPECT_EQ(ConstantV, WVH);
   EXPECT_EQ(ConstantV, WVH_Copy);
   EXPECT_EQ(ConstantV, WVH_Recreated);
 }
 
-TEST_F(ValueHandle, WeakVH_NullOnDeletion) {
-  WeakVH WVH(BitcastV.get());
-  WeakVH WVH_Copy(WVH);
-  WeakVH WVH_Recreated(BitcastV.get());
+TEST_F(ValueHandle, WeakTrackingVH_NullOnDeletion) {
+  WeakTrackingVH WVH(BitcastV.get());
+  WeakTrackingVH WVH_Copy(WVH);
+  WeakTrackingVH WVH_Recreated(BitcastV.get());
   BitcastV.reset();
   Value *null_value = nullptr;
   EXPECT_EQ(null_value, WVH);
@@ -343,11 +361,11 @@ TEST_F(ValueHandle, DestroyingOtherVHOnSameValueDoesntBreakIteration) {
 
   class DestroyingVH final : public CallbackVH {
   public:
-    std::unique_ptr<WeakVH> ToClear[2];
+    std::unique_ptr<WeakTrackingVH> ToClear[2];
     DestroyingVH(Value *V) {
-      ToClear[0].reset(new WeakVH(V));
+      ToClear[0].reset(new WeakTrackingVH(V));
       setValPtr(V);
-      ToClear[1].reset(new WeakVH(V));
+      ToClear[1].reset(new WeakTrackingVH(V));
     }
     void deleted() override {
       ToClear[0].reset();
@@ -361,9 +379,9 @@ TEST_F(ValueHandle, DestroyingOtherVHOnSameValueDoesntBreakIteration) {
   };
 
   {
-    WeakVH ShouldBeVisited1(BitcastV.get());
+    WeakTrackingVH ShouldBeVisited1(BitcastV.get());
     DestroyingVH C(BitcastV.get());
-    WeakVH ShouldBeVisited2(BitcastV.get());
+    WeakTrackingVH ShouldBeVisited2(BitcastV.get());
 
     BitcastV->replaceAllUsesWith(ConstantV);
     EXPECT_EQ(ConstantV, static_cast<Value*>(ShouldBeVisited1));
@@ -371,9 +389,9 @@ TEST_F(ValueHandle, DestroyingOtherVHOnSameValueDoesntBreakIteration) {
   }
 
   {
-    WeakVH ShouldBeVisited1(BitcastV.get());
+    WeakTrackingVH ShouldBeVisited1(BitcastV.get());
     DestroyingVH C(BitcastV.get());
-    WeakVH ShouldBeVisited2(BitcastV.get());
+    WeakTrackingVH ShouldBeVisited2(BitcastV.get());
 
     BitcastV.reset();
     EXPECT_EQ(nullptr, static_cast<Value*>(ShouldBeVisited1));
@@ -482,6 +500,12 @@ TEST_F(ValueHandle, PoisoningVH_ReducesToPointer) {
 
 #else // !NDEBUG
 
+TEST_F(ValueHandle, TrackingVH_Tracks) {
+  TrackingVH<Value> VH(BitcastV.get());
+  BitcastV->replaceAllUsesWith(ConstantV);
+  EXPECT_EQ(VH, ConstantV);
+}
+
 #ifdef GTEST_HAS_DEATH_TEST
 
 TEST_F(ValueHandle, PoisoningVH_Asserts) {
@@ -502,6 +526,26 @@ TEST_F(ValueHandle, PoisoningVH_Asserts) {
   // Don't clear anything out here as destroying the handles should be fine.
 }
 
+TEST_F(ValueHandle, TrackingVH_Asserts) {
+  {
+    TrackingVH<Value> VH(BitcastV.get());
+
+    // The tracking handle shouldn't assert when the value is deleted.
+    BitcastV.reset(new BitCastInst(ConstantV, Type::getInt32Ty(Context)));
+    // But should when we access the handle.
+    EXPECT_DEATH((void)*VH,
+                 "TrackingVH must be non-null and valid on dereference!");
+  }
+
+  {
+    TrackingVH<Instruction> VH(BitcastV.get());
+
+    BitcastV->replaceAllUsesWith(ConstantV);
+    EXPECT_DEATH((void)*VH,
+                 "Tracked Value was replaced by one with an invalid type!");
+  }
+}
+
 #endif // GTEST_HAS_DEATH_TEST
 
 #endif // NDEBUG
diff --git a/unittests/Support/BinaryStreamTest.cpp b/unittests/Support/BinaryStreamTest.cpp
index 1e646a6cf90..74c51e382d9 100644
--- a/unittests/Support/BinaryStreamTest.cpp
+++ b/unittests/Support/BinaryStreamTest.cpp
@@ -358,12 +358,14 @@ TEST_F(BinaryStreamTest, VarStreamArray) {
 
   struct StringExtractor {
   public:
-    Error operator()(BinaryStreamRef Stream, uint32_t &Len, StringRef &Item) {
-      if (Index == 0)
+    typedef uint32_t ContextType;
+    static Error extract(BinaryStreamRef Stream, uint32_t &Len, StringRef &Item,
+                         uint32_t *Index) {
+      if (*Index == 0)
         Len = strlen("1. Test");
-      else if (Index == 1)
+      else if (*Index == 1)
         Len = strlen("2. Longer Test");
-      else if (Index == 2)
+      else if (*Index == 2)
         Len = strlen("3. Really Long Test");
       else
         Len = strlen("4. Super Extra Longest Test Of All");
@@ -372,16 +374,14 @@ TEST_F(BinaryStreamTest, VarStreamArray) {
         return EC;
       Item =
           StringRef(reinterpret_cast<const char *>(Bytes.data()), Bytes.size());
-      ++Index;
+      ++(*Index);
       return Error::success();
     }
-
-  private:
-    uint32_t Index = 0;
   };
 
   for (auto &Stream : Streams) {
-    VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input);
+    uint32_t Context = 0;
+    VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input, &Context);
     auto Iter = Array.begin();
     ASSERT_EQ("1. Test", *Iter++);
     ASSERT_EQ("2. Longer Test", *Iter++);
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index a7be18b6a3c..1f677100dce 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -67,3 +67,5 @@ add_llvm_unittest(SupportTests
 
 # ManagedStatic.cpp uses <pthread>.
 target_link_libraries(SupportTests ${LLVM_PTHREAD_LIB})
+
+add_subdirectory(DynamicLibrary)
diff --git a/unittests/Support/DynamicLibrary/CMakeLists.txt b/unittests/Support/DynamicLibrary/CMakeLists.txt
new file mode 100644
index 00000000000..f0e945e78b1
--- /dev/null
+++ b/unittests/Support/DynamicLibrary/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(LLVM_LINK_COMPONENTS Support)
+
+add_llvm_unittest(DynamicLibraryTests DynamicLibraryTest.cpp)
+
+export_executable_symbols(DynamicLibraryTests)
+
+add_library(PipSqueak SHARED PipSqueak.cxx)
+
+set_output_directory(PipSqueak
+  BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}
+  LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}
+  )
+
+set_target_properties(PipSqueak
+  PROPERTIES PREFIX ""
+  SUFFIX ".so"
+  )
+
+add_dependencies(DynamicLibraryTests PipSqueak)
diff --git a/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp b/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
new file mode 100644
index 00000000000..d46eadc9a04
--- /dev/null
+++ b/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
@@ -0,0 +1,133 @@
+//===- llvm/unittest/Support/DynamicLibrary/DynamicLibraryTest.cpp --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Path.h"
+#include "gtest/gtest.h"
+
+#include "PipSqueak.h"
+#include <string>
+
+using namespace llvm;
+using namespace llvm::sys;
+
+extern "C" PIPSQUEAK_EXPORT const char *TestA() { return "ProcessCall"; }
+
+std::string LibPath() {
+  void *Ptr = (void*)(intptr_t)TestA;
+  std::string Path = fs::getMainExecutable("DynamicLibraryTests", Ptr);
+  llvm::SmallString<256> Buf(path::parent_path(Path));
+  path::append(Buf, "PipSqueak.so");
+  return Buf.str();
+}
+
+#if defined(_WIN32) || (defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN))
+
+typedef void (*SetStrings)(std::string &GStr, std::string &LStr);
+typedef const char *(*GetString)();
+
+template <class T> static T FuncPtr(void *Ptr) {
+  union {
+    T F;
+    void *P;
+  } Tmp;
+  Tmp.P = Ptr;
+  return Tmp.F;
+}
+template <class T> static void* PtrFunc(T *Func) {
+  union {
+    T *F;
+    void *P;
+  } Tmp;
+  Tmp.F = Func;
+  return Tmp.P;
+}
+
+static const char *OverloadTestA() { return "OverloadCall"; }
+
+std::string StdString(const char *Ptr) { return Ptr ? Ptr : ""; }
+
+TEST(DynamicLibrary, Overload) {
+  {
+    std::string Err;
+    llvm_shutdown_obj Shutdown;
+    DynamicLibrary DL =
+        DynamicLibrary::getPermanentLibrary(LibPath().c_str(), &Err);
+    EXPECT_TRUE(DL.isValid());
+    EXPECT_TRUE(Err.empty());
+
+    GetString GS = FuncPtr<GetString>(DL.getAddressOfSymbol("TestA"));
+    EXPECT_TRUE(GS != nullptr && GS != &TestA);
+    EXPECT_EQ(StdString(GS()), "LibCall");
+
+    GS = FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol("TestA"));
+    EXPECT_TRUE(GS != nullptr && GS != &TestA);
+    EXPECT_EQ(StdString(GS()), "LibCall");
+
+    DL = DynamicLibrary::getPermanentLibrary(nullptr, &Err);
+    EXPECT_TRUE(DL.isValid());
+    EXPECT_TRUE(Err.empty());
+
+    GS = FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol("TestA"));
+    EXPECT_TRUE(GS != nullptr && GS == &TestA);
+    EXPECT_EQ(StdString(GS()), "ProcessCall");
+
+    GS = FuncPtr<GetString>(DL.getAddressOfSymbol("TestA"));
+    EXPECT_TRUE(GS != nullptr && GS == &TestA);
+    EXPECT_EQ(StdString(GS()), "ProcessCall");
+
+    DynamicLibrary::AddSymbol("TestA", PtrFunc(&OverloadTestA));
+    GS = FuncPtr<GetString>(DL.getAddressOfSymbol("TestA"));
+    EXPECT_TRUE(GS != nullptr && GS != &OverloadTestA);
+
+    GS = FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol("TestA"));
+    EXPECT_TRUE(GS != nullptr && GS == &OverloadTestA);
+    EXPECT_EQ(StdString(GS()), "OverloadCall");
+  }
+  EXPECT_TRUE(FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol(
+                  "TestA")) == nullptr);
+}
+
+TEST(DynamicLibrary, Shutdown) {
+  std::string A, B;
+  {
+    std::string Err;
+    llvm_shutdown_obj Shutdown;
+    DynamicLibrary DL =
+        DynamicLibrary::getPermanentLibrary(LibPath().c_str(), &Err);
+    EXPECT_TRUE(DL.isValid());
+    EXPECT_TRUE(Err.empty());
+
+    SetStrings SS = FuncPtr<SetStrings>(
+        DynamicLibrary::SearchForAddressOfSymbol("SetStrings"));
+    EXPECT_TRUE(SS != nullptr);
+
+    SS(A, B);
+    EXPECT_EQ(B, "Local::Local");
+  }
+  EXPECT_EQ(A, "Global::~Global");
+  EXPECT_EQ(B, "Local::~Local");
+  EXPECT_TRUE(FuncPtr<SetStrings>(DynamicLibrary::SearchForAddressOfSymbol(
+                  "SetStrings")) == nullptr);
+}
+
+#else
+
+TEST(DynamicLibrary, Unsupported) {
+  std::string Err;
+  DynamicLibrary DL =
+      DynamicLibrary::getPermanentLibrary(LibPath().c_str(), &Err);
+  EXPECT_FALSE(DL.isValid());
+  EXPECT_EQ(Err, "dlopen() not supported on this platform");
+}
+
+#endif
diff --git a/unittests/Support/DynamicLibrary/PipSqueak.cxx b/unittests/Support/DynamicLibrary/PipSqueak.cxx
new file mode 100644
index 00000000000..d1cf7c042b7
--- /dev/null
+++ b/unittests/Support/DynamicLibrary/PipSqueak.cxx
@@ -0,0 +1,46 @@
+//===- llvm/unittest/Support/DynamicLibrary/PipSqueak.cxx -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PipSqueak.h"
+
+#if defined(_WIN32) && !defined(__GNUC__)
+// Disable warnings from inclusion of xlocale & exception
+#pragma warning(push)
+#pragma warning(disable: 4530)
+#pragma warning(disable: 4577)
+#include <string>
+#pragma warning(pop)
+#else
+#include <string>
+#endif
+
+struct Global {
+  std::string *Str;
+  Global() : Str(nullptr) {}
+  ~Global() {
+    if (Str)
+      *Str = "Global::~Global";
+  }
+};
+
+struct Local {
+  std::string &Str;
+  Local(std::string &S) : Str(S) { Str = "Local::Local"; }
+  ~Local() { Str = "Local::~Local"; }
+};
+
+static Global Glb;
+
+extern "C" PIPSQUEAK_EXPORT void SetStrings(std::string &GStr,
+                                            std::string &LStr) {
+  static Local Lcl(LStr);
+  Glb.Str = &GStr;
+}
+
+extern "C" PIPSQUEAK_EXPORT const char *TestA() { return "LibCall"; }
diff --git a/unittests/Support/DynamicLibrary/PipSqueak.h b/unittests/Support/DynamicLibrary/PipSqueak.h
new file mode 100644
index 00000000000..e6a859d6071
--- /dev/null
+++ b/unittests/Support/DynamicLibrary/PipSqueak.h
@@ -0,0 +1,19 @@
+//===- llvm/unittest/Support/DynamicLibrary/PipSqueak.h -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PIPSQUEAK_H
+#define LLVM_PIPSQUEAK_H
+
+#ifdef _WIN32
+#define PIPSQUEAK_EXPORT __declspec(dllexport)
+#else
+#define PIPSQUEAK_EXPORT
+#endif
+
+#endif
diff --git a/unittests/Target/AArch64/InstSizes.cpp b/unittests/Target/AArch64/InstSizes.cpp
index 22b47c6852a..5adbd7d2de4 100644
--- a/unittests/Target/AArch64/InstSizes.cpp
+++ b/unittests/Target/AArch64/InstSizes.cpp
@@ -30,7 +30,8 @@ std::unique_ptr<TargetMachine> createTargetMachine() {
 
 std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) {
   AArch64Subtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
-                      TM->getTargetFeatureString(), *TM, /* isLittle */ false);
+                      TM->getTargetFeatureString(), *TM, /* isLittle */ false,
+                      /* ForCodeSize */ false);
   return llvm::make_unique<AArch64InstrInfo>(ST);
 }
 
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 972eb9cd340..ef2cb4208ea 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2828,7 +2828,8 @@ public:
       if (IntInfo->ModRef & CodeGenIntrinsic::MR_Mod)
         mayStore = true;// Intrinsics that can write to memory are 'mayStore'.
 
-      if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteMem)
+      if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteMem ||
+          IntInfo->hasSideEffects)
         // ReadWriteMem intrinsics can have other strange effects.
         hasSideEffects = true;
     }
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 6df0e6a62ca..24374127f53 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -123,6 +123,13 @@ struct CodeGenIntrinsic {
   /// True if the intrinsic is marked as convergent.
   bool isConvergent;
 
+  /// True if the intrinsic has side effects that aren't captured by any
+  /// of the other flags.
+  bool hasSideEffects;
+
+  // True if the intrinsic is marked as speculatable.
+  bool isSpeculatable;
+
   enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone };
   std::vector<std::pair<unsigned, ArgAttribute>> ArgumentAttributes;
 
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 03c58ac09c2..d1014a5668a 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -515,6 +515,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
   isNoReturn = false;
   isNoDuplicate = false;
   isConvergent = false;
+  isSpeculatable = false;
+  hasSideEffects = false;
 
   if (DefName.size() <= 4 ||
       std::string(DefName.begin(), DefName.begin() + 4) != "int_")
@@ -653,6 +655,10 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       isConvergent = true;
     else if (Property->getName() == "IntrNoReturn")
       isNoReturn = true;
+    else if (Property->getName() == "IntrSpeculatable")
+      isSpeculatable = true;
+    else if (Property->getName() == "IntrHasSideEffects")
+      hasSideEffects = true;
     else if (Property->isSubClassOf("NoCapture")) {
       unsigned ArgNo = Property->getValueAsInt("ArgNo");
       ArgumentAttributes.push_back(std::make_pair(ArgNo, NoCapture));
diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp
index dcf10ab511d..65a1ea2f0f2 100644
--- a/utils/TableGen/GlobalISelEmitter.cpp
+++ b/utils/TableGen/GlobalISelEmitter.cpp
@@ -199,21 +199,19 @@ public:
   void emitCxxCapturedInsnList(raw_ostream &OS);
   void emitCxxCaptureStmts(raw_ostream &OS, StringRef Expr);
 
-  void emit(raw_ostream &OS,
-            std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
-                SubtargetFeatures);
+void emit(raw_ostream &OS, SubtargetFeatureInfoMap SubtargetFeatures);
 
-  /// Compare the priority of this object and B.
-  ///
-  /// Returns true if this object is more important than B.
-  bool isHigherPriorityThan(const RuleMatcher &B) const;
+/// Compare the priority of this object and B.
+///
+/// Returns true if this object is more important than B.
+bool isHigherPriorityThan(const RuleMatcher &B) const;
 
-  /// Report the maximum number of temporary operands needed by the rule
-  /// matcher.
-  unsigned countRendererFns() const;
+/// Report the maximum number of temporary operands needed by the rule
+/// matcher.
+unsigned countRendererFns() const;
 
-  // FIXME: Remove this as soon as possible
-  InstructionMatcher &insnmatcher_front() const { return *Matchers.front(); }
+// FIXME: Remove this as soon as possible
+InstructionMatcher &insnmatcher_front() const { return *Matchers.front(); }
 };
 
 template <class PredicateTy> class PredicateListMatcher {
@@ -856,7 +854,9 @@ public:
   }
 
   void emitCxxRenderStmts(raw_ostream &OS, RuleMatcher &Rule) const override {
-    OS << "    MIB.addReg(" << RegisterDef->getValueAsString("Namespace")
+    OS << "    MIB.addReg(" << (RegisterDef->getValue("Namespace")
+                                    ? RegisterDef->getValueAsString("Namespace")
+                                    : "")
        << "::" << RegisterDef->getName() << ");\n";
   }
 };
@@ -951,6 +951,9 @@ private:
 
   /// True if the instruction can be built solely by mutating the opcode.
   bool canMutate() const {
+    if (OperandRenderers.size() != Matched.getNumOperands())
+      return false;
+
     for (const auto &Renderer : enumerate(OperandRenderers)) {
       if (const auto *Copy = dyn_cast<CopyRenderer>(&*Renderer.value())) {
         const OperandMatcher &OM = Matched.getOperand(Copy->getSymbolicName());
@@ -986,12 +989,16 @@ public:
            << ");\n";
 
         for (auto Def : I->ImplicitDefs) {
-          auto Namespace = Def->getValueAsString("Namespace");
+          auto Namespace = Def->getValue("Namespace")
+                               ? Def->getValueAsString("Namespace")
+                               : "";
           OS << "    MIB.addDef(" << Namespace << "::" << Def->getName()
              << ", RegState::Implicit);\n";
         }
         for (auto Use : I->ImplicitUses) {
-          auto Namespace = Use->getValueAsString("Namespace");
+          auto Namespace = Use->getValue("Namespace")
+                               ? Use->getValueAsString("Namespace")
+                               : "";
           OS << "    MIB.addUse(" << Namespace << "::" << Use->getName()
              << ", RegState::Implicit);\n";
         }
@@ -1072,8 +1079,7 @@ void RuleMatcher::emitCxxCaptureStmts(raw_ostream &OS, StringRef Expr) {
 }
 
 void RuleMatcher::emit(raw_ostream &OS,
-                       std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
-                           SubtargetFeatures) {
+                       SubtargetFeatureInfoMap SubtargetFeatures) {
   if (Matchers.empty())
     llvm_unreachable("Unexpected empty matcher!");
 
@@ -1218,7 +1224,7 @@ private:
   DenseMap<const Record *, const Record *> ComplexPatternEquivs;
 
   // Map of predicates to their subtarget features.
-  std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures;
+  SubtargetFeatureInfoMap SubtargetFeatures;
 
   void gatherNodeEquivs();
   const CodeGenInstruction *findNodeEquiv(Record *N) const;
@@ -1712,15 +1718,36 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
   OS << "#ifdef GET_GLOBALISEL_IMPL\n";
   SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures,
                                                            OS);
-  SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, OS);
+
+  // Separate subtarget features by how often they must be recomputed.
+  SubtargetFeatureInfoMap ModuleFeatures;
+  std::copy_if(SubtargetFeatures.begin(), SubtargetFeatures.end(),
+               std::inserter(ModuleFeatures, ModuleFeatures.end()),
+               [](const SubtargetFeatureInfoMap::value_type &X) {
+                 return !X.second.mustRecomputePerFunction();
+               });
+  SubtargetFeatureInfoMap FunctionFeatures;
+  std::copy_if(SubtargetFeatures.begin(), SubtargetFeatures.end(),
+               std::inserter(FunctionFeatures, FunctionFeatures.end()),
+               [](const SubtargetFeatureInfoMap::value_type &X) {
+                 return X.second.mustRecomputePerFunction();
+               });
+
   SubtargetFeatureInfo::emitComputeAvailableFeatures(
-      Target.getName(), "InstructionSelector", "computeAvailableFeatures",
-      SubtargetFeatures, OS);
+      Target.getName(), "InstructionSelector", "computeAvailableModuleFeatures",
+      ModuleFeatures, OS);
+  SubtargetFeatureInfo::emitComputeAvailableFeatures(
+      Target.getName(), "InstructionSelector",
+      "computeAvailableFunctionFeatures", FunctionFeatures, OS,
+      "const MachineFunction *MF");
 
   OS << "bool " << Target.getName()
      << "InstructionSelector::selectImpl(MachineInstr &I) const {\n"
      << "  MachineFunction &MF = *I.getParent()->getParent();\n"
-     << "  const MachineRegisterInfo &MRI = MF.getRegInfo();\n";
+     << "  const MachineRegisterInfo &MRI = MF.getRegInfo();\n"
+     << "  // FIXME: This should be computed on a per-function basis rather than per-insn.\n"
+     << "  AvailableFunctionFeatures = computeAvailableFunctionFeatures(&STI, &MF);\n"
+     << "  const PredicateBitset AvailableFeatures = getAvailableFeatures();\n";
 
   for (auto &Rule : Rules) {
     Rule.emit(OS, SubtargetFeatures);
@@ -1730,6 +1757,26 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
   OS << "  return false;\n"
      << "}\n"
      << "#endif // ifdef GET_GLOBALISEL_IMPL\n";
+
+  OS << "#ifdef GET_GLOBALISEL_PREDICATES_DECL\n"
+     << "PredicateBitset AvailableModuleFeatures;\n"
+     << "mutable PredicateBitset AvailableFunctionFeatures;\n"
+     << "PredicateBitset getAvailableFeatures() const {\n"
+     << "  return AvailableModuleFeatures | AvailableFunctionFeatures;\n"
+     << "}\n"
+     << "PredicateBitset\n"
+     << "computeAvailableModuleFeatures(const " << Target.getName()
+     << "Subtarget *Subtarget) const;\n"
+     << "PredicateBitset\n"
+     << "computeAvailableFunctionFeatures(const " << Target.getName()
+     << "Subtarget *Subtarget,\n"
+     << "                                 const MachineFunction *MF) const;\n"
+     << "#endif // ifdef GET_GLOBALISEL_PREDICATES_DECL\n";
+
+  OS << "#ifdef GET_GLOBALISEL_PREDICATES_INIT\n"
+     << "AvailableModuleFeatures(computeAvailableModuleFeatures(&STI)),\n"
+     << "AvailableFunctionFeatures()\n"
+     << "#endif // ifdef GET_GLOBALISEL_PREDICATES_INIT\n";
 }
 
 void GlobalISelEmitter::declareSubtargetFeature(Record *Predicate) {
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index e979b94e46d..1fc18a5dd1d 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -476,6 +476,12 @@ struct AttributeComparator {
     if (L->isConvergent != R->isConvergent)
       return R->isConvergent;
 
+    if (L->isSpeculatable != R->isSpeculatable)
+      return R->isSpeculatable;
+
+    if (L->hasSideEffects != R->hasSideEffects)
+      return R->hasSideEffects;
+
     // Try to order by readonly/readnone attribute.
     CodeGenIntrinsic::ModRefBehavior LK = L->ModRef;
     CodeGenIntrinsic::ModRefBehavior RK = R->ModRef;
@@ -600,7 +606,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
     if (!intrinsic.canThrow ||
         intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem ||
         intrinsic.isNoReturn || intrinsic.isNoDuplicate ||
-        intrinsic.isConvergent) {
+        intrinsic.isConvergent || intrinsic.isSpeculatable) {
       OS << "      const Attribute::AttrKind Atts[] = {";
       bool addComma = false;
       if (!intrinsic.canThrow) {
@@ -625,6 +631,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
         OS << "Attribute::Convergent";
         addComma = true;
       }
+      if (intrinsic.isSpeculatable) {
+        if (addComma)
+          OS << ",";
+        OS << "Attribute::Speculatable";
+        addComma = true;
+      }
 
       switch (intrinsic.ModRef) {
       case CodeGenIntrinsic::NoMem:
diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp
index 96418dc77d5..5153c35b126 100644
--- a/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -45,8 +45,7 @@ SubtargetFeatureInfo::getAll(const RecordKeeper &Records) {
 }
 
 void SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
-    std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
-    raw_ostream &OS) {
+    SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
   OS << "// Flags for subtarget features that participate in "
      << "instruction matching.\n";
   OS << "enum SubtargetFeatureFlag : "
@@ -60,8 +59,7 @@ void SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
 }
 
 void SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(
-    std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
-    raw_ostream &OS) {
+    SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
   OS << "// Bits for subtarget features that participate in "
      << "instruction matching.\n";
   OS << "enum SubtargetFeatureBits : "
@@ -74,8 +72,7 @@ void SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(
 }
 
 void SubtargetFeatureInfo::emitNameTable(
-    std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
-    raw_ostream &OS) {
+    SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
   // Need to sort the name table so that lookup by the log of the enum value
   // gives the proper name. More specifically, for a feature of value 1<<n,
   // SubtargetFeatureNames[n] should be the name of the feature.
@@ -102,11 +99,13 @@ void SubtargetFeatureInfo::emitNameTable(
 
 void SubtargetFeatureInfo::emitComputeAvailableFeatures(
     StringRef TargetName, StringRef ClassName, StringRef FuncName,
-    std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
-    raw_ostream &OS) {
+    SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS,
+    StringRef ExtraParams) {
   OS << "PredicateBitset " << TargetName << ClassName << "::\n"
-     << FuncName << "(const MachineFunction *MF, const " << TargetName
-     << "Subtarget *Subtarget) const {\n";
+     << FuncName << "(const " << TargetName << "Subtarget *Subtarget";
+  if (!ExtraParams.empty())
+    OS << ", " << ExtraParams;
+  OS << ") const {\n";
   OS << "  PredicateBitset Features;\n";
   for (const auto &SF : SubtargetFeatures) {
     const SubtargetFeatureInfo &SFI = SF.second;
@@ -120,8 +119,7 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures(
 
 void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
     StringRef TargetName, StringRef ClassName, StringRef FuncName,
-    std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
-    raw_ostream &OS) {
+    SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
   OS << "uint64_t " << TargetName << ClassName << "::\n"
      << FuncName << "(const FeatureBitset& FB) const {\n";
   OS << "  uint64_t Features = 0;\n";
diff --git a/utils/TableGen/SubtargetFeatureInfo.h b/utils/TableGen/SubtargetFeatureInfo.h
index bbaf4525960..c55c16a4031 100644
--- a/utils/TableGen/SubtargetFeatureInfo.h
+++ b/utils/TableGen/SubtargetFeatureInfo.h
@@ -21,6 +21,9 @@ namespace llvm {
 class Record;
 class RecordKeeper;
 
+struct SubtargetFeatureInfo;
+using SubtargetFeatureInfoMap = std::map<Record *, SubtargetFeatureInfo, LessRecordByID>;
+
 /// Helper class for storing information on a subtarget feature which
 /// participates in instruction matching.
 struct SubtargetFeatureInfo {
@@ -43,6 +46,10 @@ struct SubtargetFeatureInfo {
     return "Feature_" + TheDef->getName().str() + "Bit";
   }
 
+  bool mustRecomputePerFunction() const {
+    return TheDef->getValueAsBit("RecomputePerFunction");
+  }
+
   void dump() const;
   static std::vector<std::pair<Record *, SubtargetFeatureInfo>>
   getAll(const RecordKeeper &Records);
@@ -52,21 +59,17 @@ struct SubtargetFeatureInfo {
   /// This version emits the bit value for the feature and is therefore limited
   /// to 64 feature bits.
   static void emitSubtargetFeatureFlagEnumeration(
-      std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
-          &SubtargetFeatures,
-      raw_ostream &OS);
+      SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS);
 
   /// Emit the subtarget feature flag definitions.
   ///
   /// This version emits the bit index for the feature and can therefore support
   /// more than 64 feature bits.
-  static void emitSubtargetFeatureBitEnumeration(
-      std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
-          &SubtargetFeatures,
-      raw_ostream &OS);
+  static void
+  emitSubtargetFeatureBitEnumeration(SubtargetFeatureInfoMap &SubtargetFeatures,
+                                     raw_ostream &OS);
 
-  static void emitNameTable(std::map<Record *, SubtargetFeatureInfo,
-                                     LessRecordByID> &SubtargetFeatures,
+  static void emitNameTable(SubtargetFeatureInfoMap &SubtargetFeatures,
                             raw_ostream &OS);
 
   /// Emit the function to compute the list of available features given a
@@ -82,11 +85,12 @@ struct SubtargetFeatureInfo {
   /// \param FuncName   The name of the function to emit.
   /// \param SubtargetFeatures A map of TableGen records to the
   ///                          SubtargetFeatureInfo equivalent.
-  static void emitComputeAvailableFeatures(
-      StringRef TargetName, StringRef ClassName, StringRef FuncName,
-      std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
-          &SubtargetFeatures,
-      raw_ostream &OS);
+  /// \param ExtraParams Additional arguments to the generated function.
+  static void
+  emitComputeAvailableFeatures(StringRef TargetName, StringRef ClassName,
+                               StringRef FuncName,
+                               SubtargetFeatureInfoMap &SubtargetFeatures,
+                               raw_ostream &OS, StringRef ExtraParams = "");
 
   /// Emit the function to compute the list of available features given a
   /// subtarget.
@@ -103,9 +107,7 @@ struct SubtargetFeatureInfo {
   ///                          SubtargetFeatureInfo equivalent.
   static void emitComputeAssemblerAvailableFeatures(
       StringRef TargetName, StringRef ClassName, StringRef FuncName,
-      std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
-          &SubtargetFeatures,
-      raw_ostream &OS);
+      SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS);
 };
 } // end namespace llvm
 
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index e703bbfc449..4298bc5763b 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -668,7 +668,7 @@ void RecognizableInstr::emitInstructionSpecifier() {
     break;
   case X86Local::MRMSrcReg4VOp3:
     assert(numPhysicalOperands == 3 &&
-           "Unexpected number of operands for MRMSrcRegFrm");
+           "Unexpected number of operands for MRMSrcReg4VOp3Frm");
     HANDLE_OPERAND(roRegister)
     HANDLE_OPERAND(rmRegister)
     HANDLE_OPERAND(vvvvRegister)
@@ -708,7 +708,7 @@ void RecognizableInstr::emitInstructionSpecifier() {
     break;
   case X86Local::MRMSrcMem4VOp3:
     assert(numPhysicalOperands == 3 &&
-           "Unexpected number of operands for MRMSrcMemFrm");
+           "Unexpected number of operands for MRMSrcMem4VOp3Frm");
     HANDLE_OPERAND(roRegister)
     HANDLE_OPERAND(memory)
     HANDLE_OPERAND(vvvvRegister)