Update LLVM to r108428.

2026-05-28 04:12:45 -04:00 · 2010-07-15 17:06:11 +00:00 · 2010-07-15 17:06:11 +00:00 · f3d15b0b37
commit f3d15b0b37
parent 66e41e3c6e
127 changed files with 2487 additions and 877 deletions
--- a/Makefile.rules
+++ b/Makefile.rules
@ -42,7 +42,7 @@ VPATH=$(PROJ_SRC_DIR)
 # Reset the list of suffixes we know how to build.
 #--------------------------------------------------------------------
 .SUFFIXES:
-.SUFFIXES: .c .cpp .cc .h .hpp .o .a .bc .td .ps .dot .ll
+.SUFFIXES: .c .cpp .cc .h .hpp .o .a .bc .td .ps .dot .ll .m .mm
 .SUFFIXES: $(SHLIBEXT) $(SUFFIXES)

 #--------------------------------------------------------------------
@ -632,7 +632,12 @@ ifdef TOOLNAME
  endif
 endif
 endif
+else
+ifneq ($(DARWIN_MAJVERS),4)
+  LD.Flags += $(RPATH) -Wl,@executable_path/../lib
 endif
+endif
+

 #----------------------------------------------------------
 # Options To Invoke Tools
@ -1442,6 +1447,11 @@ $(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
 	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)

+$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+	$(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG)
+	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+	        $(DEPEND_MOVEFILE)
+
 $(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
 	$(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG)
 	$(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
@ -1452,6 +1462,11 @@ $(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
 	$(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
 	        $(DEPEND_MOVEFILE)

+$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_SRC_DIR)/Makefile
+	$(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG)
+	$(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+	        $(DEPEND_MOVEFILE)
+
 #---------------------------------------------------------
 # Create .bc files in the ObjDir directory from .cpp .cc and .c files...
 #---------------------------------------------------------
@ -1470,6 +1485,12 @@ $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
 	        $(BC_DEPEND_MOVEFILE)

+$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
+	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
+                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+	        $(BC_DEPEND_MOVEFILE)
+
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
 	$(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
@ -1482,6 +1503,12 @@ $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
 	        $(BC_DEPEND_MOVEFILE)

+$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
+	$(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
+                              $< -o $(ObjDir)/$*.ll -S -emit-llvm ; \
+	        $(BC_DEPEND_MOVEFILE)
+
 # Provide alternate rule sets if dependencies are disabled
 else

@ -1489,6 +1516,10 @@ $(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@

+$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.CXX) $< -o $@
+
 $(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@
@ -1497,10 +1528,18 @@ $(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.c for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.C) $< -o $@

+$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.C) $< -o $@
+
 $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
 	$(BCCompile.CXX) $< -o $@ -S -emit-llvm

+$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
+	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
+
 $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
 	$(BCCompile.CXX) $< -o $@ -S -emit-llvm
@ -1509,6 +1548,10 @@ $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
 	$(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
 	$(BCCompile.C) $< -o $@ -S -emit-llvm

+$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+	$(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
+	$(BCCompile.C) $< -o $@ -S -emit-llvm
+
 endif


@ -1517,6 +1560,10 @@ $(BuildMode)/%.ii: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cpp for $(BuildMode) build to .ii file"
 	$(Verb) $(Preprocess.CXX) $< -o $@

+$(BuildMode)/%.ii: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.mm for $(BuildMode) build to .ii file"
+	$(Verb) $(Preprocess.CXX) $< -o $@
+
 $(BuildMode)/%.ii: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cc for $(BuildMode) build to .ii file"
 	$(Verb) $(Preprocess.CXX) $< -o $@
@ -1525,11 +1572,19 @@ $(BuildMode)/%.i: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.c for $(BuildMode) build to .i file"
 	$(Verb) $(Preprocess.C) $< -o $@

+$(BuildMode)/%.i: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.m for $(BuildMode) build to .i file"
+	$(Verb) $(Preprocess.C) $< -o $@
+

 $(ObjDir)/%.s: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cpp to asm for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@ -S

+$(ObjDir)/%.s: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.mm to asm for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.CXX) $< -o $@ -S
+
 $(ObjDir)/%.s: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.cc to asm for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.CXX) $< -o $@ -S
@ -1538,6 +1593,10 @@ $(ObjDir)/%.s: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
 	$(Echo) "Compiling $*.c to asm for $(BuildMode) build" $(PIC_FLAG)
 	$(Compile.C) $< -o $@ -S

+$(ObjDir)/%.s: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+	$(Echo) "Compiling $*.m to asm for $(BuildMode) build" $(PIC_FLAG)
+	$(Compile.C) $< -o $@ -S
+

 # make the C and C++ compilers strip debug info out of bytecode libraries.
 ifdef DEBUG_RUNTIME
@ -1750,7 +1809,7 @@ ifndef DISABLE_AUTO_DEPENDENCIES
 ifndef IS_CLEANING_TARGET

 # Get the list of dependency files
-DependSourceFiles := $(basename $(filter %.cpp %.c %.cc, $(Sources)))
+DependSourceFiles := $(basename $(filter %.cpp %.c %.cc %.m %.mm, $(Sources)))
 DependFiles := $(DependSourceFiles:%=$(PROJ_OBJ_DIR)/$(BuildMode)/%.d)

 # Include bitcode dependency files if using bitcode libraries
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@ -954,7 +954,9 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]

 <h5>Syntax:</h5>
 <pre class="doc_code">
+; An unnamed metadata node, which is referenced by the named metadata.
 !1 = metadata !{metadata !"one"}
+; A named metadata.
 !name = !{null, !1}
 </pre>

@ -7744,7 +7746,7 @@ LLVM</a>.</p>

  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-07-13 14:26:09 +0200 (Tue, 13 Jul 2010) $
+  Last modified: $Date: 2010-07-13 21:48:13 +0200 (Tue, 13 Jul 2010) $
 </address>

 </body>
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@ -457,8 +457,8 @@ StringMap class which is used extensively in LLVM and Clang.</p>
 may have embedded null characters.  Therefore, they cannot simply take
 a <tt>const char *</tt>, and taking a <tt>const std::string&amp;</tt> requires
 clients to perform a heap allocation which is usually unnecessary.  Instead,
-many LLVM APIs use a <tt>const StringRef&amp;</tt> or a <tt>const 
-Twine&amp;</tt> for passing strings efficiently.</p>
+many LLVM APIs use a <tt>StringRef</tt> or a <tt>const Twine&amp;</tt> for
+passing strings efficiently.</p>

 </div>

@ -477,19 +477,17 @@ on <tt>std:string</tt>, but does not require heap allocation.</p>
 an <tt>std::string</tt>, or explicitly with a character pointer and length.
 For example, the <tt>StringRef</tt> find function is declared as:</p>

-<div class="doc_code">
-  iterator find(const StringRef &amp;Key);
-</div>
+<pre class="doc_code">
+  iterator find(StringRef Key);
+</pre>

 <p>and clients can call it using any one of:</p>

-<div class="doc_code">
-<pre>
+<pre class="doc_code">
  Map.find("foo");                 <i>// Lookup "foo"</i>
  Map.find(std::string("bar"));    <i>// Lookup "bar"</i>
  Map.find(StringRef("\0baz", 4)); <i>// Lookup "\0baz"</i>
 </pre>
-</div>

 <p>Similarly, APIs which need to return a string may return a <tt>StringRef</tt>
 instance, which can be used directly or converted to an <tt>std::string</tt>
@ -499,7 +497,8 @@ for more information.</p>

 <p>You should rarely use the <tt>StringRef</tt> class directly, because it contains
 pointers to external memory it is not generally safe to store an instance of the
-class (unless you know that the external storage will not be freed).</p>
+class (unless you know that the external storage will not be freed). StringRef is
+small and pervasive enough in LLVM that it should always be passed by value.</p>

 </div>

@ -3943,7 +3942,7 @@ arguments. An argument has a pointer to the parent Function.</p>
  <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
  <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+  Last modified: $Date: 2010-07-15 00:38:02 +0200 (Thu, 15 Jul 2010) $
 </address>

 </body>
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@ -1058,7 +1058,7 @@ int main(int argc, char *argv[]) {
  i32 524329,    ;; Tag
  metadata !"MySource.cpp", 
  metadata !"/Users/mine/sources", 
-  metadata !3    ;; Compile unit
+  metadata !2    ;; Compile unit
 }

 ;;
@ -1068,7 +1068,7 @@ int main(int argc, char *argv[]) {
  i32 524329,    ;; Tag
  metadata !"Myheader.h"
  metadata !"/Users/mine/sources", 
-  metadata !3    ;; Compile unit
+  metadata !2    ;; Compile unit
 }

 ...
@ -1780,7 +1780,7 @@ enum Trees {

  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
  <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2010-06-05 00:49:55 +0200 (Sat, 05 Jun 2010) $
+  Last modified: $Date: 2010-07-13 18:53:20 +0200 (Tue, 13 Jul 2010) $
 </address>

 </body>
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@ -179,7 +179,7 @@ namespace llvm {

    // Constructors.
    APFloat(const fltSemantics &); // Default construct to 0.0
-    APFloat(const fltSemantics &, const StringRef &);
+    APFloat(const fltSemantics &, StringRef);
    APFloat(const fltSemantics &, integerPart);
    APFloat(const fltSemantics &, fltCategory, bool negative);
    APFloat(const fltSemantics &, uninitializedTag);
@ -282,7 +282,7 @@ namespace llvm {
                                            bool, roundingMode);
    opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int,
                                            bool, roundingMode);
-    opStatus convertFromString(const StringRef&, roundingMode);
+    opStatus convertFromString(StringRef, roundingMode);
    APInt bitcastToAPInt() const;
    double convertToDouble() const;
    float convertToFloat() const;
@ -386,8 +386,8 @@ namespace llvm {
                                          roundingMode, bool *) const;
    opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
                                      roundingMode);
-    opStatus convertFromHexadecimalString(const StringRef&, roundingMode);
-    opStatus convertFromDecimalString (const StringRef&, roundingMode);
+    opStatus convertFromHexadecimalString(StringRef, roundingMode);
+    opStatus convertFromDecimalString(StringRef, roundingMode);
    char *convertNormalToHexString(char *, unsigned int, bool,
                                   roundingMode) const;
    opStatus roundSignificandWithExponent(const integerPart *, unsigned int,
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@ -162,7 +162,7 @@ class APInt {
  ///
  /// @param radix 2, 8, 10, or 16
  /// @brief Convert a char array into an APInt
-  void fromString(unsigned numBits, const StringRef &str, uint8_t radix);
+  void fromString(unsigned numBits, StringRef str, uint8_t radix);

  /// This is used by the toString method to divide by the radix. It simply
  /// provides a more convenient form of divide for internal use since KnuthDiv
@ -248,7 +248,7 @@ public:
  /// @param str the string to be interpreted
  /// @param radix the radix to use for the conversion 
  /// @brief Construct an APInt from a string representation.
-  APInt(unsigned numBits, const StringRef &str, uint8_t radix);
+  APInt(unsigned numBits, StringRef str, uint8_t radix);

  /// Simply makes *this a copy of that.
  /// @brief Copy Constructor.
@ -1153,7 +1153,7 @@ public:
  /// This method determines how many bits are required to hold the APInt
  /// equivalent of the string given by \arg str.
  /// @brief Get bits required for string value.
-  static unsigned getBitsNeeded(const StringRef& str, uint8_t radix);
+  static unsigned getBitsNeeded(StringRef str, uint8_t radix);

  /// countLeadingZeros - This function is an APInt version of the
  /// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the number
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@ -106,12 +106,17 @@ public:
  /// into the current block.
  void recomputeInsertPt();

+  struct SavePoint {
+    MachineBasicBlock::iterator InsertPt;
+    DebugLoc DL;
+  };
+
  /// enterLocalValueArea - Prepare InsertPt to begin inserting instructions
  /// into the local value area and return the old insert position.
-  MachineBasicBlock::iterator enterLocalValueArea();
+  SavePoint enterLocalValueArea();

-  /// leaveLocalValueArea - Reset InsertPt to the given old insert position
-  void leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt);
+  /// leaveLocalValueArea - Reset InsertPt to the given old insert position.
+  void leaveLocalValueArea(SavePoint Old);

  virtual ~FastISel();

@ -302,8 +307,6 @@ protected:
  }

 private:
-  bool SelectLoad(const User *I);
-
  bool SelectBinaryOp(const User *I, unsigned ISDOpcode);

  bool SelectFNeg(const User *I);
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@ -272,10 +272,6 @@ namespace llvm {
    unsigned getNumConflictsWithPhysReg(const LiveInterval &li,
                                        unsigned PhysReg) const;

-    /// processImplicitDefs - Process IMPLICIT_DEF instructions. Add isUndef
-    /// marker to implicit_def defs and their uses.
-    void processImplicitDefs();
-
    /// intervalIsInOneMBB - Returns true if the specified interval is entirely
    /// within a single basic block.
    bool intervalIsInOneMBB(const LiveInterval &li) const;
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@ -344,7 +344,7 @@ public:
    VariableDbgInfo.push_back(std::make_pair(N, std::make_pair(Slot, Loc)));
  }

-  VariableDbgInfoMapTy &getVariableDbgInfo() {  return VariableDbgInfo;  }
+  VariableDbgInfoMapTy &getVariableDbgInfo();

 }; // End class MachineModuleInfo

--- a/include/llvm/CodeGen/ProcessImplicitDefs.h
+++ b/include/llvm/CodeGen/ProcessImplicitDefs.h
@ -12,6 +12,7 @@
 #define LLVM_CODEGEN_PROCESSIMPLICITDEFS_H

 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"

 namespace llvm {

@ -24,7 +25,8 @@ namespace llvm {
  private:

    bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg,
-                                unsigned OpIdx, const TargetInstrInfo *tii_);
+                                unsigned OpIdx, const TargetInstrInfo *tii_,
+                                SmallSet<unsigned, 8> &ImpDefRegs);

  public:
    static char ID;
--- a/include/llvm/MC/MCParser/AsmParser.h
+++ b/include/llvm/MC/MCParser/AsmParser.h
@ -107,7 +107,7 @@ private:
  
  void EatToEndOfStatement();
  
-  bool ParseAssignment(const StringRef &Name);
+  bool ParseAssignment(StringRef Name);

  bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
  bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@ -10,12 +10,12 @@
 // This file contains an definitions used in Windows COFF Files.
 //
 // Structures and enums defined within this file where created using
-// information from Microsofts publicly available PE/COFF format document:
+// information from Microsoft's publicly available PE/COFF format document:
 // 
 // Microsoft Portable Executable and Common Object File Format Specification
 // Revision 8.1 - February 15, 2008
 //
-// As of 5/2/2010, hosted by microsoft at:
+// As of 5/2/2010, hosted by Microsoft at:
 // http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx
 //
 //===----------------------------------------------------------------------===//
@ -57,7 +57,7 @@ namespace COFF {
    uint8_t  NumberOfAuxSymbols;
  };

-  enum symbol_flags {
+  enum SymbolFlags {
    SF_TypeMask = 0x0000FFFF,
    SF_TypeShift = 0,

@ -67,36 +67,70 @@ namespace COFF {
    SF_WeakReference = 0x01000000
  };

-  enum symbol_storage_class {
-    IMAGE_SYM_CLASS_END_OF_FUNCTION  = -1,
-    IMAGE_SYM_CLASS_NULL             = 0,
-    IMAGE_SYM_CLASS_AUTOMATIC        = 1,
-    IMAGE_SYM_CLASS_EXTERNAL         = 2,
-    IMAGE_SYM_CLASS_STATIC           = 3,
-    IMAGE_SYM_CLASS_REGISTER         = 4,
-    IMAGE_SYM_CLASS_EXTERNAL_DEF     = 5,
-    IMAGE_SYM_CLASS_LABEL            = 6,
-    IMAGE_SYM_CLASS_UNDEFINED_LABEL  = 7,
-    IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8,
-    IMAGE_SYM_CLASS_ARGUMENT         = 9,
-    IMAGE_SYM_CLASS_STRUCT_TAG       = 10,
-    IMAGE_SYM_CLASS_MEMBER_OF_UNION  = 11,
-    IMAGE_SYM_CLASS_UNION_TAG        = 12,
-    IMAGE_SYM_CLASS_TYPE_DEFINITION  = 13,
-    IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14,
-    IMAGE_SYM_CLASS_ENUM_TAG         = 15,
-    IMAGE_SYM_CLASS_MEMBER_OF_ENUM   = 16,
-    IMAGE_SYM_CLASS_REGISTER_PARAM   = 17,
-    IMAGE_SYM_CLASS_BIT_FIELD        = 18,
+  /// Storage class tells where and what the symbol represents
+  enum SymbolStorageClass {
+    IMAGE_SYM_CLASS_END_OF_FUNCTION  = -1,  ///< Physical end of function
+    IMAGE_SYM_CLASS_NULL             = 0,   ///< No symbol
+    IMAGE_SYM_CLASS_AUTOMATIC        = 1,   ///< Stack variable
+    IMAGE_SYM_CLASS_EXTERNAL         = 2,   ///< External symbol
+    IMAGE_SYM_CLASS_STATIC           = 3,   ///< Static
+    IMAGE_SYM_CLASS_REGISTER         = 4,   ///< Register variable
+    IMAGE_SYM_CLASS_EXTERNAL_DEF     = 5,   ///< External definition
+    IMAGE_SYM_CLASS_LABEL            = 6,   ///< Label
+    IMAGE_SYM_CLASS_UNDEFINED_LABEL  = 7,   ///< Undefined label
+    IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8,   ///< Member of structure
+    IMAGE_SYM_CLASS_ARGUMENT         = 9,   ///< Function argument
+    IMAGE_SYM_CLASS_STRUCT_TAG       = 10,  ///< Structure tag
+    IMAGE_SYM_CLASS_MEMBER_OF_UNION  = 11,  ///< Member of union
+    IMAGE_SYM_CLASS_UNION_TAG        = 12,  ///< Union tag
+    IMAGE_SYM_CLASS_TYPE_DEFINITION  = 13,  ///< Type definition
+    IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14,  ///< Undefined static
+    IMAGE_SYM_CLASS_ENUM_TAG         = 15,  ///< Enumeration tag
+    IMAGE_SYM_CLASS_MEMBER_OF_ENUM   = 16,  ///< Member of enumeration
+    IMAGE_SYM_CLASS_REGISTER_PARAM   = 17,  ///< Register parameter
+    IMAGE_SYM_CLASS_BIT_FIELD        = 18,  ///< Bit field
+    /// ".bb" or ".eb" - beginning or end of block
    IMAGE_SYM_CLASS_BLOCK            = 100,
+    /// ".bf" or ".ef" - beginning or end of function
    IMAGE_SYM_CLASS_FUNCTION         = 101,
-    IMAGE_SYM_CLASS_END_OF_STRUCT    = 102,
-    IMAGE_SYM_CLASS_FILE             = 103,
+    IMAGE_SYM_CLASS_END_OF_STRUCT    = 102, ///< End of structure
+    IMAGE_SYM_CLASS_FILE             = 103, ///< File name
+    /// Line number, reformatted as symbol
    IMAGE_SYM_CLASS_SECTION          = 104,
-    IMAGE_SYM_CLASS_WEAK_EXTERNAL    = 105,
+    IMAGE_SYM_CLASS_WEAK_EXTERNAL    = 105, ///< Duplicate tag
+    /// External symbol in dmert public lib
    IMAGE_SYM_CLASS_CLR_TOKEN        = 107
  };

+  enum SymbolBaseType {
+    IMAGE_SYM_TYPE_NULL   = 0,  ///< No type information or unknown base type.
+    IMAGE_SYM_TYPE_VOID   = 1,  ///< Used with void pointers and functions.
+    IMAGE_SYM_TYPE_CHAR   = 2,  ///< A character (signed byte).
+    IMAGE_SYM_TYPE_SHORT  = 3,  ///< A 2-byte signed integer.
+    IMAGE_SYM_TYPE_INT    = 4,  ///< A natural integer type on the target.
+    IMAGE_SYM_TYPE_LONG   = 5,  ///< A 4-byte signed integer.
+    IMAGE_SYM_TYPE_FLOAT  = 6,  ///< A 4-byte floating-point number.
+    IMAGE_SYM_TYPE_DOUBLE = 7,  ///< An 8-byte floating-point number.
+    IMAGE_SYM_TYPE_STRUCT = 8,  ///< A structure.
+    IMAGE_SYM_TYPE_UNION  = 9,  ///< An union.
+    IMAGE_SYM_TYPE_ENUM   = 10, ///< An enumerated type.
+    IMAGE_SYM_TYPE_MOE    = 11, ///< A member of enumeration (a specific value).
+    IMAGE_SYM_TYPE_BYTE   = 12, ///< A byte; unsigned 1-byte integer.
+    IMAGE_SYM_TYPE_WORD   = 13, ///< A word; unsigned 2-byte integer.
+    IMAGE_SYM_TYPE_UINT   = 14, ///< An unsigned integer of natural size.
+    IMAGE_SYM_TYPE_DWORD  = 15  ///< An unsigned 4-byte integer.
+  };
+
+  enum SymbolComplexType {
+    IMAGE_SYM_DTYPE_NULL     = 0, ///< No complex type; simple scalar variable. 
+    IMAGE_SYM_DTYPE_POINTER  = 1, ///< A pointer to base type.
+    IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type.
+    IMAGE_SYM_DTYPE_ARRAY    = 3, ///< An array of base type.
+    
+    /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT))
+    SCT_COMPLEX_TYPE_SHIFT   = 4
+  };
+
  struct section {
    char     Name[NameSize];
    uint32_t VirtualSize;
@ -110,7 +144,7 @@ namespace COFF {
    uint32_t Characteristics;
  };

-  enum section_characteristics {
+  enum SectionCharacteristics {
    IMAGE_SCN_TYPE_NO_PAD            = 0x00000008,
    IMAGE_SCN_CNT_CODE               = 0x00000020,
    IMAGE_SCN_CNT_INITIALIZED_DATA   = 0x00000040,
@ -154,7 +188,7 @@ namespace COFF {
    uint16_t Type;
  };

-  enum relocation_type_x86 {
+  enum RelocationTypeX86 {
    IMAGE_REL_I386_ABSOLUTE = 0x0000,
    IMAGE_REL_I386_DIR16    = 0x0001,
    IMAGE_REL_I386_REL16    = 0x0002,
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@ -36,7 +36,7 @@ namespace llvm {
    /// Compiles the given POSIX Extended Regular Expression \arg Regex.
    /// This implementation supports regexes and matching strings with embedded
    /// NUL characters.
-    Regex(const StringRef &Regex, unsigned Flags = NoFlags);
+    Regex(StringRef Regex, unsigned Flags = NoFlags);
    ~Regex();

    /// isValid - returns the error encountered during regex compilation, or
@ -55,7 +55,7 @@ namespace llvm {
    /// the first group is always the entire pattern.
    ///
    /// This returns true on a successful match.
-    bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
+    bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = 0);

    /// sub - Return the result of replacing the first match of the regex in
    /// \arg String with the \arg Repl string. Backreferences like "\0" in the
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@ -64,7 +64,7 @@ namespace llvm {
    /// intern - Adds a string to the pool and returns a reference-counted
    /// pointer to it. No additional memory is allocated if the string already
    /// exists in the pool.
-    PooledStringPtr intern(const StringRef &Str);
+    PooledStringPtr intern(StringRef Str);

    /// empty - Checks whether the pool is empty. Returns true if so.
    ///
--- a/include/llvm/Target/TargetAsmParser.h
+++ b/include/llvm/Target/TargetAsmParser.h
@ -49,7 +49,7 @@ public:
  /// \param Operands [out] - The list of parsed operands, this returns
  ///        ownership of them to the caller.
  /// \return True on failure.
-  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                            SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;

  /// ParseDirective - Parse a target specific assembler directive
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@ -371,7 +371,7 @@ public:
                                   unsigned SrcReg, bool isKill, int FrameIndex,
                                   const TargetRegisterClass *RC,
                                   const TargetRegisterInfo *TRI) const {
-    assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
+  assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
  }

  /// loadRegFromStackSlot - Load the specified register of the given register
@ -383,7 +383,7 @@ public:
                                    unsigned DestReg, int FrameIndex,
                                    const TargetRegisterClass *RC,
                                    const TargetRegisterInfo *TRI) const {
-    assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
+  assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
  }
  
  /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
@ -392,7 +392,7 @@ public:
  /// storeRegToStackSlot(). Returns false otherwise.
  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
-                                         const std::vector<CalleeSavedInfo> &CSI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
                                         const TargetRegisterInfo *TRI) const {
    return false;
  }
@ -457,7 +457,7 @@ protected:
  /// take care of adding a MachineMemOperand to the newly created instruction.
  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                              MachineInstr* MI,
-                                              const SmallVectorImpl<unsigned> &Ops,
+                                          const SmallVectorImpl<unsigned> &Ops,
                                              MachineInstr* LoadMI) const {
    return 0;
  }
@ -501,7 +501,7 @@ public:
  /// only differences between the two addresses are the offset. It also returns
  /// the offsets by reference.
  virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
-                                       int64_t &Offset1, int64_t &Offset2) const {
+                                    int64_t &Offset1, int64_t &Offset2) const {
    return false;
  }

--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@ -68,7 +68,7 @@ namespace llvm {
  /// this flag is off (the default), the code generator is not allowed to
  /// produce results that are "less precise" than IEEE allows.  This includes
  /// use of X86 instructions like FSIN and FCOS instead of libcalls.
-  /// UnsafeFPMath implies FiniteOnlyFPMath and LessPreciseFPMAD.
+  /// UnsafeFPMath implies LessPreciseFPMAD.
  extern bool UnsafeFPMath;

  /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@ -266,6 +266,10 @@ public:
    SubclassOptionalData &= V->SubclassOptionalData;
  }

+  /// hasValueHandle - Return true if there is a value handle associated with
+  /// this value.
+  bool hasValueHandle() const { return HasValueHandle; }
+  
  // Methods for support type inquiry through isa, cast, and dyn_cast:
  static inline bool classof(const Value *) {
    return true; // Values are always values.
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@ -440,27 +440,47 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
                                     const TargetData *TD) {
  assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
  
-  // FromHandle - This keeps a weakvh on the from value so that we can know if
-  // it gets deleted out from under us in a recursive simplification.
+  // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
+  // we can know if it gets deleted out from under us or replaced in a
+  // recursive simplification.
  WeakVH FromHandle(From);
+  WeakVH ToHandle(To);
  
  while (!From->use_empty()) {
    // Update the instruction to use the new value.
-    Use &U = From->use_begin().getUse();
-    Instruction *User = cast<Instruction>(U.getUser());
-    U = To;
+    Use &TheUse = From->use_begin().getUse();
+    Instruction *User = cast<Instruction>(TheUse.getUser());
+    TheUse = To;
+
+    // Check to see if the instruction can be folded due to the operand
+    // replacement.  For example changing (or X, Y) into (or X, -1) can replace
+    // the 'or' with -1.
+    Value *SimplifiedVal;
+    {
+      // Sanity check to make sure 'User' doesn't dangle across
+      // SimplifyInstruction.
+      AssertingVH<> UserHandle(User);
    
-    // See if we can simplify it.
-    if (Value *V = SimplifyInstruction(User, TD)) {
-      // Recursively simplify this.
-      ReplaceAndSimplifyAllUses(User, V, TD);
-      
-      // If the recursive simplification ended up revisiting and deleting 'From'
-      // then we're done.
-      if (FromHandle == 0)
-        return;
+      SimplifiedVal = SimplifyInstruction(User, TD);
+      if (SimplifiedVal == 0) continue;
    }
+    
+    // Recursively simplify this user to the new value.
+    ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD);
+    From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
+    To = ToHandle;
+      
+    assert(ToHandle && "To value deleted by recursive simplification?");
+      
+    // If the recursive simplification ended up revisiting and deleting
+    // 'From' then we're done.
+    if (From == 0)
+      return;
  }
+  
+  // If 'From' has value handles referring to it, do a real RAUW to update them.
+  From->replaceAllUsesWith(To);
+  
  From->eraseFromParent();
 }

--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@ -71,22 +71,24 @@ ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {

  // Are there zero predecessors of this block?
  if (PI == PE) {
-    Edge e = getEdge(0,BB);
+    Edge e = getEdge(0, BB);
    Count = getEdgeWeight(e);
  } else {
    // Otherwise, if there are predecessors, the execution count of this block is
    // the sum of the edge frequencies from the incoming edges.
    std::set<const BasicBlock*> ProcessedPreds;
    Count = 0;
-    for (; PI != PE; ++PI)
-      if (ProcessedPreds.insert(*PI).second) {
-        double w = getEdgeWeight(getEdge(*PI, BB));
+    for (; PI != PE; ++PI) {
+      const BasicBlock *P = *PI;
+      if (ProcessedPreds.insert(P).second) {
+        double w = getEdgeWeight(getEdge(P, BB));
        if (w == MissingValue) {
          Count = MissingValue;
          break;
        }
        Count += w;
      }
+    }
  }

  // If the predecessors did not suffice to get block weight, try successors.
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@ -544,20 +544,21 @@ bool LLParser::ParseNamedMetadata() {
    return true;

  SmallVector<MDNode *, 8> Elts;
-  do {
-    // Null is a special case since it is typeless.
-    if (EatIfPresent(lltok::kw_null)) {
-      Elts.push_back(0);
-      continue;
-    }
+  if (Lex.getKind() != lltok::rbrace)
+    do {
+      // Null is a special case since it is typeless.
+      if (EatIfPresent(lltok::kw_null)) {
+        Elts.push_back(0);
+        continue;
+      }

-    if (ParseToken(lltok::exclaim, "Expected '!' here"))
-      return true;
+      if (ParseToken(lltok::exclaim, "Expected '!' here"))
+        return true;
    
-    MDNode *N = 0;
-    if (ParseMDNodeID(N)) return true;
-    Elts.push_back(N);
-  } while (EatIfPresent(lltok::comma));
+      MDNode *N = 0;
+      if (ParseMDNodeID(N)) return true;
+      Elts.push_back(N);
+    } while (EatIfPresent(lltok::comma));

  if (ParseToken(lltok::rbrace, "expected end of metadata node"))
    return true;
@ -2021,33 +2022,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
    ID.StrVal = Lex.getStrVal();
    ID.Kind = ValID::t_LocalName;
    break;
-  case lltok::exclaim:   // !{...} MDNode, !"foo" MDString
-    Lex.Lex();
-    
-    if (EatIfPresent(lltok::lbrace)) {
-      SmallVector<Value*, 16> Elts;
-      if (ParseMDNodeVector(Elts, PFS) ||
-          ParseToken(lltok::rbrace, "expected end of metadata node"))
-        return true;
-
-      ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
-      ID.Kind = ValID::t_MDNode;
-      return false;
-    }
-
-    // Standalone metadata reference
-    // !{ ..., !42, ... }
-    if (Lex.getKind() == lltok::APSInt) {
-      if (ParseMDNodeID(ID.MDNodeVal)) return true;
-      ID.Kind = ValID::t_MDNode;
-      return false;
-    }
-    
-    // MDString:
-    //   ::= '!' STRINGCONSTANT
-    if (ParseMDString(ID.MDStringVal)) return true;
-    ID.Kind = ValID::t_MDString;
-    return false;
+  case lltok::exclaim:   // !42, !{...}, or !"foo"
+    return ParseMetadataValue(ID, PFS);
  case lltok::APSInt:
    ID.APSIntVal = Lex.getAPSIntVal();
    ID.Kind = ValID::t_APSInt;
@ -2528,6 +2504,42 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
  return false;
 }

+/// ParseMetadataValue
+///  ::= !42
+///  ::= !{...}
+///  ::= !"string"
+bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
+  assert(Lex.getKind() == lltok::exclaim);
+  Lex.Lex();
+
+  // MDNode:
+  // !{ ... }
+  if (EatIfPresent(lltok::lbrace)) {
+    SmallVector<Value*, 16> Elts;
+    if (ParseMDNodeVector(Elts, PFS) ||
+        ParseToken(lltok::rbrace, "expected end of metadata node"))
+      return true;
+
+    ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
+    ID.Kind = ValID::t_MDNode;
+    return false;
+  }
+
+  // Standalone metadata reference
+  // !42
+  if (Lex.getKind() == lltok::APSInt) {
+    if (ParseMDNodeID(ID.MDNodeVal)) return true;
+    ID.Kind = ValID::t_MDNode;
+    return false;
+  }
+
+  // MDString:
+  //   ::= '!' STRINGCONSTANT
+  if (ParseMDString(ID.MDStringVal)) return true;
+  ID.Kind = ValID::t_MDString;
+  return false;
+}
+

 //===----------------------------------------------------------------------===//
 // Function Parsing.
@ -3983,6 +3995,10 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
 ///   ::= 'null' | TypeAndValue
 bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
                                 PerFunctionState *PFS) {
+  // Check for an empty list.
+  if (Lex.getKind() == lltok::rbrace)
+    return false;
+
  do {
    // Null is a special case since it is typeless.
    if (EatIfPresent(lltok::kw_null)) {
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@ -308,6 +308,7 @@ namespace llvm {
    bool ParseGlobalValue(const Type *Ty, Constant *&V);
    bool ParseGlobalTypeAndValue(Constant *&V);
    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+    bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);

    // Function Parsing.
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@ -820,7 +820,7 @@ bool BitcodeReader::ParseMetadata() {
      IsFunctionLocal = true;
      // fall-through
    case bitc::METADATA_NODE: {
-      if (Record.empty() || Record.size() % 2 == 1)
+      if (Record.size() % 2 == 1)
        return Error("Invalid METADATA_NODE record");

      unsigned Size = Record.size();
@ -834,7 +834,8 @@ bool BitcodeReader::ParseMetadata() {
        else
          Elts.push_back(NULL);
      }
-      Value *V = MDNode::getWhenValsUnresolved(Context, &Elts[0], Elts.size(),
+      Value *V = MDNode::getWhenValsUnresolved(Context,
+                                               Elts.data(), Elts.size(),
                                               IsFunctionLocal);
      IsFunctionLocal = false;
      MDValueList.AssignValue(V, NextMDValueNo++);
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@ -178,7 +178,7 @@ bool AsmPrinter::doInitialization(Module &M) {
  if (!M.getModuleInlineAsm().empty()) {
    OutStreamer.AddComment("Start of file scope inline assembly");
    OutStreamer.AddBlankLine();
-    EmitInlineAsm(M.getModuleInlineAsm(), 0/*no loc cookie*/);
+    EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/);
    OutStreamer.AddComment("End of file scope inline assembly");
    OutStreamer.AddBlankLine();
  }
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@ -53,17 +53,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
  }
  
  SourceMgr SrcMgr;
-
-  // Ensure the buffer is newline terminated.
-  char *TmpString = 0;
-  if (Str.back() != '\n') {
-    TmpString = new char[Str.size() + 2];
-    memcpy(TmpString, Str.data(), Str.size());
-    TmpString[Str.size()] = '\n';
-    TmpString[Str.size() + 1] = 0;
-    isNullTerminated = true;
-    Str = TmpString;
-  }
  
  // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
  LLVMContext &LLVMCtx = MMI->getModule()->getContext();
@ -95,9 +84,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
                       /*NoFinalize*/ true);
  if (Res && !HasDiagHandler)
    report_fatal_error("Error parsing inline asm\n");
-
-  if (TmpString)
-    delete[] TmpString;
 }


--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@ -119,6 +119,7 @@ bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
 //
 bool LiveInterval::overlapsFrom(const LiveInterval& other,
                                const_iterator StartPos) const {
+  assert(!empty() && "empty interval");
  const_iterator i = begin();
  const_iterator ie = end();
  const_iterator j = StartPos;
@ -161,16 +162,8 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
 /// by [Start, End).
 bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
  assert(Start < End && "Invalid range");
-  const_iterator I  = begin();
-  const_iterator E  = end();
-  const_iterator si = std::upper_bound(I, E, Start);
-  const_iterator ei = std::upper_bound(I, E, End);
-  if (si != ei)
-    return true;
-  if (si == I)
-    return false;
-  --si;
-  return si->contains(Start);
+  const_iterator I = std::lower_bound(begin(), end(), End);
+  return I != begin() && (--I)->end > Start;
 }

 /// extendIntervalEndTo - This method is used when we want to extend the range
@ -868,6 +861,10 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
          OS << "?";
        else
          OS << vni->def;
+        if (vni->hasPHIKill())
+          OS << "-phikill";
+        if (vni->hasRedefByEC())
+          OS << "-ec";
      }
    }
  }
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@ -497,11 +497,6 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
 /// not safe to hoist it.
 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
-  // It is not profitable to hoist implicitdefs.  FIXME: Why not?  what if they
-  // are an argument to some other otherwise-hoistable instruction?
-  if (I.isImplicitDef())
-    return false;
-  
  // Check if it's safe to move the instruction.
  bool DontMoveAcrossStore = true;
  if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
@ -717,7 +712,9 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,

 bool MachineLICM::EliminateCSE(MachineInstr *MI,
          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
-  if (CI == CSEMap.end())
+  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+  // the undef property onto uses.
+  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@ -563,3 +563,26 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
  return 0;
 }

+namespace {
+  /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map
+  /// by source location, to avoid depending on the arbitrary order that
+  /// instruction selection visits variables in.
+  struct VariableDebugSorter {
+    bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
+                    const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
+                  const {
+       if (A.second.second.getLine() != B.second.second.getLine())
+         return A.second.second.getLine() < B.second.second.getLine();
+       if (A.second.second.getCol() != B.second.second.getCol())
+         return A.second.second.getCol() < B.second.second.getCol();
+       return false;
+    }
+  };
+}
+
+MachineModuleInfo::VariableDbgInfoMapTy &
+MachineModuleInfo::getVariableDbgInfo() {
+  std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(),
+                   VariableDebugSorter());
+  return VariableDbgInfo;
+}
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@ -41,21 +41,51 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
 }

-bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
-                                                 unsigned Reg, unsigned OpIdx,
-                                                 const TargetInstrInfo *tii_) {
+bool
+ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
+                                            unsigned Reg, unsigned OpIdx,
+                                            const TargetInstrInfo *tii_,
+                                            SmallSet<unsigned, 8> &ImpDefRegs) {
  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
  if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-      Reg == SrcReg && DstSubReg == 0)
+      Reg == SrcReg &&
+      (DstSubReg == 0 || ImpDefRegs.count(DstReg)))
    return true;

  switch(OpIdx) {
-    case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0;
-    case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0;
-    default: return false;
+  case 1:
+    return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+                            ImpDefRegs.count(MI->getOperand(0).getReg()));
+  case 2:
+    return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+                                  ImpDefRegs.count(MI->getOperand(0).getReg()));
+  default: return false;
  }
 }

+static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
+                        const TargetInstrInfo *tii_,
+                        SmallSet<unsigned, 8> &ImpDefRegs) {
+  if (MI->isCopy()) {
+    MachineOperand &MO0 = MI->getOperand(0);
+    MachineOperand &MO1 = MI->getOperand(1);
+    if (MO1.getReg() != Reg)
+      return false;
+    if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+      return true;
+    return false;
+  }
+
+  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+  if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+    if (Reg != SrcReg)
+      return false;
+    if (DstSubReg == 0 || ImpDefRegs.count(DstReg))
+      return true;
+  }
+  return false;
+}
+
 /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
 /// there is one implicit_def for each use. Add isUndef marker to
 /// implicit_def defs and their uses.
@ -104,7 +134,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
      // Eliminate %reg1032:sub<def> = COPY undef.
      if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
        MachineOperand &MO = MI->getOperand(1);
-        if (ImpDefRegs.count(MO.getReg())) {
+        if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
          if (MO.isKill()) {
            LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
            vi.removeKill(MI);
@ -126,7 +156,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
        if (!ImpDefRegs.count(Reg))
          continue;
        // Use is a copy, just turn it into an implicit_def.
-        if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) {
+        if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) {
          bool isKill = MO.isKill();
          MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
          for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
@ -223,11 +253,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
        MachineInstr *RMI = RUses[i];

        // Turn a copy use into an implicit_def.
-        unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-        if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg &&
-             RMI->getOperand(0).getSubReg() == 0) ||
-            (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-             Reg == SrcReg && DstSubReg == 0)) {
+        if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) {
          RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));

          bool isKill = false;
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@ -135,7 +135,7 @@ unsigned FastISel::getRegForValue(const Value *V) {
       !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
    return FuncInfo.InitializeRegForValue(V);

-  MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+  SavePoint SaveInsertPt = enterLocalValueArea();

  // Materialize the value in a register. Emit any instructions in the
  // local value area.
@ -286,18 +286,22 @@ void FastISel::recomputeInsertPt() {
    ++FuncInfo.InsertPt;
 }

-MachineBasicBlock::iterator FastISel::enterLocalValueArea() {
+FastISel::SavePoint FastISel::enterLocalValueArea() {
  MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+  DebugLoc OldDL = DL;
  recomputeInsertPt();
-  return OldInsertPt;
+  DL = DebugLoc();
+  SavePoint SP = { OldInsertPt, OldDL };
+  return SP;
 }

-void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) {
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
  if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
    LastLocalValue = llvm::prior(FuncInfo.InsertPt);

  // Restore the previous insert position.
-  FuncInfo.InsertPt = OldInsertPt;
+  FuncInfo.InsertPt = OldInsertPt.InsertPt;
+  DL = OldInsertPt.DL;
 }

 /// SelectBinaryOp - Select and emit code for a binary operator instruction,
@ -778,40 +782,9 @@ FastISel::SelectFNeg(const User *I) {
  return true;
 }

-bool
-FastISel::SelectLoad(const User *I) {
-  LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I));
-
-  // For a load from an alloca, make a limited effort to find the value
-  // already available in a register, avoiding redundant loads.
-  if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) {
-    BasicBlock::iterator ScanFrom = LI;
-    if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(),
-                                                  LI->getParent(), ScanFrom)) {
-      if (!V->use_empty() &&
-          (!isa<Instruction>(V) ||
-           cast<Instruction>(V)->getParent() == LI->getParent() ||
-           (isa<AllocaInst>(V) &&
-            FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) &&
-          (!isa<Argument>(V) ||
-           LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) {
-      unsigned ResultReg = getRegForValue(V);
-      if (ResultReg != 0) {
-        UpdateValueMap(I, ResultReg);
-        return true;
-      }
-      }
-    }
-  }
-
-  return false;
-}
-
 bool
 FastISel::SelectOperator(const User *I, unsigned Opcode) {
  switch (Opcode) {
-  case Instruction::Load:
-    return SelectLoad(I);
  case Instruction::Add:
    return SelectBinaryOp(I, ISD::ADD);
  case Instruction::FAdd:
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -820,7 +820,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
    unsigned InReg = It->second;
    RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
    SDValue Chain = DAG.getEntryNode();
-    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
  }

  // Otherwise create a new SDValue and remember it.
@ -3955,7 +3955,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
        AliasAnalysis::NoAlias) {
      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, 
-                                false, I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+                                false, I.getArgOperand(0), 0,
+                                I.getArgOperand(1), 0));
      return 0;
    }

@ -5522,10 +5523,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        break;
      }

-      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
-        assert(!OpInfo.isIndirect &&
-               "Don't know how to handle indirect other inputs yet!");
+      // Treat indirect 'X' constraint as memory.
+      if (OpInfo.ConstraintType == TargetLowering::C_Other && 
+          OpInfo.isIndirect) 
+        OpInfo.ConstraintType = TargetLowering::C_Memory;

+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
        std::vector<SDValue> Ops;
        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
                                         Ops, DAG);
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@ -712,7 +712,7 @@ bool AsmParser::ParseStatement() {
  return HadError;
 }

-bool AsmParser::ParseAssignment(const StringRef &Name) {
+bool AsmParser::ParseAssignment(StringRef Name) {
  // FIXME: Use better location, we should use proper tokens.
  SMLoc EqualLoc = Lexer.getLoc();

--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@ -761,7 +761,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics,
    makeNaN();
 }

-APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
+APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
 {
  assertArithmeticOK(ourSemantics);
  initialize(&ourSemantics);
@ -2185,8 +2185,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
 }

 APFloat::opStatus
-APFloat::convertFromHexadecimalString(const StringRef &s,
-                                      roundingMode rounding_mode)
+APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
 {
  lostFraction lost_fraction = lfExactlyZero;
  integerPart *significand;
@ -2361,7 +2360,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
 }

 APFloat::opStatus
-APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
+APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
 {
  decimalInfo D;
  opStatus fs;
@ -2471,7 +2470,7 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo
 }

 APFloat::opStatus
-APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
+APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
 {
  assertArithmeticOK(*semantics);
  assert(!str.empty() && "Invalid string length");
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@ -102,7 +102,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
  clearUnusedBits();
 }

-APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
  : BitWidth(numbits), VAL(0) {
  assert(BitWidth && "Bitwidth too small");
  fromString(numbits, Str, radix);
@ -613,7 +613,7 @@ APInt& APInt::flip(unsigned bitPosition) {
  return *this;
 }

-unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
  assert(!str.empty() && "Invalid string length");
  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
         "Radix should be 2, 8, 10, or 16!");
@ -2046,7 +2046,7 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
 }

-void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
  // Check our assumptions here
  assert(!str.empty() && "Invalid string length");
  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@ -19,7 +19,7 @@
 #include <string>
 using namespace llvm;

-Regex::Regex(const StringRef &regex, unsigned Flags) {
+Regex::Regex(StringRef regex, unsigned Flags) {
  unsigned flags = 0;
  preg = new llvm_regex();
  preg->re_endp = regex.end();
@ -52,7 +52,7 @@ unsigned Regex::getNumMatches() const {
  return preg->re_nsub;
 }

-bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
  unsigned nmatch = Matches ? preg->re_nsub+1 : 0;

  // pmatch needs to have at least one element.
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@ -22,7 +22,7 @@ StringPool::~StringPool() {
  assert(InternTable.empty() && "PooledStringPtr leaked!");
 }

-PooledStringPtr StringPool::intern(const StringRef &Key) {
+PooledStringPtr StringPool::intern(StringRef Key) {
  table_t::iterator I = InternTable.find(Key);
  if (I != InternTable.end())
    return PooledStringPtr(&*I);
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@ -310,12 +310,9 @@ Program::Wait(unsigned secondsToWait,
  // fact of having a handler at all causes the wait below to return with EINTR,
  // unlike if we used SIG_IGN.
  if (secondsToWait) {
-#if !defined(__HAIKU__) && !defined(__minix)
-    Act.sa_sigaction = 0;
-#endif
+    memset(&Act, 0, sizeof(Act));
    Act.sa_handler = TimeOutHandler;
    sigemptyset(&Act.sa_mask);
-    Act.sa_flags = 0;
    sigaction(SIGALRM, &Act, &Old);
    alarm(secondsToWait);
  }
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@ -48,6 +48,8 @@ def FeatureHWDiv  : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
                                     "Enable divide instructions">;
 def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
                                 "Enable Thumb2 extract and pack instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+                                         "FP compare + branch is slow">;

 // Some processors have multiply-accumulate instructions that don't
 // play nicely with other VFP instructions, and it's generally better
@ -129,7 +131,7 @@ def : Processor<"arm1156t2f-s",    ARMV6Itineraries,
 // V7 Processors.
 def : Processor<"cortex-a8",        CortexA8Itineraries,
                [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
-                 FeatureNEONForFP, FeatureT2ExtractPack]>;
+                 FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
 def : Processor<"cortex-a9",        CortexA9Itineraries,
                [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
 def : ProcNoItin<"cortex-m3",       [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@ -519,9 +519,8 @@ namespace ARM_AM {
  //
  // This is stored in two operands [regaddr, align].  The first is the
  // address register.  The second operand is the value of the alignment
-  // specifier to use or zero if no explicit alignment.
-  // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific
-  // instruction.
+  // specifier in bytes or zero if no explicit alignment.
+  // Valid alignments depend on the specific instruction.

  //===--------------------------------------------------------------------===//
  // NEON Modified Immediates
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -565,6 +565,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
+  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CNEG:          return "ARMISD::CNEG";
@ -623,6 +624,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
+  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
+  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
@ -2216,7 +2219,7 @@ static bool isFloatingPointZero(SDValue Op) {
 /// the given operands.
 SDValue
 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                             SDValue &ARMCC, SelectionDAG &DAG,
+                             SDValue &ARMcc, SelectionDAG &DAG,
                             DebugLoc dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
@ -2268,48 +2271,14 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
    CompareType = ARMISD::CMPZ;
    break;
  }
-  ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  ARMcc = DAG.getConstant(CondCode, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
 }

-static bool canBitcastToInt(SDNode *Op) {
-  return Op->hasOneUse() && 
-    ISD::isNormalLoad(Op) &&
-    Op->getValueType(0) == MVT::f32;
-}
-
-static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
-  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
-    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
-                       Ld->getChain(), Ld->getBasePtr(),
-                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
-                       Ld->isVolatile(), Ld->isNonTemporal(),
-                       Ld->getAlignment());
-
-  llvm_unreachable("Unknown VFP cmp argument!");
-}
-
 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
 SDValue
-ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
-                             SDValue &ARMCC, SelectionDAG &DAG,
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                             DebugLoc dl) const {
-  if (UnsafeFPMath && FiniteOnlyFPMath() &&
-      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
-       CC == ISD::SETNE || CC == ISD::SETUNE) &&
-      canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
-    // If unsafe fp math optimization is enabled and there are no othter uses of
-    // the CMP operands, and the condition code is EQ oe NE, we can optimize it
-    // to an integer comparison.
-    if (CC == ISD::SETOEQ)
-      CC = ISD::SETEQ;
-    else if (CC == ISD::SETUNE)
-      CC = ISD::SETNE;
-    LHS = bitcastToInt(LHS, DAG);
-    RHS = bitcastToInt(RHS, DAG);
-    return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
-  }
-
  SDValue Cmp;
  if (!isFloatingPointZero(RHS))
    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
@ -2328,59 +2297,184 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
-    SDValue ARMCC;
+    SDValue ARMcc;
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

-  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
-                               ARMCC, CCR, Cmp);
+                               ARMcc, CCR, Cmp);
  if (CondCode2 != ARMCC::AL) {
-    SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
    // FIXME: Needs another CMP because flag can have but one use.
-    SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
+    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
-                         Result, TrueVal, ARMCC2, CCR, Cmp2);
+                         Result, TrueVal, ARMcc2, CCR, Cmp2);
  }
  return Result;
 }

-SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue  Chain = Op.getOperand(0);
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+                           const ARMSubtarget *Subtarget) {
+  SDNode *N = Op.getNode();
+  if (!N->hasOneUse())
+    // Otherwise it requires moving the value from fp to integer registers.
+    return false;
+  if (!N->getNumValues())
+    return false;
+  EVT VT = Op.getValueType();
+  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+    // vmrs are very slow, e.g. cortex-a8.
+    return false;
+
+  if (isFloatingPointZero(Op)) {
+    SeenZero = true;
+    return true;
+  }
+  return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+  if (isFloatingPointZero(Op))
+    return DAG.getConstant(0, MVT::i32);
+
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                       Ld->getChain(), Ld->getBasePtr(),
+                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                       Ld->isVolatile(), Ld->isNonTemporal(),
+                       Ld->getAlignment());
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+                           SDValue &RetVal1, SDValue &RetVal2) {
+  if (isFloatingPointZero(Op)) {
+    RetVal1 = DAG.getConstant(0, MVT::i32);
+    RetVal2 = DAG.getConstant(0, MVT::i32);
+    return;
+  }
+
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+    SDValue Ptr = Ld->getBasePtr();
+    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), Ptr,
+                          Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          Ld->getAlignment());
+
+    EVT PtrType = Ptr.getValueType();
+    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
+    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), NewPtr,
+                          Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          NewAlign);
+    return;
+  }
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
-  SDValue    LHS = Op.getOperand(2);
-  SDValue    RHS = Op.getOperand(3);
-  SDValue   Dest = Op.getOperand(4);
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  bool SeenZero = false;
+  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+      canChangeToInt(RHS, SeenZero, Subtarget) &&
+      // If one of the operand is zero, it's safe to ignore the NaN case.
+      (FiniteOnlyFPMath() || SeenZero)) {
+    // If unsafe fp math optimization is enabled and there are no othter uses of
+    // the CMP operands, and the condition code is EQ oe NE, we can optimize it
+    // to an integer comparison.
+    if (CC == ISD::SETOEQ)
+      CC = ISD::SETEQ;
+    else if (CC == ISD::SETUNE)
+      CC = ISD::SETNE;
+
+    SDValue ARMcc;
+    if (LHS.getValueType() == MVT::f32) {
+      LHS = bitcastf32Toi32(LHS, DAG);
+      RHS = bitcastf32Toi32(RHS, DAG);
+      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+                         Chain, Dest, ARMcc, CCR, Cmp);
+    }
+
+    SDValue LHS1, LHS2;
+    SDValue RHS1, RHS2;
+    expandf64Toi32(LHS, DAG, LHS1, LHS2);
+    expandf64Toi32(RHS, DAG, RHS1, RHS2);
+    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+    ARMcc = DAG.getConstant(CondCode, MVT::i32);
+    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+  }
+
+  return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
-    SDValue ARMCC;
+    SDValue ARMcc;
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
-                       Chain, Dest, ARMCC, CCR,Cmp);
+                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+  if (UnsafeFPMath &&
+      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+       CC == ISD::SETNE || CC == ISD::SETUNE)) {
+    SDValue Result = OptimizeVFPBrcond(Op, DAG);
+    if (Result.getNode())
+      return Result;
+  }
+
  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

-  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
-  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  if (CondCode2 != ARMCC::AL) {
-    ARMCC = DAG.getConstant(CondCode2, MVT::i32);
-    SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  }
  return Res;
@ -2469,12 +2563,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  EVT SrcVT = Tmp1.getValueType();
  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-  SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+  SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
  SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
-  SDValue Cmp = getVFPCmp(Tmp1, FP0,
-                          ISD::SETLT, ARMCC, DAG, dl);
+  SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
 }

 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@ -2553,51 +2646,18 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
 }

 /// getZeroVector - Returns a vector of specified type with all zero elements.
-///
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction.  However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed.  Regardless, use a canonical VMOV to create the
+/// zero vector.
 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
-
-  // Zero vectors are used to represent vector negation and in those cases
-  // will be implemented with the NEON VNEG instruction.  However, VNEG does
-  // not support i64 elements, so sometimes the zero vectors will need to be
-  // explicitly constructed.  For those cases, and potentially other uses in
-  // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
-  // to their dest type.  This ensures they get CSE'd.
-  SDValue Vec;
-  SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
-  SmallVector<SDValue, 8> Ops;
-  MVT TVT;
-
-  if (VT.getSizeInBits() == 64) {
-    Ops.assign(8, Cst); TVT = MVT::v8i8;
-  } else {
-    Ops.assign(16, Cst); TVT = MVT::v16i8;
-  }
-  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
-}
-
-/// getOnesVector - Returns a vector of specified type with all bits set.
-///
-static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
-  assert(VT.isVector() && "Expected a vector type");
-
-  // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
-  // dest type. This ensures they get CSE'd.
-  SDValue Vec;
-  SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
-  SmallVector<SDValue, 8> Ops;
-  MVT TVT;
-
-  if (VT.getSizeInBits() == 64) {
-    Ops.assign(8, Cst); TVT = MVT::v8i8;
-  } else {
-    Ops.assign(16, Cst); TVT = MVT::v16i8;
-  }
-  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+  // The canonical modified immediate encoding of a zero vector is....0!
+  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
 }

 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@ -2611,7 +2671,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt  = Op.getOperand(2);
-  SDValue ARMCC;
+  SDValue ARMcc;
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@ -2627,9 +2687,9 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,

  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
-                          ARMCC, DAG, dl);
+                          ARMcc, DAG, dl);
  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
-  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
+  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
@ -2647,7 +2707,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt  = Op.getOperand(2);
-  SDValue ARMCC;
+  SDValue ARMcc;

  assert(Op.getOpcode() == ISD::SHL_PARTS);
  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@ -2661,9 +2721,9 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
-                          ARMCC, DAG, dl);
+                          ARMcc, DAG, dl);
  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
-  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
+  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
@ -2850,13 +2910,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {

 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
 /// valid vector constant for a NEON instruction with a "modified immediate"
-/// operand (e.g., VMOV).  If so, return either the constant being
-/// splatted or the encoded value, depending on the DoEncode parameter.
+/// operand (e.g., VMOV).  If so, return the encoded value.
 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                 unsigned SplatBitSize, SelectionDAG &DAG,
-                                 bool isVMOV, bool DoEncode) {
+                                 EVT &VT, bool is128Bits, bool isVMOV) {
  unsigned OpCmode, Imm;
-  EVT VT;

  // SplatBitSize is set to the smallest size that splats the vector, so a
  // zero vector will always have SplatBitSize == 8.  However, NEON modified
@ -2868,16 +2926,18 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,

  switch (SplatBitSize) {
  case 8:
+    if (!isVMOV)
+      return SDValue();
    // Any 1-byte value is OK.  Op=0, Cmode=1110.
    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
    OpCmode = 0xe;
    Imm = SplatBits;
-    VT = MVT::i8;
+    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
    break;

  case 16:
    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
-    VT = MVT::i16;
+    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
    if ((SplatBits & ~0xff) == 0) {
      // Value = 0x00nn: Op=x, Cmode=100x.
      OpCmode = 0x8;
@ -2897,7 +2957,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
    // * only one byte is nonzero, or
    // * the least significant byte is 0xff and the second byte is nonzero, or
    // * the least significant 2 bytes are 0xff and the third is nonzero.
-    VT = MVT::i32;
+    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
    if ((SplatBits & ~0xff) == 0) {
      // Value = 0x000000nn: Op=x, Cmode=000x.
      OpCmode = 0;
@ -2949,9 +3009,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
    return SDValue();

  case 64: {
-    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
    if (!isVMOV)
      return SDValue();
+    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
    uint64_t BitMask = 0xff;
    uint64_t Val = 0;
    unsigned ImmMask = 1;
@ -2969,7 +3029,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
    // Op=1, Cmode=1110.
    OpCmode = 0x1e;
    SplatBits = Val;
-    VT = MVT::i64;
+    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
    break;
  }

@ -2978,32 +3038,8 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
    return SDValue();
  }

-  if (DoEncode) {
-    unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
-    return DAG.getTargetConstant(EncodedVal, MVT::i32);
-  }
-  return DAG.getTargetConstant(SplatBits, VT);
-}
-
-/// getNEONModImm - If this is a valid vector constant for a NEON instruction
-/// with a "modified immediate" operand (e.g., VMOV) of the specified element
-/// size, return the encoded value for that immediate.  The ByteSize field
-/// indicates the number of bytes of each element [1248].
-SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
-                           SelectionDAG &DAG) {
-  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
-  APInt SplatBits, SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
-                                      HasAnyUndefs, ByteSize * 8))
-    return SDValue();
-
-  if (SplatBitSize > ByteSize * 8)
-    return SDValue();
-
-  return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
-                           SplatBitSize, DAG, isVMOV, true);
+  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+  return DAG.getTargetConstant(EncodedVal, MVT::i32);
 }

 static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
@ -3194,43 +3230,6 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
  return true;
 }

-
-static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
-  // Canonicalize all-zeros and all-ones vectors.
-  ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
-  if (ConstVal->isNullValue())
-    return getZeroVector(VT, DAG, dl);
-  if (ConstVal->isAllOnesValue())
-    return getOnesVector(VT, DAG, dl);
-
-  EVT CanonicalVT;
-  if (VT.is64BitVector()) {
-    switch (Val.getValueType().getSizeInBits()) {
-    case 8:  CanonicalVT = MVT::v8i8; break;
-    case 16: CanonicalVT = MVT::v4i16; break;
-    case 32: CanonicalVT = MVT::v2i32; break;
-    case 64: CanonicalVT = MVT::v1i64; break;
-    default: llvm_unreachable("unexpected splat element type"); break;
-    }
-  } else {
-    assert(VT.is128BitVector() && "unknown splat vector size");
-    switch (Val.getValueType().getSizeInBits()) {
-    case 8:  CanonicalVT = MVT::v16i8; break;
-    case 16: CanonicalVT = MVT::v8i16; break;
-    case 32: CanonicalVT = MVT::v4i32; break;
-    case 64: CanonicalVT = MVT::v2i64; break;
-    default: llvm_unreachable("unexpected splat element type"); break;
-    }
-  }
-
-  // Build a canonical splat for this value.
-  SmallVector<SDValue, 8> Ops;
-  Ops.assign(CanonicalVT.getVectorNumElements(), Val);
-  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
-                            Ops.size());
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
-}
-
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.
 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
@ -3244,11 +3243,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize <= 64) {
      // Check if an immediate VMOV works.
+      EVT VmovVT;
      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(),
-                                      SplatBitSize, DAG, true, false);
-      if (Val.getNode())
-        return BuildSplat(Val, VT, DAG, dl);
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VmovVT, VT.is128BitVector(), true);
+      if (Val.getNode()) {
+        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+      }
+
+      // Try an immediate VMVN.
+      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+                             ((1LL << SplatBitSize) - 1));
+      Val = isNEONModifiedImm(NegatedImm,
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VmovVT, VT.is128BitVector(), false);
+      if (Val.getNode()) {
+        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+      }
    }
  }

@ -3825,6 +3838,15 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
  return BB;
 }

+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I)
+    if (*I != Succ)
+      return *I;
+  llvm_unreachable("Expecting a BB with two successors!");
+}
+
 MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
@ -3941,6 +3963,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
    return BB;
  }

+  case ARM::BCCi64:
+  case ARM::BCCZi64: {
+    // Compare both parts that make up the double comparison separately for
+    // equality.
+    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+    unsigned LHS1 = MI->getOperand(1).getReg();
+    unsigned LHS2 = MI->getOperand(2).getReg();
+    if (RHSisZero) {
+      AddDefaultPred(BuildMI(BB, dl,
+                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                     .addReg(LHS1).addImm(0));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+        .addReg(LHS2).addImm(0)
+        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    } else {
+      unsigned RHS1 = MI->getOperand(3).getReg();
+      unsigned RHS2 = MI->getOperand(4).getReg();
+      AddDefaultPred(BuildMI(BB, dl,
+                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                     .addReg(LHS1).addReg(RHS1));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+        .addReg(LHS2).addReg(RHS2)
+        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    }
+
+    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+    if (MI->getOperand(0).getImm() == ARMCC::NE)
+      std::swap(destMBB, exitMBB);
+
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+      .addMBB(exitMBB);
+
+    MI->eraseFromParent();   // The pseudo instruction is gone now.
+    return BB;
+  }
+
  case ARM::tANDsp:
  case ARM::tADDspr_:
  case ARM::tSUBspi_:
@ -4180,6 +4242,35 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
  return SDValue();
 }

+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+                                      TargetLowering::DAGCombinerInfo &DCI) {
+  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+  // redundant.
+  SDValue Op = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // Ignore bit_converts.
+  while (Op.getOpcode() == ISD::BIT_CONVERT)
+    Op = Op.getOperand(0);
+  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+    return SDValue();
+
+  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+  // The canonical VMOV for a zero vector uses a 32-bit element size.
+  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  unsigned EltBits;
+  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+    EltSize = 8;
+  if (EltSize > VT.getVectorElementType().getSizeInBits())
+    return SDValue();
+
+  SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+  return DCI.CombineTo(N, Res, false);
+}
+
 /// getVShiftImm - Check if this is a valid build_vector for the immediate
 /// operand of a vector shift operation, where all the elements of the
 /// build_vector must have the same constant integer value.
@ -4558,6 +4649,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
  case ISD::SUB:        return PerformSUBCombine(N, DCI);
  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@ -53,6 +53,8 @@ namespace llvm {
      CMOV,         // ARM conditional move instructions.
      CNEG,         // ARM conditional negate instructions.

+      BCC_i64,
+
      RBIT,         // ARM bitreverse instruction

      FTOSI,        // FP to sint within a FP register.
@ -122,6 +124,10 @@ namespace llvm {
      VGETLANEu,    // zero-extend vector extract element
      VGETLANEs,    // sign-extend vector extract element

+      // Vector move immediate and move negated immediate:
+      VMOVIMM,
+      VMVNIMM,
+
      // Vector duplicate:
      VDUP,
      VDUPLANE,
@ -150,13 +156,6 @@ namespace llvm {

  /// Define some predicates that are used for node matching.
  namespace ARM {
-    /// getNEONModImm - If this is a valid vector constant for a NEON
-    /// instruction with a "modified immediate" operand (e.g., VMOV) of the
-    /// specified element size, return the encoded value for that immediate.
-    /// The ByteSize field indicates the number of bytes of each element [1248].
-    SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
-                          SelectionDAG &DAG);
-
    /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
    /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
    /// instruction, returns its 8-bit integer representation. Otherwise,
@ -363,9 +362,11 @@ namespace llvm {
                  DebugLoc dl, SelectionDAG &DAG) const;

    SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
-    SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
-                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+                      SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+    SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+                      SelectionDAG &DAG, DebugLoc dl) const;
+
+    SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;

    MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
                                         MachineBasicBlock *BB,
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@ -38,6 +38,12 @@ def SDT_ARMBr2JT   : SDTypeProfile<0, 4,
                                  [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
                                   SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;

+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+                                  [SDTCisVT<0, i32>,
+                                   SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+                                   SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+                                   SDTCisVT<5, OtherVT>]>;
+
 def SDT_ARMCmp     : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;

 def SDT_ARMPICAdd  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@ -90,6 +96,9 @@ def ARMbrjt          : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
 def ARMbr2jt         : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
                              [SDNPHasChain]>;

+def ARMBcci64        : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+                              [SDNPHasChain]>;
+
 def ARMcmp           : SDNode<"ARMISD::CMP", SDT_ARMCmp,
                              [SDNPOutFlag]>;

@ -1685,13 +1694,19 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
 }

 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
 def : ARMPat<(add    GPR:$src, so_imm_neg:$imm),
             (SUBri  GPR:$src, so_imm_neg:$imm)>;
-
-//def : ARMPat<(addc   GPR:$src, so_imm_neg:$imm),
-//             (SUBSri GPR:$src, so_imm_neg:$imm)>;
-//def : ARMPat<(adde   GPR:$src, so_imm_neg:$imm),
-//             (SBCri  GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(addc   GPR:$src, so_imm_neg:$imm),
+             (SUBSri GPR:$src, so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(adde   GPR:$src, so_imm_not:$imm),
+             (SBCri  GPR:$src, so_imm_not:$imm)>;

 // Note: These are implemented in C++ code, because they have to generate
 // ADD/SUBrs instructions, which use a complex pattern that a xform function
@ -2279,6 +2294,22 @@ defm CMNz  : AI1_cmp_irs<0b1011, "cmn",
 def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
             (CMNzri  GPR:$src, so_imm_neg:$imm)>;

+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+    Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+      IIC_Br,
+     "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
+    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
+      IIC_Br,
+     "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+

 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@ -65,6 +65,10 @@ def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
 def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
 def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
 def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

 // VDUPLANE can produce a quad-register result from a double-register source,
@ -94,6 +98,20 @@ def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
 def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
 def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+  unsigned EltBits;
+  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+  return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+  unsigned EltBits;
+  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+  return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // NEON operand definitions
 //===----------------------------------------------------------------------===//
@ -2318,10 +2336,10 @@ defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,

 // Vector Bitwise Operations.

-def vnot8 : PatFrag<(ops node:$in),
-                    (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>;
-def vnot16 : PatFrag<(ops node:$in),
-                     (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+def vnotd : PatFrag<(ops node:$in),
+                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


 //   VAND     : Vector Bitwise AND
@ -2347,36 +2365,58 @@ def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
                     "vbic", "$dst, $src1, $src2", "",
                     [(set DPR:$dst, (v2i32 (and DPR:$src1,
-                                                 (vnot8 DPR:$src2))))]>;
+                                                 (vnotd DPR:$src2))))]>;
 def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$dst, $src1, $src2", "",
                     [(set QPR:$dst, (v4i32 (and QPR:$src1,
-                                                 (vnot16 QPR:$src2))))]>;
+                                                 (vnotq QPR:$src2))))]>;

 //   VORN     : Vector Bitwise OR NOT
 def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
                     (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
                     "vorn", "$dst, $src1, $src2", "",
                     [(set DPR:$dst, (v2i32 (or DPR:$src1,
-                                                (vnot8 DPR:$src2))))]>;
+                                                (vnotd DPR:$src2))))]>;
 def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$dst, $src1, $src2", "",
                     [(set QPR:$dst, (v4i32 (or QPR:$src1,
-                                                (vnot16 QPR:$src2))))]>;
+                                                (vnotq QPR:$src2))))]>;
+
+//   VMVN     : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i16", "$dst, $SIMM", "",
+                         [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i16", "$dst, $SIMM", "",
+                         [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i32", "$dst, $SIMM", "",
+                         [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i32", "$dst, $SIMM", "",
+                         [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
+}

 //   VMVN     : Vector Bitwise NOT
 def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
                     "vmvn", "$dst, $src", "",
-                     [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
+                     [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
 def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
                     "vmvn", "$dst, $src", "",
-                     [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
-def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;
-def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>;
+                     [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

 //   VBSL     : Vector Bitwise Select
 def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
@ -2385,14 +2425,14 @@ def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                     [(set DPR:$dst,
                       (v2i32 (or (and DPR:$src2, DPR:$src1),
-                                  (and DPR:$src3, (vnot8 DPR:$src1)))))]>;
+                                  (and DPR:$src3, (vnotd DPR:$src1)))))]>;
 def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                     (ins QPR:$src1, QPR:$src2, QPR:$src3),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
                     [(set QPR:$dst,
                       (v4i32 (or (and QPR:$src2, QPR:$src1),
-                                  (and QPR:$src3, (vnot16 QPR:$src1)))))]>;
+                                  (and QPR:$src3, (vnotq QPR:$src1)))))]>;

 //   VBIF     : Vector Bitwise Insert if False
 //              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@ -2726,20 +2766,19 @@ defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,

 // Vector Negate.

-def vneg   : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
-def vneg8  : PatFrag<(ops node:$in),
-                     (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>;
-def vneg16 : PatFrag<(ops node:$in),
-                     (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>;
+def vnegd  : PatFrag<(ops node:$in),
+                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq  : PatFrag<(ops node:$in),
+                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>;
+        [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>;
+        [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;

 //   VNEG     : Vector Negate (integer)
 def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
@ -2759,12 +2798,12 @@ def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    "vneg", "f32", "$dst, $src", "",
                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

-def : Pat<(v8i8  (vneg8  DPR:$src)), (VNEGs8d DPR:$src)>;
-def : Pat<(v4i16 (vneg8  DPR:$src)), (VNEGs16d DPR:$src)>;
-def : Pat<(v2i32 (vneg8  DPR:$src)), (VNEGs32d DPR:$src)>;
-def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>;
-def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>;
-def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>;
+def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

 //   VQNEG    : Vector Saturating Negate
 defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, 
@ -2818,74 +2857,42 @@ def  VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),

 //   VMOV     : Vector Move (Immediate)

-// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
-def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 1, true, *CurDAG);
-}]>;
-def vmovImm8 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm8>;
-
-// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
-def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 2, true, *CurDAG);
-}]>;
-def vmovImm16 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm16>;
-
-// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
-def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 4, true, *CurDAG);
-}]>;
-def vmovImm32 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm32>;
-
-// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
-def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
-  return ARM::getNEONModImm(N, 8, true, *CurDAG);
-}]>;
-def vmovImm64 : PatLeaf<(build_vector), [{
-  return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm64>;
-
 let isReMaterializable = 1 in {
 def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
+                         [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
+                         [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
+                         [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
+                         [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;

-def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+                         [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
+                         [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;

 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$dst, $SIMM", "",
-                         [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
+                         [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
                         (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$dst, $SIMM", "",
-                         [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
+                         [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
 } // isReMaterializable

 //   VMOV     : Vector Get Lane (move scalar to ARM core register)
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@ -122,6 +122,10 @@ def imm0_255_neg : PatLeaf<(i32 imm), [{
  return (uint32_t)(-N->getZExtValue()) < 255;
 }], imm_neg_XFORM>;

+def imm0_255_not : PatLeaf<(i32 imm), [{
+  return (uint32_t)(~N->getZExtValue()) < 255;
+}], imm_comp_XFORM>;
+
 // Define Thumb2 specific addressing modes.

 // t2addrmode_imm12  := reg + imm12
@ -1391,13 +1395,32 @@ defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
                             BinOpFrag<(subc node:$LHS, node:$RHS)>>;

 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity preferences the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
 let AddedComplexity = 1 in
-def : T2Pat<(add       GPR:$src, imm0_255_neg:$imm),
-            (t2SUBri   GPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(add       GPR:$src, t2_so_imm_neg:$imm),
-            (t2SUBri   GPR:$src, t2_so_imm_neg:$imm)>;
-def : T2Pat<(add       GPR:$src, imm0_4095_neg:$imm),
-            (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add        GPR:$src, imm0_255_neg:$imm),
+            (t2SUBri    GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add        GPR:$src, t2_so_imm_neg:$imm),
+            (t2SUBri    GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add        GPR:$src, imm0_4095_neg:$imm),
+            (t2SUBri12  GPR:$src, imm0_4095_neg:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(addc       GPR:$src, imm0_255_neg:$imm),
+            (t2SUBSri   GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc       GPR:$src, t2_so_imm_neg:$imm),
+            (t2SUBSri   GPR:$src, t2_so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+let AddedComplexity = 1 in
+def : T2Pat<(adde       GPR:$src, imm0_255_not:$imm),
+            (t2SBCSri   GPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde       GPR:$src, t2_so_imm_not:$imm),
+            (t2SBCSri   GPR:$src, t2_so_imm_not:$imm)>;

 // Select Bytes -- for disassembly only

@ -2435,7 +2458,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
    hasExtraDefRegAllocReq = 1 in
  def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
                                         reglist:$dsts, variable_ops), IIC_Br,
-                        "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts",
+                        "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
                        "$addr.addr = $wb", []> {
  let Inst{31-27} = 0b11101;
  let Inst{26-25} = 0b00;
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@ -54,6 +54,9 @@ protected:
  /// the VML[AS] instructions are slow (if so, don't use them).
  bool SlowVMLx;

+  /// SlowFPBrcc - True if floating point compare + branch is slow.
+  bool SlowFPBrcc;
+
  /// IsThumb - True if we are in thumb mode, false if in ARM mode.
  bool IsThumb;

@ -133,6 +136,7 @@ protected:
  bool hasDivide() const { return HasHardwareDivide; }
  bool hasT2ExtractPack() const { return HasT2ExtractPack; }
  bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
+  bool isFPBrccSlow() const { return SlowFPBrcc; }

  bool hasFP16() const { return HasFP16; }

--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@ -88,7 +88,7 @@ private:
  /// its register number, or -1 if there is no match.  To allow return values
  /// to be used directly in register lists, arm registers have values between
  /// 0 and 15.
-  int MatchRegisterName(const StringRef &Name);
+  int MatchRegisterName(StringRef Name);

  /// }

@ -97,7 +97,7 @@ public:
  ARMAsmParser(const Target &T, MCAsmParser &_Parser)
    : TargetAsmParser(T), Parser(_Parser) {}

-  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

  virtual bool ParseDirective(AsmToken DirectiveID);
@ -517,7 +517,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
  const AsmToken &Tok = Parser.getTok();
  if (Tok.isNot(AsmToken::Identifier))
    return true;
-  const StringRef &ShiftName = Tok.getString();
+  StringRef ShiftName = Tok.getString();
  if (ShiftName == "lsl" || ShiftName == "LSL")
    St = Lsl;
  else if (ShiftName == "lsr" || ShiftName == "LSR")
@ -549,7 +549,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
 }

 /// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
+int ARMAsmParser::MatchRegisterName(StringRef Name) {
  if (Name == "r0" || Name == "R0")
    return 0;
  else if (Name == "r1" || Name == "R1")
@ -593,7 +593,7 @@ MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                 MCInst &Inst) {
  ARMOperand &Op0 = *(ARMOperand*)Operands[0];
  assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
-  const StringRef &Mnemonic = Op0.getToken();
+  StringRef Mnemonic = Op0.getToken();
  if (Mnemonic == "add" ||
      Mnemonic == "stmfd" ||
      Mnemonic == "str" ||
@ -658,7 +658,7 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
 }

 /// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
                               SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
  OwningPtr<ARMOperand> Op;
  ARMOperand::CreateToken(Op, Name, NameLoc);
@ -761,7 +761,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
  const AsmToken &Tok = Parser.getTok();
  if (Tok.isNot(AsmToken::Identifier))
    return Error(L, "unexpected token in .syntax directive");
-  const StringRef &Mode = Tok.getString();
+  StringRef Mode = Tok.getString();
  if (Mode == "unified" || Mode == "UNIFIED")
    Parser.Lex();
  else if (Mode == "divided" || Mode == "DIVIDED")
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@ -602,12 +602,8 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,

  O << "[" << getRegisterName(MO1.getReg());
  if (MO2.getImm()) {
-    unsigned Align = MO2.getImm();
-    assert((Align == 8 || Align == 16 || Align == 32) &&
-           "unexpected NEON load/store alignment");
-    Align <<= 3;
    // FIXME: Both darwin as and GNU as violate ARM docs here.
-    O << ", :" << Align;
+    O << ", :" << (MO2.getImm() << 3);
  }
  O << "]";
 }
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@ -442,7 +442,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
  O << "[" << getRegisterName(MO1.getReg());
  if (MO2.getImm()) {
    // FIXME: Both darwin as and GNU as violate ARM docs here.
-    O << ", :" << MO2.getImm();
+    O << ", :" << (MO2.getImm() << 3);
  }
  O << "]";
 }
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@ -590,3 +590,70 @@ than the Z bit, we'll need additional logic to reverse the conditionals
 associated with the comparison. Perhaps a pseudo-instruction for the comparison,
 with a post-codegen pass to clean up and handle the condition codes?
 See PR5694 for testcase.
+
+//===---------------------------------------------------------------------===//
+
+Given the following on armv5:
+int test1(int A, int B) {
+  return (A&-8388481)|(B&8388480);
+}
+
+We currently generate:
+	ldr	r2, .LCPI0_0
+	and	r0, r0, r2
+	ldr	r2, .LCPI0_1
+	and	r1, r1, r2
+	orr	r0, r1, r0
+	bx	lr
+
+We should be able to replace the second ldr+and with a bic (i.e. reuse the
+constant which was already loaded).  Not sure what's necessary to do that.
+
+//===---------------------------------------------------------------------===//
+
+Given the following on ARMv7:
+int test1(int A, int B) {
+  return (A&-8388481)|(B&8388480);
+}
+
+We currently generate:
+	bfc	r0, #7, #16
+	movw	r2, #:lower16:8388480
+	movt	r2, #:upper16:8388480
+	and	r1, r1, r2
+	orr	r0, r1, r0
+	bx	lr
+
+The following is much shorter:
+	lsr	r1, r1, #7
+	bfi	r0, r1, #7, #16
+	bx	lr
+
+
+//===---------------------------------------------------------------------===//
+
+The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
+
+int a(int x) { return __builtin_bswap32(x); }
+
+a:
+	mov	r1, #255, 24
+	mov	r2, #255, 16
+	and	r1, r1, r0, lsr #8
+	and	r2, r2, r0, lsl #8
+	orr	r1, r1, r0, lsr #24
+	orr	r0, r2, r0, lsl #24
+	orr	r0, r0, r1
+	bx	lr
+
+Something like the following would be better (fewer instructions/registers):
+	eor     r1, r0, r0, ror #16
+	bic     r1, r1, #0xff0000
+	mov     r1, r1, lsr #8
+	eor     r0, r1, r0, ror #8
+	bx	lr
+
+A custom Thumb version would also be a slight improvement over the generic
+version.
+
+//===---------------------------------------------------------------------===//
--- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
@ -14,7 +14,7 @@
 #include "AlphaMCAsmInfo.h"
 using namespace llvm;

-AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) {
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
  AlignmentIsInBytes = false;
  PrivateGlobalPrefix = "$";
  GPRel32Directive = ".gprel32";
--- a/lib/Target/Alpha/AlphaMCAsmInfo.h
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.h
@ -14,14 +14,14 @@
 #ifndef ALPHATARGETASMINFO_H
 #define ALPHATARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;

  struct AlphaMCAsmInfo : public MCAsmInfo {
-    explicit AlphaMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
  };

 } // namespace llvm
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
@ -15,7 +15,7 @@

 using namespace llvm;

-BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) {
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
  GlobalPrefix = "_";
  CommentString = "//";
  HasSetDirective = false;
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
@ -14,14 +14,14 @@
 #ifndef BLACKFINTARGETASMINFO_H
 #define BLACKFINTARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;

  struct BlackfinMCAsmInfo : public MCAsmInfo {
-    explicit BlackfinMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
  };

 } // namespace llvm
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@ -14,7 +14,7 @@
 #include "SPUMCAsmInfo.h"
 using namespace llvm;

-SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) {
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
  ZeroDirective = "\t.space\t";
  Data64bitsDirective = "\t.quad\t";
  AlignmentIsInBytes = false;
--- a/lib/Target/CellSPU/SPUMCAsmInfo.h
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.h
@ -14,14 +14,14 @@
 #ifndef SPUTARGETASMINFO_H
 #define SPUTARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;
  
  struct SPULinuxMCAsmInfo : public MCAsmInfo {
-    explicit SPULinuxMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
  };
 } // namespace llvm

--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
@ -14,7 +14,7 @@
 #include "MBlazeMCAsmInfo.h"
 using namespace llvm;

-MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) {
+MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) {
  AlignmentIsInBytes          = false;
  Data16bitsDirective         = "\t.half\t";
  Data32bitsDirective         = "\t.word\t";
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
@ -14,15 +14,15 @@
 #ifndef MBLAZETARGETASMINFO_H
 #define MBLAZETARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;
  
  class MBlazeMCAsmInfo : public MCAsmInfo {
  public:
-    explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit MBlazeMCAsmInfo(const Target &T, StringRef TT);
  };

 } // namespace llvm
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@ -1621,8 +1621,7 @@ const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
 }


-const char* MSILWriter::getLibraryForSymbol(const StringRef &Name, 
-                                            bool isFunction,
+const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction,
                                            CallingConv::ID CallingConv) {
  // TODO: Read *.def file with function and libraries definitions.
  return "MSVCRT.DLL";  
--- a/lib/Target/MSIL/MSILWriter.h
+++ b/lib/Target/MSIL/MSILWriter.h
@ -246,7 +246,7 @@ namespace llvm {

    const char* getLibraryName(const GlobalVariable* GV); 
    
-    const char* getLibraryForSymbol(const StringRef &Name, bool isFunction,
+    const char* getLibraryForSymbol(StringRef Name, bool isFunction,
                                    CallingConv::ID CallingConv);

    void printExternals();
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@ -14,7 +14,7 @@
 #include "MSP430MCAsmInfo.h"
 using namespace llvm;

-MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
  PrivateGlobalPrefix = ".L";
  WeakRefDirective ="\t.weak\t";
  PCSymbol=".";
--- a/lib/Target/MSP430/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.h
@ -14,13 +14,14 @@
 #ifndef MSP430TARGETASMINFO_H
 #define MSP430TARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;
+
  struct MSP430MCAsmInfo : public MCAsmInfo {
-    explicit MSP430MCAsmInfo(const Target &T, const StringRef &TT);
+    explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
  };

 } // namespace llvm
--- a/lib/Target/Mips/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MipsMCAsmInfo.cpp
@ -14,7 +14,7 @@
 #include "MipsMCAsmInfo.h"
 using namespace llvm;

-MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) {
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
  AlignmentIsInBytes          = false;
  Data16bitsDirective         = "\t.half\t";
  Data32bitsDirective         = "\t.word\t";
--- a/lib/Target/Mips/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MipsMCAsmInfo.h
@ -14,15 +14,15 @@
 #ifndef MIPSTARGETASMINFO_H
 #define MIPSTARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;
  
  class MipsMCAsmInfo : public MCAsmInfo {
  public:
-    explicit MipsMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit MipsMCAsmInfo(const Target &T, StringRef TT);
  };

 } // namespace llvm
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@ -416,7 +416,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
  if (!TagName.empty()) Tmp += ", " + TagName;
  
  for (int i = 0; i<Num; i++)
-    Tmp += "," + utostr(Aux[i] && 0xff);
+    Tmp += "," + utostr(Aux[i] & 0xff);
  
  OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp);
 }
--- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
@ -20,7 +20,7 @@
 #include "PIC16ISelLowering.h"
 using namespace llvm;

-PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) {
+PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) {
  CommentString = ";";
  GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
  GlobalDirective = "\tglobal\t";
--- a/lib/Target/PIC16/PIC16MCAsmInfo.h
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.h
@ -25,7 +25,7 @@ namespace llvm {
    const char *RomData16bitsDirective;
    const char *RomData32bitsDirective;
  public:    
-    PIC16MCAsmInfo(const Target &T, const StringRef &TT);
+    PIC16MCAsmInfo(const Target &T, StringRef TT);
    
    virtual const char *getDataASDirective(unsigned size, unsigned AS) const;
  };
--- a/lib/Target/Sparc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp
@ -12,10 +12,9 @@
 //===----------------------------------------------------------------------===//

 #include "SparcMCAsmInfo.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;

-SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) {
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
  Data16bitsDirective = "\t.half\t";
  Data32bitsDirective = "\t.word\t";
  Data64bitsDirective = 0;  // .xword is only supported by V9.
--- a/lib/Target/Sparc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/SparcMCAsmInfo.h
@ -14,13 +14,14 @@
 #ifndef SPARCTARGETASMINFO_H
 #define SPARCTARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;
+
  struct SparcELFMCAsmInfo : public MCAsmInfo {
-    explicit SparcELFMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
  };

 } // namespace llvm
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@ -16,7 +16,7 @@
 #include "llvm/MC/MCSectionELF.h"
 using namespace llvm;

-SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) {
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
  PrivateGlobalPrefix = ".L";
  WeakRefDirective = "\t.weak\t";
  PCSymbol = ".";
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h
@ -21,7 +21,7 @@ namespace llvm {
  class StringRef;

  struct SystemZMCAsmInfo : public MCAsmInfo {
-    explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
    virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
  };
  
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@ -294,7 +294,7 @@ namespace llvm {
  /// option is specified on the command line. If this returns false (default),
  /// the code generator is not allowed to assume that FP arithmetic arguments
  /// and results are never NaNs or +-Infs.
-  bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; }
+  bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; }
  
  /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
  /// that the rounding mode of the FPU can change from its default.
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@ -65,7 +65,7 @@ public:
  X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
    : TargetAsmParser(T), Parser(_Parser) {}

-  virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);

  virtual bool ParseDirective(AsmToken DirectiveID);
@ -602,7 +602,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 }

 bool X86ATTAsmParser::
-ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ParseInstruction(StringRef Name, SMLoc NameLoc,
                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
  // The various flavors of pushf and popf use Requires<In32BitMode> and
  // Requires<In64BitMode>, but the assembler doesn't yet implement that.
@ -612,6 +612,8 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
      return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
    else if (Name == "pushfl")
      return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+    else if (Name == "pusha")
+      return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
  } else {
    if (Name == "popfq")
      return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@ -17,7 +17,6 @@
 #include "X86IntelInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86.h"
-#include "X86COFF.h"
 #include "X86COFFMachineModuleInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86TargetMachine.h"
@ -35,6 +34,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetOptions.h"
@ -60,8 +60,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
  if (Subtarget->isTargetCOFF()) {
    bool Intrn = MF.getFunction()->hasInternalLinkage();
    OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
-    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
-    OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+    OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC 
+                                              : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+    OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                               << COFF::SCT_COMPLEX_TYPE_SHIFT);
    OutStreamer.EndCOFFSymbolDef();
  }

@ -582,8 +584,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
                            E = COFFMMI.externals_end();
                            I != E; ++I) {
      OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
-      OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
-      OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+      OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+      OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                               << COFF::SCT_COMPLEX_TYPE_SHIFT);
      OutStreamer.EndCOFFSymbolDef();
    }

--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@ -154,15 +154,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
      
  case X86II::MO_TLVP:      RefKind = MCSymbolRefExpr::VK_TLVP; break;
  case X86II::MO_TLVP_PIC_BASE:
-      Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
-      // Subtract the pic base.
-      Expr 
-        = MCBinaryExpr::CreateSub(Expr,
-                                  MCSymbolRefExpr::Create(GetPICBaseSymbol(),
-                                                          Ctx),
-                                  Ctx);
-  
-      break;
+    Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+    // Subtract the pic base.
+    Expr = MCBinaryExpr::CreateSub(Expr,
+                                   MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+                                                           Ctx),
+                                   Ctx);
+    break;
  case X86II::MO_TLSGD:     RefKind = MCSymbolRefExpr::VK_TLSGD; break;
  case X86II::MO_GOTTPOFF:  RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
  case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
--- a/lib/Target/X86/X86COFF.h
+++ b/lib/Target/X86/X86COFF.h
@ -1,95 +0,0 @@
-//===--- X86COFF.h - Some definitions from COFF documentations ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file just defines some symbols found in COFF documentation. They are
-// used to emit function type information for COFF targets (Cygwin/Mingw32).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86COFF_H
-#define X86COFF_H
-
-namespace COFF 
-{
-/// Storage class tells where and what the symbol represents
-enum StorageClass {
-  C_EFCN =   -1,  ///< Physical end of function
-  C_NULL    = 0,  ///< No symbol
-  C_AUTO    = 1,  ///< External definition
-  C_EXT     = 2,  ///< External symbol
-  C_STAT    = 3,  ///< Static
-  C_REG     = 4,  ///< Register variable
-  C_EXTDEF  = 5,  ///< External definition
-  C_LABEL   = 6,  ///< Label
-  C_ULABEL  = 7,  ///< Undefined label
-  C_MOS     = 8,  ///< Member of structure
-  C_ARG     = 9,  ///< Function argument
-  C_STRTAG  = 10, ///< Structure tag
-  C_MOU     = 11, ///< Member of union
-  C_UNTAG   = 12, ///< Union tag
-  C_TPDEF   = 13, ///< Type definition
-  C_USTATIC = 14, ///< Undefined static
-  C_ENTAG   = 15, ///< Enumeration tag
-  C_MOE     = 16, ///< Member of enumeration
-  C_REGPARM = 17, ///< Register parameter
-  C_FIELD   = 18, ///< Bit field
-
-  C_BLOCK  = 100, ///< ".bb" or ".eb" - beginning or end of block
-  C_FCN    = 101, ///< ".bf" or ".ef" - beginning or end of function
-  C_EOS    = 102, ///< End of structure
-  C_FILE   = 103, ///< File name
-  C_LINE   = 104, ///< Line number, reformatted as symbol
-  C_ALIAS  = 105, ///< Duplicate tag
-  C_HIDDEN = 106  ///< External symbol in dmert public lib
-};
-
-/// The type of the symbol. This is made up of a base type and a derived type.
-/// For example, pointer to int is "pointer to T" and "int"
-enum SymbolType {
-  T_NULL   = 0,  ///< No type info
-  T_ARG    = 1,  ///< Void function argument (only used by compiler)
-  T_VOID   = 1,  ///< The same as above. Just named differently in some specs.
-  T_CHAR   = 2,  ///< Character
-  T_SHORT  = 3,  ///< Short integer
-  T_INT    = 4,  ///< Integer
-  T_LONG   = 5,  ///< Long integer
-  T_FLOAT  = 6,  ///< Floating point
-  T_DOUBLE = 7,  ///< Double word
-  T_STRUCT = 8,  ///< Structure
-  T_UNION  = 9,  ///< Union
-  T_ENUM   = 10, ///< Enumeration
-  T_MOE    = 11, ///< Member of enumeration
-  T_UCHAR  = 12, ///< Unsigned character
-  T_USHORT = 13, ///< Unsigned short
-  T_UINT   = 14, ///< Unsigned integer
-  T_ULONG  = 15  ///< Unsigned long
-};
-
-/// Derived type of symbol
-enum SymbolDerivedType {
-  DT_NON = 0, ///< No derived type
-  DT_PTR = 1, ///< Pointer to T
-  DT_FCN = 2, ///< Function returning T
-  DT_ARY = 3  ///< Array of T
-};
-
-/// Masks for extracting parts of type
-enum SymbolTypeMasks {
-  N_BTMASK = 017, ///< Mask for base type
-  N_TMASK  = 060  ///< Mask for derived type
-};
-
-/// Offsets of parts of type
-enum Shifts {
-  N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT)
-};
-
-}
-
-#endif // X86COFF_H
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@ -540,7 +540,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
      StubAM.GVOpFlags = GVFlags;

      // Prepare for inserting code in the local-value area.
-      MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+      SavePoint SaveInsertPt = enterLocalValueArea();

      if (TLI.getPointerTy() == MVT::i64) {
        Opc = X86::MOV64rm;
@ -1279,12 +1279,11 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
    return false;

  // First issue a copy to GR16_ABCD or GR32_ABCD.
-  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
  unsigned CopyReg = createResultReg(CopyRC);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
-    .addReg(InputReg);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+          CopyReg).addReg(InputReg);

  // Then issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
-    // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI,
-    // RDI, R8, R9, R11.
-    if (!isa<GlobalAddressSDNode>(Callee) &&
+    // callee-saved registers are restored. These happen to be the same
+    // registers used to pass 'inreg' arguments so watch out for those.
+    if (!Subtarget->is64Bit() &&
+        !isa<GlobalAddressSDNode>(Callee) &&
        !isa<ExternalSymbolSDNode>(Callee)) {
-      unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
      unsigned NumInRegs = 0;
      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
-        if (VA.isRegLoc()) {
-          if (++NumInRegs == Limit)
+        if (!VA.isRegLoc())
+          continue;
+        unsigned Reg = VA.getLocReg();
+        switch (Reg) {
+        default: break;
+        case X86::EAX: case X86::EDX: case X86::ECX:
+          if (++NumInRegs == 3)
            return false;
+          break;
        }
      }
    }
@ -7993,7 +7999,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
                                                       unsigned immOpc,
                                                       unsigned LoadOpc,
                                                       unsigned CXchgOpc,
-                                                       unsigned copyOpc,
                                                       unsigned notOpc,
                                                       unsigned EAXreg,
                                                       TargetRegisterClass *RC,
@ -8070,7 +8075,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
  MIB.addReg(tt);
  (*MIB).addOperand(*argOpers[valArgIndx]);

-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
@ -8081,7 +8086,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
  (*MIB).setMemRefs(bInstr->memoperands_begin(),
                    bInstr->memoperands_end());

-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
  MIB.addReg(EAXreg);

  // insert branch
@ -8117,7 +8122,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,

  const TargetRegisterClass *RC = X86::GR32RegisterClass;
  const unsigned LoadOpc = X86::MOV32rm;
-  const unsigned copyOpc = X86::MOV32rr;
  const unsigned NotOpc = X86::NOT32r;
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
@ -8227,14 +8231,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
    MIB.addReg(t2);
  (*MIB).addOperand(*argOpers[valArgIndx + 1]);

-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
  MIB.addReg(t1);
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
  MIB.addReg(t2);

-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
  MIB.addReg(t5);
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
  MIB.addReg(t6);

  MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
@ -8245,9 +8249,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
  (*MIB).setMemRefs(bInstr->memoperands_begin(),
                    bInstr->memoperands_end());

-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
  MIB.addReg(X86::EAX);
-  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
  MIB.addReg(X86::EDX);

  // insert branch
@ -8326,12 +8330,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,

  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  if (argOpers[valArgIndx]->isReg())
-    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
  else
    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
  (*MIB).addOperand(*argOpers[valArgIndx]);

-  MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
@ -8353,7 +8357,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
  (*MIB).setMemRefs(mInstr->memoperands_begin(),
                    mInstr->memoperands_end());

-  MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
+  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
  MIB.addReg(X86::EAX);

  // insert branch
@ -8735,25 +8739,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  case X86::ATOMAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
                                               X86::OR32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMXOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
                                               X86::XOR32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMNAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass, true);
  case X86::ATOMMIN32:
@ -8768,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  case X86::ATOMAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
                                               X86::OR16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMXOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
                                               X86::XOR16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMNAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass, true);
  case X86::ATOMMIN16:
@ -8801,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  case X86::ATOMAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
                                               X86::OR8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMXOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
                                               X86::XOR8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMNAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass, true);
  // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
@ -8827,25 +8831,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  case X86::ATOMAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
                                               X86::OR64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMXOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
                                               X86::XOR64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMNAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass, true);
  case X86::ATOMMIN64:
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@ -764,7 +764,6 @@ namespace llvm {
                                                    unsigned immOpc,
                                                    unsigned loadOpc,
                                                    unsigned cxchgOpc,
-                                                    unsigned copyOpc,
                                                    unsigned notOpc,
                                                    unsigned EAXreg,
                                                    TargetRegisterClass *RC,
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@ -106,6 +106,7 @@ class VEX    { bit hasVEXPrefix = 1; }
 class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L  { bit hasVEX_L = 1; }

 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
              string AsmStr, Domain d = GenericDomain>
@ -138,6 +139,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
  bit hasVEX_4VPrefix = 0;  // Does this inst requires the VEX.VVVV field?
  bit hasVEX_i8ImmReg = 0;  // Does this inst requires the last source register
                            // to be encoded in a immediate field?
+  bit hasVEX_L = 0;         // Does this inst uses large (256-bit) registers?

  // TSFlags layout should be kept in sync with X86InstrInfo.h.
  let TSFlags{5-0}   = FormBits;
@ -155,6 +157,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
  let TSFlags{33}    = hasVEX_WPrefix;
  let TSFlags{34}    = hasVEX_4VPrefix;
  let TSFlags{35}    = hasVEX_i8ImmReg;
+  let TSFlags{36}    = hasVEX_L;
 }

 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@ -453,7 +453,13 @@ namespace X86II {
    // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
    // must be encoded in the i8 immediate field. This usually happens in
    // instructions with 4 operands.
-    VEX_I8IMM   = 1ULL << 35
+    VEX_I8IMM   = 1ULL << 35,
+
+    // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+    // instruction uses 256-bit wide registers. This is usually auto detected if
+    // a VR256 register is used, but some AVX instructions also have this field
+    // marked when using a f256 memory references.
+    VEX_L       = 1ULL << 36
  };
  
  // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -666,6 +666,9 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
 defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
+                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB, VEX;
 }
 let Pattern = []<dag> in {
 defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
@ -806,9 +809,13 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
 // Convert packed single/double fp to doubleword
 let isAsmParserOnly = 1 in {
 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+                       "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+                       "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
@ -862,6 +869,10 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
@ -912,14 +923,39 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                             (memop addr:$src)))]>;

+let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                          "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
 // Convert packed single to packed double
-let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+                  // SSE2 instructions without OpSize prefix
 def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
-                       Requires<[HasAVX]>;
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                       "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
-                       Requires<[HasAVX]>;
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
 }
 def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
@ -949,10 +985,25 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),

 // Convert packed double to packed single
 let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
 def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
-// FIXME: the memory form of this instruction should described using
-// use extra asm syntax
+                       "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                        "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                        "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+                        "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
 }
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
@ -1142,6 +1193,16 @@ let isAsmParserOnly = 1 in {
                 "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
                 "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
                 SSEPackedDouble>, OpSize, VEX_4V;
+  let Pattern = []<dag> in {
+    defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
+                   "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+                   "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                   SSEPackedSingle>, VEX_4V;
+    defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
+                   "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+                   "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+                   SSEPackedDouble>, OpSize, VEX_4V;
+  }
 }
 let Constraints = "$src1 = $dst" in {
  defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@ -2935,19 +2996,46 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 // SSE3 - Conversion Instructions
 //===---------------------------------------------------------------------===//

+// Convert Packed Double FP to Packed DW Integers
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
 def VCVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrYr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                      "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+                      "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+                      "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
 }

 def CVTPD2DQrm  : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
 def CVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+
+// Convert Packed DW Integers to Packed Double FP
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm  : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr  : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
 def CVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
 def CVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@ -432,6 +432,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
  if (TSFlags & X86II::VEX_W)
    VEX_W = 1;

+  if (TSFlags & X86II::VEX_L)
+    VEX_L = 1;
+
  switch (TSFlags & X86II::Op0Mask) {
  default: assert(0 && "Invalid prefix!");
  case X86II::T8:  // 0F 38
--- a/lib/Target/XCore/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp
@ -10,7 +10,7 @@
 #include "XCoreMCAsmInfo.h"
 using namespace llvm;

-XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) {
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
  SupportsDebugInformation = true;
  Data16bitsDirective = "\t.short\t";
  Data32bitsDirective = "\t.long\t";
--- a/lib/Target/XCore/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/XCoreMCAsmInfo.h
@ -14,14 +14,15 @@
 #ifndef XCORETARGETASMINFO_H
 #define XCORETARGETASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"

 namespace llvm {
  class Target;
-  class StringRef;
+
  class XCoreMCAsmInfo : public MCAsmInfo {
  public:
-    explicit XCoreMCAsmInfo(const Target &T, const StringRef &TT);
+    explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
  };

 } // namespace llvm
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@ -399,7 +399,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
        // We can only inline direct calls to non-declarations.
        if (Callee == 0 || Callee->isDeclaration()) continue;
      
-        // If this call sites was obtained by inlining another function, verify
+        // If this call site was obtained by inlining another function, verify
        // that the include path for the function did not include the callee
        // itself.  If so, we'd be recursively inlinling the same function,
        // which would provide the same callsites, which would cause us to
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@ -603,6 +603,10 @@ static void ThunkGToF(Function *F, Function *G) {
 }

 static void AliasGToF(Function *F, Function *G) {
+  // Darwin will trigger llvm_unreachable if asked to codegen an alias.
+  return ThunkGToF(F, G);
+
+#if 0
  if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
    return ThunkGToF(F, G);

@ -614,6 +618,7 @@ static void AliasGToF(Function *F, Function *G) {
  GA->setVisibility(G->getVisibility());
  G->replaceAllUsesWith(GA);
  G->eraseFromParent();
+#endif
 }

 static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@ -472,6 +472,25 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
      Value *NewOr = Builder->CreateOr(Val, Val2);
      return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
    }
+    
+    // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
+    // (icmp eq (A & (C1|C2)), (C1|C2))
+    if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+      Instruction *I1 = dyn_cast<Instruction>(Val);
+      Instruction *I2 = dyn_cast<Instruction>(Val2);
+      if (I1 && I1->getOpcode() == Instruction::And &&
+          I2 && I2->getOpcode() == Instruction::And &&
+          I1->getOperand(0) == I1->getOperand(0)) {
+        ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1));
+        ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1));
+        if (CI1 && !CI1->isZero() && CI2 && !CI2->isZero() &&
+            CI1->getValue().operator&(CI2->getValue()) == 0) {
+          Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
+          Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr);
+          return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
+        }
+      }
+    }
  }
  
  // From here on, we only handle:
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@ -699,6 +699,34 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    SI.setOperand(2, TrueVal);
    return &SI;
  }
+  
+  // select (A == 0 | B == 0), T, F--> select (A != 0 & B != 0), F, T
+  // Note: This is a canonicalization rather than an optimization, and is used
+  // to expose opportunities to other instcombine transforms.
+  Instruction* CondInst = dyn_cast<Instruction>(CondVal);
+  if (CondInst && CondInst->hasOneUse() && 
+      CondInst->getOpcode() == Instruction::Or) {
+    ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0));
+    ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1));
+    if (LHSCmp && LHSCmp->hasOneUse() &&
+                  LHSCmp->getPredicate() == ICmpInst::ICMP_EQ &&
+        RHSCmp && RHSCmp->hasOneUse() &&
+                  RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) {
+      ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1));
+      ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1));
+      if (C1 && C1->isZero() && C2 && C2->isZero()) {
+        LHSCmp->setPredicate(ICmpInst::ICMP_NE);
+        RHSCmp->setPredicate(ICmpInst::ICMP_NE);
+        Value *And =
+          InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp,
+                                             "and."+CondVal->getName()), SI);
+        SI.setOperand(0, And);
+        SI.setOperand(1, FalseVal);
+        SI.setOperand(2, TrueVal);
+        return &SI;
+      }
+    }
+  }

  return 0;
 }
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@ -2362,7 +2362,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  // TODO: For now, just add the min and max offset, because it usually isn't
  // worthwhile looking at everything inbetween.
-  SmallVector<int64_t, 4> Worklist;
+  SmallVector<int64_t, 2> Worklist;
  Worklist.push_back(LU.MinOffset);
  if (LU.MaxOffset != LU.MinOffset)
    Worklist.push_back(LU.MaxOffset);
@ -2376,7 +2376,14 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
      F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
      if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
                     LU.Kind, LU.AccessTy, TLI)) {
-        F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+        // Add the offset to the base register.
+        const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+        // If it cancelled out, drop the base register, otherwise update it.
+        if (NewG->isZero()) {
+          std::swap(F.BaseRegs[i], F.BaseRegs.back());
+          F.BaseRegs.pop_back();
+        } else
+          F.BaseRegs[i] = NewG;

        (void)InsertFormula(LU, LUIdx, F);
      }
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@ -306,7 +306,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
      WeakVH BIHandle(BI);
      ReplaceAndSimplifyAllUses(Inst, V, TD);
      MadeChange = true;
-      if (BIHandle == 0)
+      if (BIHandle != BI)
        BI = BB->begin();
      continue;
    }
@ -354,12 +354,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
    // value into all of its uses.
    assert(PNV != PN && "hasConstantValue broken");
    
+    Value *OldPhiIt = PhiIt;
    ReplaceAndSimplifyAllUses(PN, PNV, TD);
    
    // If recursive simplification ended up deleting the next PHI node we would
    // iterate to, then our iterator is invalid, restart scanning from the top
    // of the block.
-    if (PhiIt == 0) PhiIt = &BB->front();
+    if (PhiIt != OldPhiIt) PhiIt = &BB->front();
  }
 }

--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@ -1377,8 +1377,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
 bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
  BasicBlock *BB = BI->getParent();
  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
-  if (Cond == 0) return false;
-
+  if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+    Cond->getParent() != BB || !Cond->hasOneUse())
+  return false;
  
  // Only allow this if the condition is a simple instruction that can be
  // executed unconditionally.  It must be in the same block as the branch, and
@ -1387,11 +1388,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
  // Ignore dbg intrinsics.
  while(isa<DbgInfoIntrinsic>(FrontIt))
    ++FrontIt;
-  if ((!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
-      Cond->getParent() != BB || &*FrontIt != Cond || !Cond->hasOneUse()) {
-    return false;
+    
+  // Allow a single instruction to be hoisted in addition to the compare
+  // that feeds the branch.  We later ensure that any values that _it_ uses
+  // were also live in the predecessor, so that we don't unnecessarily create
+  // register pressure or inhibit out-of-order execution.
+  Instruction *BonusInst = 0;
+  if (&*FrontIt != Cond &&
+      FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+      FrontIt->isSafeToSpeculativelyExecute()) {
+    BonusInst = &*FrontIt;
+    ++FrontIt;
  }
  
+  // Only a single bonus inst is allowed.
+  if (&*FrontIt != Cond)
+    return false;
+  
  // Make sure the instruction after the condition is the cond branch.
  BasicBlock::iterator CondIt = Cond; ++CondIt;
  // Ingore dbg intrinsics.
@ -1429,6 +1442,44 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
        !SafeToMergeTerminators(BI, PBI))
      continue;
    
+    // Ensure that any values used in the bonus instruction are also used
+    // by the terminator of the predecessor.  This means that those values
+    // must already have been resolved, so we won't be inhibiting the 
+    // out-of-order core by speculating them earlier.
+    if (BonusInst) {
+      // Collect the values used by the bonus inst
+      SmallPtrSet<Value*, 4> UsedValues;
+      for (Instruction::op_iterator OI = BonusInst->op_begin(),
+           OE = BonusInst->op_end(); OI != OE; ++OI) {
+        Value* V = *OI;
+        if (!isa<Constant>(V))
+          UsedValues.insert(V);
+      }
+
+      SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+      Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+      
+      // Walk up to four levels back up the use-def chain of the predecessor's
+      // terminator to see if all those values were used.  The choice of four
+      // levels is arbitrary, to provide a compile-time-cost bound.
+      while (!Worklist.empty()) {
+        std::pair<Value*, unsigned> Pair = Worklist.back();
+        Worklist.pop_back();
+        
+        if (Pair.second >= 4) continue;
+        UsedValues.erase(Pair.first);
+        if (UsedValues.empty()) break;
+        
+        if (Instruction* I = dyn_cast<Instruction>(Pair.first)) {
+          for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+               OI != OE; ++OI)
+            Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+        }       
+      }
+      
+      if (!UsedValues.empty()) return false;
+    }
+    
    Instruction::BinaryOps Opc;
    bool InvertPredCond = false;

@ -1457,9 +1508,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
      PBI->setSuccessor(1, OldTrue);
    }
    
+    // If we have a bonus inst, clone it into the predecessor block.
+    Instruction *NewBonus = 0;
+    if (BonusInst) {
+      NewBonus = BonusInst->clone();
+      PredBlock->getInstList().insert(PBI, NewBonus);
+      NewBonus->takeName(BonusInst);
+      BonusInst->setName(BonusInst->getName()+".old");
+    }
+    
    // Clone Cond into the predecessor basic block, and or/and the
    // two conditions together.
    Instruction *New = Cond->clone();
+    if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
    PredBlock->getInstList().insert(PBI, New);
    New->takeName(Cond);
    Cond->setName(New->getName()+".old");
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@ -90,8 +90,7 @@ enum PrefixType {
 /// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
 /// prefixed with % (if the string only contains simple characters) or is
 /// surrounded with ""'s (if it has special chars in it).  Print it out.
-static void PrintLLVMName(raw_ostream &OS, const StringRef &Name,
-                          PrefixType Prefix) {
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
  assert(Name.data() && "Cannot get empty name!");
  switch (Prefix) {
  default: llvm_unreachable("Bad prefix!");
@ -855,8 +854,9 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
  }
 }

-static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
-                             TypePrinting &TypePrinter, SlotTracker *Machine) {
+static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
+                                  TypePrinting &TypePrinter,
+                                  SlotTracker *Machine) {
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
    if (CI->getType()->isIntegerTy(1)) {
      Out << (CI->getZExtValue() ? "true" : "false");
@ -1147,7 +1147,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
  const Constant *CV = dyn_cast<Constant>(V);
  if (CV && !isa<GlobalValue>(CV)) {
    assert(TypePrinter && "Constants require TypePrinting!");
-    WriteConstantInt(Out, CV, *TypePrinter, Machine);
+    WriteConstantInternal(Out, CV, *TypePrinter, Machine);
    return;
  }

@ -2128,7 +2128,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
  } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
    const Function *F = N->getFunction();
    SlotTracker SlotTable(F);
-    AssemblyWriter W(OS, SlotTable, F ? getModuleFromVal(F) : 0, AAW);
+    AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
    W.printMDNodeBody(N);
  } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
    SlotTracker SlotTable(N->getParent());
@ -2138,7 +2138,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
    TypePrinting TypePrinter;
    TypePrinter.print(C->getType(), OS);
    OS << ' ';
-    WriteConstantInt(OS, C, TypePrinter, 0);
+    WriteConstantInternal(OS, C, TypePrinter, 0);
  } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
             isa<Argument>(this)) {
    WriteAsOperand(OS, this, true, 0);
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@ -78,7 +78,8 @@ void MDNodeOperand::allUsesReplacedWith(Value *NV) {
 /// getOperandPtr - Helper function to get the MDNodeOperand's coallocated on
 /// the end of the MDNode.
 static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
-  assert(Op < N->getNumOperands() && "Invalid operand number");
+  // Use <= instead of < to permit a one-past-the-end address.
+  assert(Op <= N->getNumOperands() && "Invalid operand number");
  return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
 }

--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@ -1,16 +1,24 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
 ; rdar://7461510

 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; CHECK: t1:
-; CHECK-NOT: vldr
-; CHECK: ldr
-; CHECK: ldr
-; CHECK: cmp r0, r1
-; CHECK-NOT: vcmpe.f32
-; CHECK-NOT: vmrs
-; CHECK: beq
+; FINITE: t1:
+; FINITE-NOT: vldr
+; FINITE: ldr
+; FINITE: ldr
+; FINITE: cmp r0, r1
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: beq
+
+; NAN: t1:
+; NAN: vldr.32 s0,
+; NAN: vldr.32 s1,
+; NAN: vcmpe.f32 s1, s0
+; NAN: vmrs apsr_nzcv, fpscr
+; NAN: beq
  %0 = load float* %a
  %1 = load float* %b
  %2 = fcmp une float %0, %1
@ -25,5 +33,50 @@ bb2:
  ret i32 %4
 }

+define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
+entry:
+; FINITE: t2:
+; FINITE-NOT: vldr
+; FINITE: ldrd r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE: cmpeq r1, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+  %0 = load double* %a
+  %1 = fcmp oeq double %0, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+
+bb1:
+  %2 = call i32 @bar()
+  ret i32 %2
+
+bb2:
+  %3 = call i32 @foo()
+  ret i32 %3
+}
+
+define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
+entry:
+; FINITE: t3:
+; FINITE-NOT: vldr
+; FINITE: ldr r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+  %0 = load float* %a
+  %1 = fcmp oeq float %0, 0.000000e+00
+  br i1 %1, label %bb1, label %bb2
+
+bb1:
+  %2 = call i32 @bar()
+  ret i32 %2
+
+bb2:
+  %3 = call i32 @foo()
+  ret i32 %3
+}
+
 declare i32 @bar()
 declare i32 @foo()
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@ -239,7 +239,7 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK:        t9:
 ; CHECK:        vldr.64
 ; CHECK-NOT:    vmov d{{.*}}, d0
-; CHECK:        vmov.i8 d1
+; CHECK:        vmov.i32 d1
 ; CHECK-NEXT:   vstmia r0, {d0, d1}
 ; CHECK-NEXT:   vstmia r0, {d0, d1}
  %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@ -0,0 +1,29 @@
+; RUN: llc -march=arm < %s | FileCheck %s
+
+; 171 = 0x000000ab
+define i64 @f1(i64 %a) {
+; CHECK: f1
+; CHECK: subs r0, r0, #171
+; CHECK: sbc r1, r1, #0
+    %tmp = sub i64 %a, 171
+    ret i64 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i64 @f2(i64 %a) {
+; CHECK: f2
+; CHECK: subs r0, r0, #255, 14
+; CHECK: sbc r1, r1, #0
+    %tmp = sub i64 %a, 66846720
+    ret i64 %tmp
+}
+
+; 734439407618 = 0x000000ab00000002
+define i64 @f3(i64 %a) {
+; CHECK: f3
+; CHECK: subs r0, r0, #2
+; CHECK: sbc r1, r1, #171
+   %tmp = sub i64 %a, 734439407618
+   ret i64 %tmp
+}
+
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@ -267,3 +267,15 @@ entry:
  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %0
 }
+
+; Radar 7373643
+;CHECK: redundantVdup:
+;CHECK: vmov.i8
+;CHECK-NOT: vdup.8
+;CHECK: vstr.64
+define void @redundantVdup(<8 x i8>* %ptr) nounwind {
+  %1 = insertelement <8 x i8> undef, i8 -128, i32 0
+  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  store <8 x i8> %2, <8 x i8>* %ptr, align 8
+  ret void
+}
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@ -18,6 +18,18 @@ define <4 x i16> @v_movi16b() nounwind {
 	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
 }

+define <4 x i16> @v_mvni16a() nounwind {
+;CHECK: v_mvni16a:
+;CHECK: vmvn.i16 d0, #0x10
+	ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
+}
+
+define <4 x i16> @v_mvni16b() nounwind {
+;CHECK: v_mvni16b:
+;CHECK: vmvn.i16 d0, #0x1000
+	ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
+}
+
 define <2 x i32> @v_movi32a() nounwind {
 ;CHECK: v_movi32a:
 ;CHECK: vmov.i32 d0, #0x20
@ -54,6 +66,42 @@ define <2 x i32> @v_movi32f() nounwind {
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }

+define <2 x i32> @v_mvni32a() nounwind {
+;CHECK: v_mvni32a:
+;CHECK: vmvn.i32 d0, #0x20
+	ret <2 x i32> < i32 4294967263, i32 4294967263 >
+}
+
+define <2 x i32> @v_mvni32b() nounwind {
+;CHECK: v_mvni32b:
+;CHECK: vmvn.i32 d0, #0x2000
+	ret <2 x i32> < i32 4294959103, i32 4294959103 >
+}
+
+define <2 x i32> @v_mvni32c() nounwind {
+;CHECK: v_mvni32c:
+;CHECK: vmvn.i32 d0, #0x200000
+	ret <2 x i32> < i32 4292870143, i32 4292870143 >
+}
+
+define <2 x i32> @v_mvni32d() nounwind {
+;CHECK: v_mvni32d:
+;CHECK: vmvn.i32 d0, #0x20000000
+	ret <2 x i32> < i32 3758096383, i32 3758096383 >
+}
+
+define <2 x i32> @v_mvni32e() nounwind {
+;CHECK: v_mvni32e:
+;CHECK: vmvn.i32 d0, #0x20FF
+	ret <2 x i32> < i32 4294958848, i32 4294958848 >
+}
+
+define <2 x i32> @v_mvni32f() nounwind {
+;CHECK: v_mvni32f:
+;CHECK: vmvn.i32 d0, #0x20FFFF
+	ret <2 x i32> < i32 4292804608, i32 4292804608 >
+}
+
 define <1 x i64> @v_movi64() nounwind {
 ;CHECK: v_movi64:
 ;CHECK: vmov.i64 d0, #0xFF0000FF0000FFFF
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@ -12,7 +12,7 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
 ; CHECK-NEXT: subeq.w r0, r6, r8
-; CHECK-NEXT: ldmia.w sp, {r4, r5, r6, r8, r9, pc}
+; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc}
 entry:
  %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
  %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
--- a/test/CodeGen/Thumb2/thumb2-sbc.ll
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@ -1,8 +1,54 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc -march=thumb -mattr=+thumb2 < %s | FileCheck %s

 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK: f1
 ; CHECK: subs r0, r0, r2
    %tmp = sub i64 %a, %b
    ret i64 %tmp
 }
+
+; 734439407618 = 0x000000ab00000002
+define i64 @f2(i64 %a) {
+; CHECK: f2
+; CHECK: subs r0, #2
+; CHECK: sbc r1, r1, #171
+    %tmp = sub i64 %a, 734439407618
+    ret i64 %tmp
+}
+
+; 5066626890203138 = 0x0012001200000002
+define i64 @f3(i64 %a) {
+; CHECK: f3
+; CHECK: subs  r0, #2
+; CHECK: sbc r1, r1, #1179666
+    %tmp = sub i64 %a, 5066626890203138
+    ret i64 %tmp
+}
+
+; 3747052064576897026 = 0x3400340000000002
+define i64 @f4(i64 %a) {
+; CHECK: f4
+; CHECK: subs  r0, #2
+; CHECK: sbc r1, r1, #872428544
+    %tmp = sub i64 %a, 3747052064576897026
+    ret i64 %tmp
+}
+
+; 6221254862626095106 = 0x5656565600000002
+define i64 @f5(i64 %a) {
+; CHECK: f5
+; CHECK: subs  r0, #2
+; CHECK: adc r1, r1, #-1448498775
+    %tmp = sub i64 %a, 6221254862626095106 
+    ret i64 %tmp
+}
+
+; 287104476244869122 = 0x03fc000000000002
+define i64 @f6(i64 %a) {
+; CHECK: f6
+; CHECK: subs  r0, #2
+; CHECK: sbc r1, r1, #66846720
+    %tmp = sub i64 %a, 287104476244869122 
+    ret i64 %tmp
+}
+
--- a/Show more
+++ b/Show more