diff --git a/CMakeLists.txt b/CMakeLists.txt
index cfa32cf4bc5..bb21cc5d751 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,7 +48,7 @@ set(CMAKE_MODULE_PATH
set(LLVM_VERSION_MAJOR 3)
set(LLVM_VERSION_MINOR 6)
-set(LLVM_VERSION_PATCH 0)
+set(LLVM_VERSION_PATCH 1)
if (NOT PACKAGE_VERSION)
set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 3bd8aa1806c..577a7d521ea 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -32,11 +32,11 @@ dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
-AC_INIT([LLVM],[3.6.0],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.6.1],[http://llvm.org/bugs/])
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=6
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
LLVM_VERSION_SUFFIX=
AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
@@ -1714,7 +1714,9 @@ if test "$llvm_cv_os_type" = "MingW" ; then
AC_CHECK_LIB(gcc,_alloca,AC_DEFINE([HAVE__ALLOCA],[1],[Have host's _alloca]))
AC_CHECK_LIB(gcc,__alloca,AC_DEFINE([HAVE___ALLOCA],[1],[Have host's __alloca]))
AC_CHECK_LIB(gcc,__chkstk,AC_DEFINE([HAVE___CHKSTK],[1],[Have host's __chkstk]))
+ AC_CHECK_LIB(gcc,__chkstk_ms,AC_DEFINE([HAVE___CHKSTK_MS],[1],[Have host's __chkstk_ms]))
AC_CHECK_LIB(gcc,___chkstk,AC_DEFINE([HAVE____CHKSTK],[1],[Have host's ___chkstk]))
+ AC_CHECK_LIB(gcc,___chkstk_ms,AC_DEFINE([HAVE____CHKSTK_MS],[1],[Have host's ___chkstk_ms]))
AC_CHECK_LIB(gcc,__ashldi3,AC_DEFINE([HAVE___ASHLDI3],[1],[Have host's __ashldi3]))
AC_CHECK_LIB(gcc,__ashrdi3,AC_DEFINE([HAVE___ASHRDI3],[1],[Have host's __ashrdi3]))
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index f806d9c54ed..6a7538895c4 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -198,7 +198,9 @@ if( PURE_WINDOWS )
check_function_exists(_alloca HAVE__ALLOCA)
check_function_exists(__alloca HAVE___ALLOCA)
check_function_exists(__chkstk HAVE___CHKSTK)
+ check_function_exists(__chkstk_ms HAVE___CHKSTK_MS)
check_function_exists(___chkstk HAVE____CHKSTK)
+ check_function_exists(___chkstk_ms HAVE____CHKSTK_MS)
check_function_exists(__ashldi3 HAVE___ASHLDI3)
check_function_exists(__ashrdi3 HAVE___ASHRDI3)
diff --git a/configure b/configure
index ba5ecbef334..ea5bf52101b 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.6.0.
+# Generated by GNU Autoconf 2.60 for LLVM 3.6.1.
#
# Report bugs to .
#
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.6.0'
-PACKAGE_STRING='LLVM 3.6.0'
+PACKAGE_VERSION='3.6.1'
+PACKAGE_STRING='LLVM 3.6.1'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
ac_unique_file="lib/IR/Module.cpp"
@@ -1314,7 +1314,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures LLVM 3.6.0 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.6.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1380,7 +1380,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of LLVM 3.6.0:";;
+ short | recursive ) echo "Configuration of LLVM 3.6.1:";;
esac
cat <<\_ACEOF
@@ -1550,7 +1550,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-LLVM configure 3.6.0
+LLVM configure 3.6.1
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1566,7 +1566,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by LLVM $as_me 3.6.0, which was
+It was created by LLVM $as_me 3.6.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1922,7 +1922,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=6
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
LLVM_VERSION_SUFFIX=
@@ -15436,6 +15436,91 @@ cat >>confdefs.h <<\_ACEOF
#define HAVE___CHKSTK 1
_ACEOF
+fi
+
+ { echo "$as_me:$LINENO: checking for __chkstk_ms in -lgcc" >&5
+echo $ECHO_N "checking for __chkstk_ms in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___chkstk_ms+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __chkstk_ms ();
+int
+main ()
+{
+return __chkstk_ms ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___chkstk_ms=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___chkstk_ms=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___chkstk_ms" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___chkstk_ms" >&6; }
+if test $ac_cv_lib_gcc___chkstk_ms = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___CHKSTK_MS 1
+_ACEOF
+
fi
{ echo "$as_me:$LINENO: checking for ___chkstk in -lgcc" >&5
@@ -15521,6 +15606,91 @@ cat >>confdefs.h <<\_ACEOF
#define HAVE____CHKSTK 1
_ACEOF
+fi
+
+ { echo "$as_me:$LINENO: checking for ___chkstk_ms in -lgcc" >&5
+echo $ECHO_N "checking for ___chkstk_ms in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc____chkstk_ms+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ___chkstk_ms ();
+int
+main ()
+{
+return ___chkstk_ms ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc____chkstk_ms=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc____chkstk_ms=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc____chkstk_ms" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc____chkstk_ms" >&6; }
+if test $ac_cv_lib_gcc____chkstk_ms = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE____CHKSTK_MS 1
+_ACEOF
+
fi
@@ -18901,7 +19071,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by LLVM $as_me 3.6.0, which was
+This file was extended by LLVM $as_me 3.6.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -18954,7 +19124,7 @@ Report bugs to ."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-LLVM config.status 3.6.0
+LLVM config.status 3.6.1
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 04d7f526651..a10f7e004ab 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -25,6 +25,31 @@ them.
Non-comprehensive list of changes in this release
=================================================
+Changes to the MIPS Target
+--------------------------
+
+* Added support for 128-bit integers on 64-bit targets.
+
+* Fixed some remaining N32/N64 calling convention bugs when using small
+ structures on big-endian targets.
+
+* Fixed missing sign-extensions that are required by the N32/N64 calling
+ convention when generating calls to library functions with 32-bit parameters.
+
+* ``-mno-odd-spreg`` is now honoured for vector insertion/extraction operations
+ when using ``-mmsa``.
+
+* Corrected the representation of member function pointers. This makes them
+ usable on microMIPS targets.
+
+* Fixed multiple segfaults and assertions in the disassembler when
+ disassembling instructions that have memory operands.
+
+* Fixed multiple cases of suboptimal code generation involving ``$zero``.
+
+Non-comprehensive list of changes in 3.6.0
+==========================================
+
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 27f07d4e39d..5c512f6bca6 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -423,6 +423,9 @@
/* Have host's __chkstk */
#cmakedefine HAVE___CHKSTK ${HAVE___CHKSTK}
+/* Have host's __chkstk_ms */
+#cmakedefine HAVE___CHKSTK_MS ${HAVE___CHKSTK_MS}
+
/* Have host's __cmpdi2 */
#cmakedefine HAVE___CMPDI2 ${HAVE___CMPDI2}
@@ -459,6 +462,9 @@
/* Have host's ___chkstk */
#cmakedefine HAVE____CHKSTK ${HAVE____CHKSTK}
+/* Have host's ___chkstk_ms */
+#cmakedefine HAVE____CHKSTK_MS ${HAVE____CHKSTK_MS}
+
/* Define if we link Polly to the tools */
#cmakedefine LINK_POLLY_INTO_TOOLS
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index ec09c84c5b7..c68c77aa607 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -420,6 +420,9 @@
/* Have host's __chkstk */
#undef HAVE___CHKSTK
+/* Have host's __chkstk_ms */
+#undef HAVE___CHKSTK_MS
+
/* Have host's __cmpdi2 */
#undef HAVE___CMPDI2
@@ -456,6 +459,9 @@
/* Have host's ___chkstk */
#undef HAVE____CHKSTK
+/* Have host's ___chkstk_ms */
+#undef HAVE____CHKSTK_MS
+
/* Linker version detected at compile time. */
#undef HOST_LINK_VERSION
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index a0f26741a8f..9071bfeec7e 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -134,6 +134,8 @@ namespace ISD {
/// Index original Function's argument.
unsigned OrigArgIndex;
+ /// Sentinel value for implicit machine-level input arguments.
+ static const unsigned NoArgIndex = UINT_MAX;
/// Offset in bytes of current input value relative to the beginning of
/// original argument. E.g. if argument was splitted into four 32 bit
@@ -147,6 +149,15 @@ namespace ISD {
VT = vt.getSimpleVT();
ArgVT = argvt;
}
+
+ bool isOrigArg() const {
+ return OrigArgIndex != NoArgIndex;
+ }
+
+ unsigned getOrigArgIndex() const {
+ assert(OrigArgIndex != NoArgIndex && "Implicit machine-level argument");
+ return OrigArgIndex;
+ }
};
/// OutputArg - This struct carries flags and a value for a
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index c5fed02e17b..43d6f2772c5 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2806,6 +2806,11 @@ public:
virtual bool useLoadStackGuardNode() const {
return false;
}
+
+ /// Returns true if arguments should be sign-extended in lib calls.
+ virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+ return IsSigned;
+ }
};
/// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 59f19a002ec..7e9e35127a4 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1776,9 +1776,12 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
<< *IsomorphicInc << '\n');
Value *NewInc = OrigInc;
if (OrigInc->getType() != IsomorphicInc->getType()) {
- Instruction *IP = isa(OrigInc)
- ? (Instruction*)L->getHeader()->getFirstInsertionPt()
- : OrigInc->getNextNode();
+ Instruction *IP = nullptr;
+ if (PHINode *PN = dyn_cast(OrigInc))
+ IP = PN->getParent()->getFirstInsertionPt();
+ else
+ IP = OrigInc->getNextNode();
+
IRBuilder<> Builder(IP);
Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
NewInc = Builder.
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index cbd62728ace..96111225db5 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
I != E; ++I) {
unsigned MappedDef = *I;
// Source of copy is no longer available for propagation.
- if (AvailCopyMap.erase(MappedDef)) {
- for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
- AvailCopyMap.erase(*SR);
- }
+ AvailCopyMap.erase(MappedDef);
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+ AvailCopyMap.erase(*SR);
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index afb986f1d7c..1df61e4fb1f 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1160,13 +1160,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
- // Early exit if this basic block is in an optnone function.
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeNone))
- return;
-
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
@@ -2788,9 +2781,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
- Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
- Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if (SplatBitSize % BitWidth == 0) {
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
}
}
@@ -11043,7 +11040,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
} else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
// If the input vector is too large, try to split it.
// We don't support having two input vectors that are too large.
- if (VecIn2.getNode())
+ // If the zero vector was used, we can not split the vector,
+ // since we'd need 3 inputs.
+ if (UsesZeroVector || VecIn2.getNode())
return SDValue();
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
@@ -11055,7 +11054,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
- UsesZeroVector = false;
} else
return SDValue();
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 97fed230c53..c46539b71db 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -497,7 +497,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast(Ty)) {
- unsigned Field = cast(Idx)->getZExtValue();
+ uint64_t Field = cast(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
@@ -518,8 +518,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
if (CI->isZero())
continue;
// N = N + Offset
- TotalOffs +=
- DL.getTypeAllocSize(Ty) * cast(CI)->getSExtValue();
+ uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
+ TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4591e79316d..b5967155434 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl).first;
+ &Op, 1, Signed, dl).first;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8b54e6568b9..5222de1063b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// If one or more successors has been unscheduled, then the current
// node is no longer available.
- if (!TrySU->isAvailable)
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId)
CurSU = AvailableQueue->pop();
else {
+ // Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d1929107fcb..66095ee015e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3399,30 +3399,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = StTy->getElementType(Field);
} else {
Ty = cast(Ty)->getElementType();
+ MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS);
+ unsigned PtrSize = PtrTy.getSizeInBits();
+ APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
// If this is a constant subscript, handle it quickly.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (const ConstantInt *CI = dyn_cast(Idx)) {
- if (CI->isZero()) continue;
- uint64_t Offs =
- DL->getTypeAllocSize(Ty)*cast(CI)->getSExtValue();
- SDValue OffsVal;
- EVT PTy = TLI.getPointerTy(AS);
- unsigned PtrBits = PTy.getSizeInBits();
- if (PtrBits < 64)
- OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
- DAG.getConstant(Offs, MVT::i64));
- else
- OffsVal = DAG.getConstant(Offs, PTy);
-
- N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
- OffsVal);
+ if (const auto *CI = dyn_cast(Idx)) {
+ if (CI->isZero())
+ continue;
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ SDValue OffsVal = DAG.getConstant(Offs, PtrTy);
+ N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal);
continue;
}
// N = N + Idx * ElementSize;
- APInt ElementSize =
- APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -5727,6 +5718,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Skip the first return-type Attribute to get to params.
Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);
+
+ // If we have an explicit sret argument that is an Instruction, (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (Entry.isSRet && isa(V))
+ isTailCall = false;
}
// Check if target-independent constraints permit a tail call here.
@@ -7353,6 +7349,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+
+ // sret demotion isn't compatible with tail-calls, since the sret argument
+ // points into the callers stack frame.
+ CLI.IsTailCall = false;
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
@@ -7638,7 +7638,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+ ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 72e0aca8408..f12c035e785 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -96,18 +96,19 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ .setSExtResult(signExtend).setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 21893d2909e..d75be280786 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -177,25 +177,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const {
}
template
-void RuntimeDyldMachOCRTPBase::finalizeLoad(const ObjectFile &ObjImg,
+void RuntimeDyldMachOCRTPBase::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
- ObjSectionToIDMap::iterator i, e;
- for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
- const SectionRef &Section = i->first;
+ for (const auto &Section : Obj.sections()) {
StringRef Name;
Section.getName(Name);
- if (Name == "__eh_frame")
- EHFrameSID = i->second;
- else if (Name == "__text")
- TextSID = i->second;
+
+ // Force emission of the __text, __eh_frame, and __gcc_except_tab sections
+ // if they're present. Otherwise call down to the impl to handle other
+ // sections that have already been emitted.
+ if (Name == "__text")
+ TextSID = findOrEmitSection(Obj, Section, true, SectionMap);
+ else if (Name == "__eh_frame")
+ EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap);
else if (Name == "__gcc_except_tab")
- ExceptTabSID = i->second;
- else
- impl().finalizeSection(ObjImg, i->second, Section);
+ ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap);
+ else {
+ auto I = SectionMap.find(Section);
+ if (I != SectionMap.end())
+ impl().finalizeSection(Obj, I->second, Section);
+ }
}
UnregisteredEHFrameSections.push_back(
EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
@@ -238,7 +243,8 @@ unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P,
}
static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
- int64_t ObjDistance = A->ObjAddress - B->ObjAddress;
+ int64_t ObjDistance =
+ static_cast(A->ObjAddress) - static_cast(B->ObjAddress);
int64_t MemDistance = A->LoadAddress - B->LoadAddress;
return ObjDistance - MemDistance;
}
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 9176bf2aeee..39d9a1d0a28 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -1120,27 +1120,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantInt::get(CI1->getContext(), C1V | C2V);
case Instruction::Xor:
return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
- case Instruction::Shl: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::LShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::AShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
+ case Instruction::Shl:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ case Instruction::LShr:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ case Instruction::AShr:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
}
}
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index 245c500cf62..88e0cd094ec 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -263,10 +263,12 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
// required to combine the edge counts that are contained in the GCDA file.
for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) {
// The last block is always reserved for exit block
- if (BlockNo >= Blocks.size()-1) {
+ if (BlockNo >= Blocks.size()) {
errs() << "Unexpected number of edges (in " << Name << ").\n";
return false;
}
+ if (BlockNo == Blocks.size() - 1)
+ errs() << "(" << Name << ") has arcs from exit block.\n";
GCOVBlock &Block = *Blocks[BlockNo];
for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End;
++EdgeNo) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 8eff90a9ef7..e2a4fc1bef7 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -3636,21 +3636,27 @@ bool AsmParser::parseDirectiveSpace(StringRef IDVal) {
}
/// parseDirectiveLEB128
-/// ::= (.sleb128 | .uleb128) expression
+/// ::= (.sleb128 | .uleb128) [ expression (, expression)* ]
bool AsmParser::parseDirectiveLEB128(bool Signed) {
checkForValidSection();
const MCExpr *Value;
- if (parseExpression(Value))
- return true;
+ for (;;) {
+ if (parseExpression(Value))
+ return true;
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
+ if (Signed)
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);
- if (Signed)
- getStreamer().EmitSLEB128Value(Value);
- else
- getStreamer().EmitULEB128Value(Value);
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
return false;
}
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index 7ccde463459..c421ee84c2b 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -333,23 +333,12 @@ void Memory::InvalidateInstructionCache(const void *Addr,
for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
asm volatile("icbi 0, %0" : : "r"(Line));
asm volatile("isync");
-# elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \
+ defined(__GNUC__)
// FIXME: Can we safely always call this for __GNUC__ everywhere?
const char *Start = static_cast(Addr);
const char *End = Start + Len;
__clear_cache(const_cast(Start), const_cast(End));
-# elif defined(__mips__)
- const char *Start = static_cast(Addr);
-# if defined(ANDROID)
- // The declaration of "cacheflush" in Android bionic:
- // extern int cacheflush(long start, long end, long flags);
- const char *End = Start + Len;
- long LStart = reinterpret_cast(const_cast(Start));
- long LEnd = reinterpret_cast(const_cast(End));
- cacheflush(LStart, LEnd, BCACHE);
-# else
- cacheflush(const_cast(Start), Len, BCACHE);
-# endif
# endif
#endif // end apple
diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc
index cd56b13c14c..bbbf7ea6a77 100644
--- a/lib/Support/Windows/explicit_symbols.inc
+++ b/lib/Support/Windows/explicit_symbols.inc
@@ -10,9 +10,15 @@
#ifdef HAVE___CHKSTK
EXPLICIT_SYMBOL(__chkstk)
#endif
+#ifdef HAVE___CHKSTK_MS
+ EXPLICIT_SYMBOL(__chkstk_ms)
+#endif
#ifdef HAVE____CHKSTK
EXPLICIT_SYMBOL(___chkstk)
#endif
+#ifdef HAVE____CHKSTK_MS
+ EXPLICIT_SYMBOL(___chkstk_ms)
+#endif
#ifdef HAVE___MAIN
EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
#endif
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 08ee687d84a..5159dbf0529 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
@@ -494,24 +496,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, TmpInst);
return;
}
- case AArch64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
+ case AArch64::TLSDESC_CALLSEQ: {
+ /// lower this to:
+ /// adrp x0, :tlsdesc:var
+ /// ldr x1, [x0, #:tlsdesc_lo12:var]
+ /// add x0, x0, #:tlsdesc_lo12:var
+ /// .tlsdesccall var
+ /// blr x1
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_Sym, Sym);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
- // First emit a relocation-annotation. This expands to no code, but requests
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::CreateImm(0));
+ EmitToStreamer(OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(OutStreamer, Add);
+
+ // Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);
- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(AArch64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
+ MCInst Blr;
+ Blr.setOpcode(AArch64::BLR);
+ Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ EmitToStreamer(OutStreamer, Blr);
return;
}
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index aab8e384b8d..ba4fc3b25e0 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLR:
+ case AArch64::TLSDESC_CALLSEQ:
// Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ if (!I->getOperand(0).isSymbol() ||
+ strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
break;
if (TLSBaseAddrReg)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 399b5eeaf5f..6458d56c751 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
static cl::opt
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
+
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
@@ -760,7 +768,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -2023,18 +2031,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
- std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[i].OrigArgIndex;
-
- // Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
- MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
- // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
- if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
- ValVT = MVT::i8;
- else if (ActualMVT == MVT::i16)
- ValVT = MVT::i16;
+ if (Ins[i].isOrigArg()) {
+ std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[i].getOrigArgIndex();
+ // Get type of the original argument.
+ EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
+ // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+ ValVT = MVT::i8;
+ else if (ActualMVT == MVT::i16)
+ ValVT = MVT::i16;
+ }
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
@@ -3049,61 +3058,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
+/// is a function pointer to carry out the resolution.
///
-/// Thus, the ideal call sequence on AArch64 is:
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
-///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
-///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
-
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
-
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector Ops;
- Ops.push_back(Chain);
- Ops.push_back(Func);
- Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
-
+ SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+
+ SmallVector Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(SymAddr);
+
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
@@ -3114,9 +3096,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }
SDValue TPOff;
EVT PtrVT = getPointerTy();
@@ -3127,17 +3118,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3152,19 +3146,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getMachineFunction().getInfo();
MFI->incNumLocalDynamicTLSAccesses();
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3173,40 +3154,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3214,7 +3178,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index cc25bede8d6..5a193228f18 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -29,9 +29,9 @@ enum {
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
+ // Produces the full sequence of instructions for getting the thread pointer
+ // offset of a variable into X0, using the TLSDesc model.
+ TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
@@ -399,8 +399,8 @@ private:
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index e0fb90a9f62..a6f09e944af 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -96,6 +96,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// number of operands (the variable)
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,10 +242,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
@@ -1049,15 +1063,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
let AsmString = ".tlsdesccall $sym";
}
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e57b0f4dbb0..b82934134d9 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -22,9 +22,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+extern cl::opt EnableAArch64ELFLocalDynamicTLSGeneration;
+
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+ Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
+ // The general dynamic access sequence is used to get the
+ // address of _TLS_MODULE_BASE_.
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
RefFlags |= AArch64MCExpr::VK_G0;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+ RefFlags |= AArch64MCExpr::VK_HI12;
if (MO.getTargetFlags() & AArch64II::MO_NC)
RefFlags |= AArch64MCExpr::VK_NC;
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index c60b09a5f11..6d0337ce15e 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1229,7 +1229,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0x7,
+ MO_FRAGMENT = 0xf,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,
+ /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+ /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left-
+ /// by-12-bits instruction.
+ MO_HI12 = 7,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
- MO_GOT = 8,
+ MO_GOT = 0x10,
/// MO_NC - Indicates whether the linker is expected to check the symbol
/// reference for overflow. For example in an ADRP/ADD pair of relocations
/// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
+ MO_NC = 0x20,
/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20,
+ MO_TLS = 0x40,
/// MO_CONSTPOOL - This flag indicates that a symbol operand represents
/// the address of a constant pool entry for the symbol, rather than the
/// address of the symbol itself.
- MO_CONSTPOOL = 0x40
+ MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index a1de5efb450..2dc4707bb0a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3092,8 +3092,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
+ if (Ins[VA.getValNo()].isOrigArg()) {
+ std::advance(CurOrigArg,
+ Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
+ }
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
@@ -3173,7 +3176,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
- int index = ArgLocs[i].getValNo();
+ int index = VA.getValNo();
// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
@@ -3186,6 +3189,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
+ assert(Ins[index].isOrigArg() &&
+ "Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 7db5b34204c..699dfae02fc 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -3667,43 +3667,44 @@ bool MipsAsmParser::parseDirectiveModule() {
return false;
}
- if (Lexer.is(AsmToken::Identifier)) {
- StringRef Option = Parser.getTok().getString();
- Parser.Lex();
-
- if (Option == "oddspreg") {
- getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
- clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token, expected end of statement");
- return false;
- }
-
- return false;
- } else if (Option == "nooddspreg") {
- if (!isABI_O32()) {
- Error(L, "'.module nooddspreg' requires the O32 ABI");
- return false;
- }
-
- getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
- setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token, expected end of statement");
- return false;
- }
-
- return false;
- } else if (Option == "fp") {
- return parseDirectiveModuleFP();
- }
-
- return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
+ StringRef Option;
+ if (Parser.parseIdentifier(Option)) {
+ reportParseError("expected .module option identifier");
+ return false;
}
- return false;
+ if (Option == "oddspreg") {
+ getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
+ clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ return false; // parseDirectiveModule has finished successfully.
+ } else if (Option == "nooddspreg") {
+ if (!isABI_O32()) {
+ Error(L, "'.module nooddspreg' requires the O32 ABI");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
+ setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ return false; // parseDirectiveModule has finished successfully.
+ } else if (Option == "fp") {
+ return parseDirectiveModuleFP();
+ } else {
+ return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
+ }
}
/// parseDirectiveModuleFP
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index da33f3b913c..d42b948cc61 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -259,6 +259,11 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -304,6 +309,10 @@ static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1118,6 +1127,23 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = fieldFromInstruction(Insn, 7, 9);
+ unsigned Hint = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::CreateImm(Hint));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSyncI(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1354,6 +1380,23 @@ static DecodeStatus DecodeFMem3(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFMemCop2R6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<11>(Insn & 0x07ff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 11, 5);
+
+ Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 3e1d047091a..5ad56834607 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -58,15 +58,15 @@ def MipsInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
- "Disable SVR4-style position-independent code.">;
+ "Disable SVR4-style position-independent code">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
- "General Purpose Registers are 64-bit wide.">;
+ "General Purpose Registers are 64-bit wide">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
- "Support 64-bit FP registers.">;
+ "Support 64-bit FP registers">;
def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
- "Support for FPXX.">;
+ "Support for FPXX">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
- "IEEE 754-2008 NaN encoding.">;
+ "IEEE 754-2008 NaN encoding">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
@@ -81,7 +81,7 @@ def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
"registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
- "true", "Enable vector FPU instructions.">;
+ "true", "Enable vector FPU instructions">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
"Mips I ISA Support [highly experimental]">;
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 976beccfed9..7e1fbfdcac4 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
BuildAddiuSpImm(MBB, I, Amount);
else
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 185d12ec93f..49c63226dc0 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -379,7 +379,6 @@ class JMP_IDX_COMPACT_DESC_BASE Pattern = [];
bit isTerminator = 1;
bit hasDelaySlot = 0;
- string DecoderMethod = "DecodeSimm16";
}
class JIALC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
@@ -550,6 +549,7 @@ class CACHE_HINT_DESC Pattern = [];
+ string DecoderMethod = "DecodeCacheOpR6";
}
class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
@@ -561,6 +561,7 @@ class COP2LD_DESC_BASE {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list Pattern = [];
bit mayLoad = 1;
+ string DecoderMethod = "DecodeFMemCop2R6";
}
class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>;
@@ -572,6 +573,7 @@ class COP2ST_DESC_BASE {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list Pattern = [];
bit mayStore = 1;
+ string DecoderMethod = "DecodeFMemCop2R6";
}
class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index bfe2ba01195..d6628d40848 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -440,6 +440,16 @@ def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)),
// bswap MipsPattern
def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>;
+// Carry pattern
+def : MipsPat<(subc GPR64:$lhs, GPR64:$rhs),
+ (DSUBu GPR64:$lhs, GPR64:$rhs)>;
+let AdditionalPredicates = [NotDSP] in {
+ def : MipsPat<(addc GPR64:$lhs, GPR64:$rhs),
+ (DADDu GPR64:$lhs, GPR64:$rhs)>;
+ def : MipsPat<(addc GPR64:$lhs, immSExt16:$imm),
+ (DADDiu GPR64:$lhs, imm:$imm)>;
+}
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
index e18cc8b1f7b..b8081295ca6 100644
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -132,8 +132,8 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
continue;
}
- assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size());
- std::advance(FuncArg, Ins[i].OrigArgIndex);
+ assert(Ins[i].getOrigArgIndex() < MF.getFunction()->arg_size());
+ std::advance(FuncArg, Ins[i].getOrigArgIndex());
OriginalArgWasF128.push_back(
originalTypeIsF128(FuncArg->getType(), nullptr));
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 90bf9f3f583..dcd88f25d25 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[
]>;
def CC_MipsN : CallingConv<[
- CCIfType<[i8, i16, i32],
+ CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg>>>,
@@ -159,6 +159,10 @@ def CC_MipsN : CallingConv<[
// N32/64 variable arguments.
// All arguments are passed in integer registers.
def CC_MipsN_VarArg : CallingConv<[
+ CCIfType<[i8, i16, i32, i64],
+ CCIfSubtargetNot<"isLittle()",
+ CCIfInReg>>>,
+
// All integers are promoted to 64-bit.
CCIfType<[i8, i16, i32], CCPromoteToType>,
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index d25f5637f57..37fc7849310 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -261,6 +261,9 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
}
if (!Subtarget.isGP64bit()) {
@@ -616,6 +619,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2);
+
+ ConstantSDNode *FalseC = dyn_cast(ValueIfFalse);
+ if (!FalseC || FalseC->getZExtValue())
+ return SDValue();
+
+ // Since RHS (False) is 0, we swap the order of the True/False operands
+ // (obviously also inverting the condition) so that we can
+ // take advantage of conditional moves using the $0 register.
+ // Example:
+ // return (a != 0) ? x : 0;
+ // load $reg, x
+ // movz $reg, $0, a
+ unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? MipsISD::CMovFP_F :
+ MipsISD::CMovFP_T;
+
+ SDValue FCC = N->getOperand(1), Glue = N->getOperand(3);
+ return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(),
+ ValueIfFalse, FCC, ValueIfTrue, Glue);
+}
+
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
@@ -749,6 +779,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
return performSELECTCombine(N, DAG, DCI, Subtarget);
+ case MipsISD::CMovFP_F:
+ case MipsISD::CMovFP_T:
+ return performCMovFPCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -2017,10 +2050,11 @@ SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32;
+
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
-
- // if shamt < 32:
+ // if shamt < (VT.bits):
// lo = (shl lo, shamt)
// hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt))
// else:
@@ -2028,18 +2062,17 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
// hi = (shl lo, shamt[4:0])
SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
DAG.getConstant(-1, MVT::i32));
- SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo,
- DAG.getConstant(1, MVT::i32));
- SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, ShiftRight1Lo,
- Not);
- SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, Shamt);
- SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
- SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt);
+ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
+ DAG.getConstant(1, VT));
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not);
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
- DAG.getConstant(0x20, MVT::i32));
- Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
- DAG.getConstant(0, MVT::i32), ShiftLeftLo);
- Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);
+ DAG.getConstant(VT.getSizeInBits(), MVT::i32));
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ DAG.getConstant(0, VT), ShiftLeftLo);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or);
SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, DL);
@@ -2050,8 +2083,9 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
+ MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32;
- // if shamt < 32:
+ // if shamt < (VT.bits):
// lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt))
// if isSRA:
// hi = (sra hi, shamt)
@@ -2066,21 +2100,20 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// hi = 0
SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt,
DAG.getConstant(-1, MVT::i32));
- SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
- DAG.getConstant(1, MVT::i32));
- SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, ShiftLeft1Hi, Not);
- SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, Shamt);
- SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo);
- SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, MVT::i32,
- Hi, Shamt);
+ SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
+ DAG.getConstant(1, VT));
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not);
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+ SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL,
+ DL, VT, Hi, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
- DAG.getConstant(0x20, MVT::i32));
- SDValue Shift31 = DAG.getNode(ISD::SRA, DL, MVT::i32, Hi,
- DAG.getConstant(31, MVT::i32));
- Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftRightHi, Or);
- Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
- IsSRA ? Shift31 : DAG.getConstant(0, MVT::i32),
- ShiftRightHi);
+ DAG.getConstant(VT.getSizeInBits(), MVT::i32));
+ SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
+ DAG.getConstant(VT.getSizeInBits() - 1, VT));
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ IsSRA ? Ext : DAG.getConstant(0, VT), ShiftRightHi);
SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, DL);
@@ -2900,13 +2933,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[i].OrigArgIndex;
+ if (Ins[i].isOrigArg()) {
+ std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[i].getOrigArgIndex();
+ }
EVT ValVT = VA.getValVT();
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool IsRegLoc = VA.isRegLoc();
if (Flags.isByVal()) {
+ assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit");
unsigned FirstByValReg, LastByValReg;
unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
@@ -3027,6 +3063,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
+bool
+MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+ if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) {
+ if (Type == MVT::i32)
+ return true;
+ }
+ return IsSigned;
+}
+
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4132de6dbca..4da337a61da 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -472,6 +472,8 @@ namespace llvm {
const SmallVectorImpl &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
+
// Inline asm support
ConstraintType
getConstraintType(const std::string &Constraint) const override;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 2aa83289a10..ed97cb46192 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -458,42 +458,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
- MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
- MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
- MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
- MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
+ MADDS_FM<7, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
}
def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
+ MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
}
-let isCodeGenOnly=1 in {
+let DecoderNamespace = "Mips64" in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
let AdditionalPredicates = [NoNaNsFPMath],
- isCodeGenOnly=1 in {
+ DecoderNamespace = "Mips64" in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 2b3b6c1e524..1eb8d9ad821 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -388,6 +388,8 @@ def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128,
(sequence "W%u", 0, 31)>;
def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128,
(sequence "W%u", 0, 31)>;
+def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
+ (decimate (sequence "W%u", 0, 31), 2)>;
def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 70963821690..8c2620ca7f0 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -236,13 +236,35 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
(Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
"(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+ unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
+ if (Subtarget->isGP64bit()) {
+ SLTuOp = Mips::SLTu64;
+ ADDuOp = Mips::DADDu;
+ }
+
SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops);
- SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
- SDValue(Carry, 0), RHS);
+ SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);
+
+ if (Subtarget->isGP64bit()) {
+ // On 64-bit targets, sltu produces an i64 but our backend currently says
+ // that SLTu64 produces an i32. We need to fix this in the long run but for
+ // now, just make the DAG type-correct by asserting the upper bits are zero.
+ Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
+ CurDAG->getTargetConstant(0, VT),
+ SDValue(Carry, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, VT));
+ }
+
+ // Generate a second addition only if we know that RHS is not a
+ // constant-zero node.
+ SDNode *AddCarry = Carry;
+ ConstantSDNode *C = dyn_cast(RHS);
+ if (!C || C->getZExtValue())
+ AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
+
return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
SDValue(AddCarry, 0));
}
@@ -641,7 +663,8 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
case ISD::SUBE: {
SDValue InFlag = Node->getOperand(2);
- Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
+ Result = selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
return std::make_pair(true, Result);
}
@@ -649,7 +672,8 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
break;
SDValue InFlag = Node->getOperand(2);
- Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu;
+ Result = selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node);
return std::make_pair(true, Result);
}
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 29aac2e276b..2c033ce61c1 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -122,6 +122,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::MUL, MVT::i64, Custom);
if (Subtarget.isGP64bit()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
setOperationAction(ISD::MULHS, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
@@ -200,6 +202,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
if (Subtarget.hasMips64r6()) {
// MIPS64r6 replaces the accumulator-based multiplies with a three register
// instruction
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::MUL, MVT::i64, Legal);
setOperationAction(ISD::MULHS, MVT::i64, Legal);
setOperationAction(ISD::MULHU, MVT::i64, Legal);
@@ -2879,10 +2883,21 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
unsigned Ws = MI->getOperand(1).getReg();
unsigned Lane = MI->getOperand(2).getImm();
- if (Lane == 0)
- BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
- else {
- unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ if (Lane == 0) {
+ unsigned Wt = Ws;
+ if (!Subtarget.useOddSPReg()) {
+ // We must copy to an even-numbered MSA register so that the
+ // single-precision sub-register is also guaranteed to be even-numbered.
+ Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
+ }
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
+ } else {
+ unsigned Wt = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
+ &Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
@@ -2944,7 +2959,9 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
unsigned Wd_in = MI->getOperand(1).getReg();
unsigned Lane = MI->getOperand(2).getImm();
unsigned Fs = MI->getOperand(3).getReg();
- unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned Wt = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
+ &Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
.addImm(0)
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 74f291f609f..180b04327fc 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount))// addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 5c52bb19b0d..e580c811d71 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2688,9 +2688,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
- std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[ArgNo].OrigArgIndex;
-
+ if (Ins[ArgNo].isOrigArg()) {
+ std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+ }
/* Respect alignment of argument on the stack. */
unsigned Align =
CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
@@ -2704,6 +2705,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
+ assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -3064,9 +3067,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
- std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[ArgNo].OrigArgIndex;
-
+ if (Ins[ArgNo].isOrigArg()) {
+ std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+ }
unsigned CurArgOffset = ArgOffset;
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
@@ -3087,6 +3091,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
+ assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 1df4448abf0..03f2bbe4820 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -97,6 +97,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"true",
"Enable spilling of VGPRs to scratch memory">;
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
+
class SubtargetFeatureFetchLimit :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
index b545b456161..0b426bc63dd 100644
--- a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
@@ -40,7 +40,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
std::vector FuncsToClone;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Function &F = *I;
- if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
+ if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
+ !F.hasFnAttribute(Attribute::NoInline))
FuncsToClone.push_back(&F);
}
@@ -54,7 +55,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Function &F = *I;
- if (F.hasLocalLinkage()) {
+ if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
F.addFnAttr(Attribute::AlwaysInline);
}
}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 6185e367ff5..1fae26e18a4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -343,6 +343,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
+ if (STM.hasSGPRInitBug()) {
+ if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+ llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+ ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ }
+
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 15112c7e54d..68d557a1cf7 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -439,6 +439,31 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
+ case ISD::STORE: {
+ // Handle i64 stores here for the same reason mentioned above for loads.
+ StoreSDNode *ST = cast(N);
+ SDValue Value = ST->getValue();
+ if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
+ break;
+
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
+ MVT::v2i32, Value);
+ SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
+ ST->getBasePtr(), ST->getMemOperand());
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
+
+ if (NewValue.getOpcode() == ISD::BITCAST) {
+ Select(NewStore.getNode());
+ return SelectCode(NewValue.getNode());
+ }
+
+ // getNode() may fold the bitcast if its input was another bitcast. If that
+ // happens we should only select the new store.
+ N = NewStore.getNode();
+ break;
+ }
+
case AMDGPUISD::REGISTER_LOAD: {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
@@ -761,6 +786,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -770,19 +797,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
- const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
- SDValue Ops[] = {
- Zero, // src0_modifiers
- N->getOperand(0), // src0
- Zero, // src1_modifiers
- N->getOperand(1), // src1
- Zero, // src2_modifiers
- N->getOperand(2), // src2
- False, // clamp
- Zero // omod
- };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ SDValue Ops[8];
+ SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 2adcdf1c299..b137053fbbc 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -141,9 +141,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
- setOperationAction(ISD::STORE, MVT::i64, Promote);
- AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
-
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
@@ -162,9 +159,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
// Custom lowering of vector stores is required for local address space
// stores.
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
- // XXX: Native v2i32 local address space stores are possible, but not
- // currently implemented.
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
@@ -832,11 +826,9 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SmallVector Args;
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
- DAG.ExtractVectorElements(A, Args);
- DAG.ExtractVectorElements(B, Args);
+ for (const SDUse &U : Op->ops())
+ DAG.ExtractVectorElements(U.get(), Args);
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
}
@@ -881,9 +873,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
@@ -913,10 +902,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::AMDGPU_div_fmas:
- // FIXME: Dropping bool parameter. Work is needed to support the implicit
- // read from VCC.
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
case Intrinsic::AMDGPU_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index e28ce0f03ac..202183c18a8 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -140,6 +140,12 @@ public:
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
+ /// \brief Return the descriptor of the target-specific machine instruction
+ /// that corresponds to the specified pseudo or native opcode.
+ const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
+ return get(pseudoToMCOpcode(Opcode));
+ }
+
//===---------------------------------------------------------------------===//
// Pure virtual funtions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 0e34392bd50..d657ad05c8c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -35,6 +35,11 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
+// float, float, float, vcc
+def AMDGPUFmasOp : SDTypeProfile<1, 4,
+ [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -153,7 +158,7 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 4e536c37b0b..34b1fc8b5f4 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -164,10 +164,6 @@ class PrivateStore : PrivateMemOp <
(ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;
-def extloadi8_private : PrivateLoad ;
-def sextloadi8_private : PrivateLoad ;
-def extloadi16_private : PrivateLoad ;
-def sextloadi16_private : PrivateLoad ;
def load_private : PrivateLoad ;
def truncstorei8_private : PrivateStore ;
@@ -231,6 +227,9 @@ def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isLocalLoad(dyn_cast(N));
}]>;
+def extloadi8_private : PrivateLoad ;
+def sextloadi8_private : PrivateLoad ;
+
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast(N)->getMemoryVT() == MVT::i16;
}]>;
@@ -267,6 +266,9 @@ def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isLocalLoad(dyn_cast(N));
}]>;
+def extloadi16_private : PrivateLoad ;
+def sextloadi16_private : PrivateLoad ;
+
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast(N)->getMemoryVT() == MVT::i32;
}]>;
@@ -649,17 +651,10 @@ class RcpPat : Pat <
(RcpInst $src)
>;
-multiclass RsqPat {
- def : Pat <
- (fdiv FP_ONE, (fsqrt vt:$src)),
- (RsqInst $src)
- >;
-
- def : Pat <
- (AMDGPUrcp (fsqrt vt:$src)),
- (RsqInst $src)
- >;
-}
+class RsqPat : Pat <
+ (AMDGPUrcp (fsqrt vt:$src)),
+ (RsqInst $src)
+>;
include "R600Instructions.td"
include "R700Instructions.td"
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index eee9c29038d..ab489cd2a4a 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index b1c7498fc14..87cdb5f8db8 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -80,7 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false),
+ EnableVGPRSpilling(false),SGPRInitBug(false),
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 566b45c1dcc..eeb41d3ec7f 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -45,6 +45,10 @@ public:
VOLCANIC_ISLANDS,
};
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
private:
std::string DevName;
bool Is64bit;
@@ -66,6 +70,7 @@ private:
bool CFALUBug;
int LocalMemorySize;
bool EnableVGPRSpilling;
+ bool SGPRInitBug;
const DataLayout DL;
AMDGPUFrameLowering FrameLowering;
@@ -203,6 +208,10 @@ public:
return LocalMemorySize;
}
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
+
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {
diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td
index 58b5ce24b4a..433c3fc0185 100644
--- a/lib/Target/R600/CaymanInstructions.td
+++ b/lib/Target/R600/CaymanInstructions.td
@@ -46,7 +46,7 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
-defm : RsqPat;
+def : RsqPat;
def : POW_Common ;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index f24f76b7fe1..299d1fa1489 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -69,7 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-defm : RsqPat;
+def : RsqPat;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 8271c6f45fb..b66ed100025 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -291,6 +291,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate64(Op.getImm(), O);
else
llvm_unreachable("Invalid register class size");
+ } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
+ printImmediate32(Op.getImm(), O);
} else {
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index cff97cdb3be..e5fef0c1d03 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -113,8 +113,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
// Volcanic Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"tonga", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
-def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"iceland", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 595f6988454..2e1b0943252 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -838,6 +838,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@@ -1694,7 +1698,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
// XXX - I think PartOffset should give you this, but it seems to give the
// size of the register which isn't useful.
- unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
+ unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
unsigned Offset = 36 + VA.getLocMemOffset();
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index b1d3ce276ee..05957d2cf2a 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1193,7 +1193,7 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
- defm : RsqPat;
+ def : RsqPat;
def : FROUNDPat ;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index c99219dd907..b8165fb4ab2 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
- Value *handleLoopCondition(Value *Cond, PHINode *Broken);
+ Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);
void handleLoop(BranchInst *Term);
@@ -207,8 +207,17 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
- if (PHINode *Phi = dyn_cast(Cond)) {
+Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
+ llvm::Loop *L) {
+
+ // Only search through PHI nodes which are inside the loop. If we try this
+ // with PHI nodes that are outside of the loop, we end up inserting new PHI
+ // nodes outside of the loop which depend on values defined inside the loop.
+ // This will break the module with
+ // 'Instruction does not dominate all users!' errors.
+ PHINode *Phi = nullptr;
+ if ((Phi = dyn_cast(Cond)) && L->contains(Phi)) {
+
BasicBlock *Parent = Phi->getParent();
PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
Value *Ret = NewPhi;
@@ -223,7 +232,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
}
Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
NewPhi->addIncoming(PhiArg, From);
}
@@ -253,7 +262,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
} else if (Instruction *Inst = dyn_cast(Cond)) {
BasicBlock *Parent = Inst->getParent();
- TerminatorInst *Insert = Parent->getTerminator();
+ Instruction *Insert;
+ if (L->contains(Inst)) {
+ Insert = Parent->getTerminator();
+ } else {
+ Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
+ }
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);
@@ -265,14 +279,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *BB = Term->getParent();
+ llvm::Loop *L = LI->getLoopFor(BB);
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken);
+ Value *Arg = handleLoopCondition(Cond, Broken, L);
- BasicBlock *BB = Term->getParent();
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 7601794beab..b54014072bf 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -35,7 +35,8 @@ enum {
SMRD = 1 << 16,
DS = 1 << 17,
MIMG = 1 << 18,
- FLAT = 1 << 19
+ FLAT = 1 << 19,
+ WQM = 1 << 20
};
}
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
index d8ffa4f7550..cb24bba24a2 100644
--- a/lib/Target/R600/SIFoldOperands.cpp
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -209,7 +209,12 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
APInt Imm;
if (FoldingImm) {
- const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
+ unsigned UseReg = UseOp.getReg();
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getRegClass(UseReg);
+
Imm = APInt(64, OpToFold.getImm());
// Split 64-bit constants into 32-bits for folding.
@@ -228,8 +233,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// In order to fold immediates into copies, we need to change the
// copy to a MOV.
if (UseMI->getOpcode() == AMDGPU::COPY) {
- unsigned MovOp = TII->getMovOpcode(
- MRI.getRegClass(UseMI->getOperand(0).getReg()));
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
continue;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 6b2ea0682a4..32ae605e3f6 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -89,8 +89,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
@@ -158,8 +156,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- setTruncStoreAction(MVT::i32, MVT::i8, Custom);
- setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
@@ -214,6 +210,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
}
setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Custom);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
@@ -314,9 +311,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple() || VT == MVT::Other)
return false;
- // XXX - CI changes say "Support for unaligned memory accesses" but I don't
- // see what for specifically. The wording everywhere else seems to be the
- // same.
+ // TODO - CI+ supports unaligned memory accesses, but this requires driver
+ // support.
// XXX - The only mention I see of this in the ISA manual is for LDS direct
// reads the "byte address and must be dword aligned". Is it also true for the
@@ -328,12 +324,18 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return Align % 4 == 0;
}
+ // Smaller than dword value must be aligned.
+ // FIXME: This should be allowed on CI+
+ if (VT.bitsLT(MVT::i32))
+ return false;
+
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
if (IsFast)
*IsFast = true;
- return VT.bitsGT(MVT::i32);
+
+ return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}
EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
@@ -448,7 +450,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// We REALLY want the ORIGINAL number of vertex elements here, e.g. a
// three or five element vertex only needs three or five registers,
// NOT four or eigth.
- Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
for (unsigned j = 0; j != NumElements; ++j) {
@@ -531,7 +533,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Offset, Ins[i].Flags.isSExt());
const PointerType *ParamTy =
- dyn_cast(FType->getParamType(Ins[i].OrigArgIndex));
+ dyn_cast(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
@@ -566,7 +568,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (Arg.VT.isVector()) {
// Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
SmallVector Regs;
@@ -919,6 +921,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
+ DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
+
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@@ -1104,7 +1112,70 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue();
+ if (DAG.getTarget().Options.UnsafeFPMath)
+ return LowerFastFDIV(Op, DAG);
+
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+
+ SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
+
+ SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
+
+ SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
+
+ SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
+
+ SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
+
+ SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
+
+ SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
+
+ SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
+
+ SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
+
+ SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
+ NegDivScale0, Mul, DivScale1);
+
+ SDValue Scale;
+
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ // Workaround a hardware bug on SI where the condition output from div_scale
+ // is not usable.
+
+ const SDValue Hi = DAG.getConstant(1, MVT::i32);
+
+ // Figure out if the scale to use for div_fmas.
+ SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
+ SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
+ SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
+ SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
+
+ SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
+ SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
+
+ SDValue Scale0Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
+ SDValue Scale1Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
+
+ SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
+ SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
+ Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
+ } else {
+ Scale = DivScale1.getValue(1);
+ }
+
+ SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
+ Fma4, Fma3, Mul, Scale);
+
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
}
SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
@@ -1125,11 +1196,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Store->getMemoryVT();
// These stores are legal.
- if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
- VT.isVector() && VT.getVectorNumElements() == 2 &&
- VT.getVectorElementType() == MVT::i32)
- return SDValue();
-
if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (VT.isVector() && VT.getVectorNumElements() > 4)
return ScalarizeVectorStore(Op, DAG);
@@ -1524,6 +1590,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::UMAX:
case AMDGPUISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMin3Max3Combine(N, DCI);
break;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 181b11643bf..50f20ac3619 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -82,6 +82,8 @@ private:
/// \brief Type of the last opcode.
InstType LastOpcodeType;
+ bool LastInstWritesM0;
+
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -106,6 +108,9 @@ private:
/// \brief Resolve all operand dependencies to counter requirements
Counters handleOperands(MachineInstr &MI);
+ /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
+ void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+
public:
SIInsertWaits(TargetMachine &tm) :
MachineFunctionPass(ID),
@@ -269,6 +274,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// Insert a NOP to break the clause.
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
.addImm(0);
+ LastInstWritesM0 = false;
}
if (TII->isSMRD(I->getOpcode()))
@@ -362,6 +368,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
((Counts.Named.LGKM & 0x7) << 8));
LastOpcodeType = OTHER;
+ LastInstWritesM0 = false;
return true;
}
@@ -403,6 +410,30 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
return Result;
}
+void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return;
+
+ // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
+ if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
+ LastInstWritesM0 = false;
+ return;
+ }
+
+ // Set whether this instruction sets M0
+ LastInstWritesM0 = false;
+
+ unsigned NumOperands = I->getNumOperands();
+ for (unsigned i = 0; i < NumOperands; i++) {
+ const MachineOperand &Op = I->getOperand(i);
+
+ if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
+ LastInstWritesM0 = true;
+ }
+}
+
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -417,6 +448,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
WaitedOn = ZeroCounts;
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
+ LastInstWritesM0 = false;
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@@ -433,7 +465,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Changes |= insertWait(MBB, I, LastIssued);
else
Changes |= insertWait(MBB, I, handleOperands(*I));
+
pushInstruction(MBB, I);
+ handleSendMsg(MBB, I);
}
// Wait for everything at the end of the MBB
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 09c0cbe8f5c..b825208c92b 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -38,6 +38,7 @@ class InstSI pattern> :
field bits<1> DS = 0;
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
+ field bits<1> WQM = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
@@ -64,6 +65,7 @@ class InstSI pattern> :
let TSFlags{17} = DS;
let TSFlags{18} = MIMG;
let TSFlags{19} = FLAT;
+ let TSFlags{20} = WQM;
// Most instructions require adjustments after selection to satisfy
// operand requirements.
@@ -295,18 +297,32 @@ class VOP1e op> : Enc32 {
}
class VOP2e op> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
- bits<8> VDST;
- bits<9> SRC0;
- bits<8> VSRC1;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
- let Inst{24-17} = VDST;
+ let Inst{8-0} = src0;
+ let Inst{16-9} = src1;
+ let Inst{24-17} = vdst;
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
}
+class VOP2_MADKe op> : Enc64 {
+
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> vsrc1;
+ bits<32> src2;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = vsrc1;
+ let Inst{24-17} = vdst;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{63-32} = src2;
+}
+
class VOP3e op> : Enc64 {
bits<8> dst;
@@ -554,9 +570,6 @@ class VOP1 op, dag outs, dag ins, string asm, list pattern> :
class VOP2 op, dag outs, dag ins, string asm, list pattern> :
VOP2Common , VOP2e;
-class VOP3b op, dag outs, dag ins, string asm, list pattern> :
- VOP3Common , VOP3be;
-
class VOPC op, dag ins, string asm, list pattern> :
VOPCCommon , VOPCe ;
@@ -585,9 +598,6 @@ class DS pattern> :
let SchedRW = [WriteLDS];
}
-class DS_si op, dag outs, dag ins, string asm, list pattern> :
- DS , DSe;
-
class MUBUF pattern> :
InstSI {
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 80b560eb65a..5ab33b491ce 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -121,12 +121,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ const ConstantSDNode *Load0Offset =
+ dyn_cast(Load0->getOperand(1));
+ const ConstantSDNode *Load1Offset =
+ dyn_cast(Load1->getOperand(1));
+
+ if (!Load0Offset || !Load1Offset)
+ return false;
+
// Check chain.
if (findChainOperand(Load0) != findChainOperand(Load1))
return false;
- Offset0 = cast(Load0->getOperand(1))->getZExtValue();
- Offset1 = cast(Load1->getOperand(1))->getZExtValue();
+ Offset0 = Load0Offset->getZExtValue();
+ Offset1 = Load1Offset->getZExtValue();
return true;
}
@@ -333,6 +341,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ if (DestReg == AMDGPU::VCC) {
+ if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ // FIXME: Hack until VReg_1 removed.
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+
+ return;
+ }
+
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -408,11 +431,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
int NewOpc;
// Try to map original to commuted opcode
- if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+ NewOpc = AMDGPU::getCommuteRev(Opcode);
+ // Check if the commuted (REV) opcode exists on the target.
+ if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
return NewOpc;
// Try to map commuted to original opcode
- if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+ NewOpc = AMDGPU::getCommuteOrig(Opcode);
+ // Check if the original (non-REV) opcode exists on the target.
+ if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
return NewOpc;
return Opcode;
@@ -1121,6 +1148,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return false;
}
+ int RegClass = Desc.OpInfo[i].RegClass;
+
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER:
if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
@@ -1131,7 +1160,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
case AMDGPU::OPERAND_REG_IMM32:
break;
case AMDGPU::OPERAND_REG_INLINE_C:
- if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
+ if (isLiteralConstant(MI->getOperand(i))) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -1152,7 +1181,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
if (!MI->getOperand(i).isReg())
continue;
- int RegClass = Desc.OpInfo[i].RegClass;
if (RegClass != -1) {
unsigned Reg = MI->getOperand(i).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
@@ -1197,31 +1225,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
}
- // Verify SRC1 for VOP2 and VOPC
- if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
- const MachineOperand &Src1 = MI->getOperand(Src1Idx);
- if (Src1.isImm()) {
- ErrInfo = "VOP[2C] src1 cannot be an immediate.";
- return false;
- }
- }
-
- // Verify VOP3
- if (isVOP3(Opcode)) {
- if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
- ErrInfo = "VOP3 src0 cannot be a literal constant.";
- return false;
- }
- if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
- ErrInfo = "VOP3 src1 cannot be a literal constant.";
- return false;
- }
- if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
- ErrInfo = "VOP3 src2 cannot be a literal constant.";
- return false;
- }
- }
-
// Verify misc. restrictions on specific instructions.
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
@@ -1292,6 +1295,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
}
}
@@ -2043,6 +2047,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
swapOperands(Inst);
}
break;
+ case AMDGPU::S_LSHL_B64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHLREV_B64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_ASHR_I64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_ASHRREV_I64;
+ swapOperands(Inst);
+ }
+ break;
+ case AMDGPU::S_LSHR_B64:
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ NewOpcode = AMDGPU::V_LSHRREV_B64;
+ swapOperands(Inst);
+ }
+ break;
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 28cd27dd896..12980307267 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -204,6 +204,10 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
+ bool isWQM(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::WQM;
+ }
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO) const;
bool isLiteralConstant(const MachineOperand &MO) const;
@@ -243,7 +247,27 @@ public:
/// the register class of its machine operand.
/// to infer the correct register class base on the other operands.
const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
- unsigned OpNo) const;\
+ unsigned OpNo) const;
+
+ /// \brief Return the size in bytes of the operand OpNo on the given
+ // instruction opcode.
+ unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
+ const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
+
+ if (OpInfo.RegClass == -1) {
+ // If this is an immediate operand, this must be a 32-bit literal.
+ assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
+ return 4;
+ }
+
+ return RI.getRegClass(OpInfo.RegClass)->getSize();
+ }
+
+ /// \brief This form should usually be preferred since it handles operands
+ /// with unknown register classes.
+ unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
+ return getOpRegClass(MI, OpNo)->getSize();
+ }
/// \returns true if it is legal for the operand at index \p OpNo
/// to read a VGPR.
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 175e11d709c..a749e7f861b 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -383,15 +383,13 @@ class SOP1_Pseudo pattern> :
let isPseudo = 1;
}
-class SOP1_Real_si pattern> :
- SOP1 ,
+class SOP1_Real_si :
+ SOP1 ,
SOP1e ,
SIMCInstr;
-class SOP1_Real_vi pattern> :
- SOP1 ,
+class SOP1_Real_vi :
+ SOP1 ,
SOP1e ,
SIMCInstr;
@@ -400,10 +398,10 @@ multiclass SOP1_32 pattern> {
pattern>;
def _si : SOP1_Real_si ;
+ opName#" $dst, $src0">;
def _vi : SOP1_Real_vi ;
+ opName#" $dst, $src0">;
}
multiclass SOP1_64 pattern> {
@@ -411,10 +409,10 @@ multiclass SOP1_64 pattern> {
pattern>;
def _si : SOP1_Real_si ;
+ opName#" $dst, $src0">;
def _vi : SOP1_Real_vi ;
+ opName#" $dst, $src0">;
}
// no input, 64-bit output.
@@ -422,12 +420,12 @@ multiclass SOP1_64_0 pattern> {
def "" : SOP1_Pseudo ;
def _si : SOP1_Real_si {
+ opName#" $dst"> {
let SSRC0 = 0;
}
def _vi : SOP1_Real_vi {
+ opName#" $dst"> {
let SSRC0 = 0;
}
}
@@ -438,10 +436,10 @@ multiclass SOP1_32_64 pattern> {
pattern>;
def _si : SOP1_Real_si ;
+ opName#" $dst, $src0">;
def _vi : SOP1_Real_vi ;
+ opName#" $dst, $src0">;
}
class SOP2_Pseudo pattern> :
@@ -451,15 +449,13 @@ class SOP2_Pseudo pattern> :
let Size = 4;
}
-class SOP2_Real_si pattern> :
- SOP2,
+class SOP2_Real_si :
+ SOP2,
SOP2e,
SIMCInstr;
-class SOP2_Real_vi pattern> :
- SOP2,
+class SOP2_Real_vi :
+ SOP2,
SOP2e,
SIMCInstr;
@@ -469,11 +465,11 @@ multiclass SOP2_SELECT_32 pattern> {
def _si : SOP2_Real_si ;
+ opName#" $dst, $src0, $src1 [$scc]">;
def _vi : SOP2_Real_vi ;
+ opName#" $dst, $src0, $src1 [$scc]">;
}
multiclass SOP2_32 pattern> {
@@ -481,10 +477,10 @@ multiclass SOP2_32 pattern> {
(ins SSrc_32:$src0, SSrc_32:$src1), pattern>;
def _si : SOP2_Real_si ;
+ (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
def _vi : SOP2_Real_vi ;
+ (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
}
multiclass SOP2_64 pattern> {
@@ -492,10 +488,10 @@ multiclass SOP2_64 pattern> {
(ins SSrc_64:$src0, SSrc_64:$src1), pattern>;
def _si : SOP2_Real_si ;
+ (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
def _vi : SOP2_Real_vi ;
+ (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1">;
}
multiclass SOP2_64_32 pattern> {
@@ -503,10 +499,10 @@ multiclass SOP2_64_32 pattern> {
(ins SSrc_64:$src0, SSrc_32:$src1), pattern>;
def _si : SOP2_Real_si ;
+ (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
def _vi : SOP2_Real_vi ;
+ (ins SSrc_64:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1">;
}
@@ -527,15 +523,13 @@ class SOPK_Pseudo pattern> :
let isPseudo = 1;
}
-class SOPK_Real_si pattern> :
- SOPK ,
+class SOPK_Real_si :
+ SOPK ,
SOPKe ,
SIMCInstr;
-class SOPK_Real_vi pattern> :
- SOPK ,
+class SOPK_Real_vi :
+ SOPK ,
SOPKe ,
SIMCInstr;
@@ -544,10 +538,10 @@ multiclass SOPK_32 pattern> {
pattern>;
def _si : SOPK_Real_si ;
+ opName#" $dst, $src0">;
def _vi : SOPK_Real_vi ;
+ opName#" $dst, $src0">;
}
multiclass SOPK_SCC pattern> {
@@ -555,10 +549,10 @@ multiclass SOPK_SCC pattern> {
(ins SReg_32:$src0, u16imm:$src1), pattern>;
def _si : SOPK_Real_si ;
+ (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
def _vi : SOPK_Real_vi ;
+ (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
}
//===----------------------------------------------------------------------===//
@@ -792,6 +786,7 @@ def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
+def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
let Src0RC32 = VCSrc_32;
@@ -808,9 +803,14 @@ def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
}
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
+def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
+def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
+ field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
+ field string Asm = " $dst, $src0, $vsrc1, $src2";
+}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
@@ -847,6 +847,15 @@ multiclass VOP1_m pattern,
SIMCInstr ;
}
+multiclass VOP1SI_m pattern,
+ string opName> {
+ def "" : VOP1_Pseudo ;
+
+ def _si : VOP1,
+ SIMCInstr ;
+ // No VI instruction. This class is for SI only.
+}
+
class VOP2_Pseudo pattern, string opName> :
VOP2Common ,
VOP ,
@@ -855,25 +864,22 @@ class VOP2_Pseudo pattern, string opName> :
}
multiclass VOP2SI_m pattern,
- string opName, string revOpSI> {
+ string opName, string revOp> {
def "" : VOP2_Pseudo ,
- VOP2_REV;
+ VOP2_REV;
def _si : VOP2 ,
- VOP2_REV,
SIMCInstr ;
}
multiclass VOP2_m pattern,
- string opName, string revOpSI, string revOpVI> {
+ string opName, string revOp> {
def "" : VOP2_Pseudo ,
- VOP2_REV;
+ VOP2_REV;
def _si : VOP2 ,
- VOP2_REV,
SIMCInstr ;
def _vi : VOP2 ,
- VOP2_REV,
SIMCInstr ;
}
@@ -905,6 +911,16 @@ class VOP3_Real_vi op, dag outs, dag ins, string asm, string opName> :
VOP3e_vi ,
SIMCInstr ;
+class VOP3b_Real_si op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common ,
+ VOP3be ,
+ SIMCInstr;
+
+class VOP3b_Real_vi op, dag outs, dag ins, string asm, string opName> :
+ VOP3Common ,
+ VOP3be_vi ,
+ SIMCInstr ;
+
multiclass VOP3_m pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
@@ -946,24 +962,45 @@ multiclass VOP3_1_m ;
}
+multiclass VOP3SI_1_m pattern, string opName, bit HasMods = 1> {
+
+ def "" : VOP3_Pseudo ;
+
+ def _si : VOP3_Real_si ,
+ VOP3DisableFields<0, 0, HasMods>;
+ // No VI instruction. This class is for SI only.
+}
+
multiclass VOP3_2_m pattern, string opName, string revOpSI, string revOpVI,
+ list pattern, string opName, string revOp,
bit HasMods = 1, bit UseFullOp = 0> {
def "" : VOP3_Pseudo ,
- VOP2_REV;
+ VOP2_REV;
- def _si : VOP3_Real_si ,
- VOP2_REV,
+ def _si : VOP3_Real_si ,
VOP3DisableFields<1, 0, HasMods>;
- def _vi : VOP3_Real_vi ,
- VOP2_REV,
+ def _vi : VOP3_Real_vi ,
VOP3DisableFields<1, 0, HasMods>;
}
+multiclass VOP3SI_2_m pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+
+ def "" : VOP3_Pseudo ,
+ VOP2_REV;
+
+ def _si : VOP3_Real_si ,
+ VOP3DisableFields<1, 0, HasMods>;
+
+ // No VI instruction. This class is for SI only.
+}
+
+// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
+// option of implicit vcc use?
multiclass VOP3b_2_m pattern, string opName, string revOp,
bit HasMods = 1, bit UseFullOp = 0> {
@@ -974,19 +1011,27 @@ multiclass VOP3b_2_m ,
- VOP3DisableFields<1, 0, HasMods>,
- SIMCInstr,
- VOP2_REV