mirror of
https://github.com/opnsense/src.git
synced 2026-06-09 08:43:19 -04:00
Vendor import of llvm-project branch release/13.x llvmorg-13.0.0-rc2-43-gf56129fe78d5.
This commit is contained in:
parent
d545c2ce5a
commit
f9ef3ff6e2
61 changed files with 761 additions and 356 deletions
|
|
@ -9653,11 +9653,19 @@ static QualType mergeEnumWithInteger(ASTContext &Context, const EnumType *ET,
|
|||
QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
|
||||
bool OfBlockPointer,
|
||||
bool Unqualified, bool BlockReturnType) {
|
||||
// For C++ we will not reach this code with reference types (see below),
|
||||
// for OpenMP variant call overloading we might.
|
||||
//
|
||||
// C++ [expr]: If an expression initially has the type "reference to T", the
|
||||
// type is adjusted to "T" prior to any further analysis, the expression
|
||||
// designates the object or function denoted by the reference, and the
|
||||
// expression is an lvalue unless the reference is an rvalue reference and
|
||||
// the expression is a function call (possibly inside parentheses).
|
||||
if (LangOpts.OpenMP && LHS->getAs<ReferenceType>() &&
|
||||
RHS->getAs<ReferenceType>() && LHS->getTypeClass() == RHS->getTypeClass())
|
||||
return mergeTypes(LHS->getAs<ReferenceType>()->getPointeeType(),
|
||||
RHS->getAs<ReferenceType>()->getPointeeType(),
|
||||
OfBlockPointer, Unqualified, BlockReturnType);
|
||||
if (LHS->getAs<ReferenceType>() || RHS->getAs<ReferenceType>())
|
||||
return {};
|
||||
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@ M68kTargetInfo::M68kTargetInfo(const llvm::Triple &Triple,
|
|||
// FIXME how to wire it with the used object format?
|
||||
Layout += "-m:e";
|
||||
|
||||
// M68k pointers are always 32 bit wide even for 16 bit cpus
|
||||
Layout += "-p:32:32";
|
||||
// M68k pointers are always 32 bit wide even for 16-bit CPUs
|
||||
Layout += "-p:32:16:32";
|
||||
|
||||
// M68k integer data types
|
||||
Layout += "-i8:8:8-i16:16:16-i32:16:32";
|
||||
|
|
|
|||
|
|
@ -460,6 +460,11 @@ protected:
|
|||
Builder.defineMacro("_REENTRANT");
|
||||
if (this->HasFloat128)
|
||||
Builder.defineMacro("__FLOAT128__");
|
||||
|
||||
if (Opts.C11) {
|
||||
Builder.defineMacro("__STDC_NO_ATOMICS__");
|
||||
Builder.defineMacro("__STDC_NO_THREADS__");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -5568,7 +5568,6 @@ llvm::StringRef clang::driver::getDriverMode(StringRef ProgName,
|
|||
if (!Arg.startswith(OptName))
|
||||
continue;
|
||||
Opt = Arg;
|
||||
break;
|
||||
}
|
||||
if (Opt.empty())
|
||||
Opt = ToolChain::getTargetAndModeFromProgramName(ProgName).DriverMode;
|
||||
|
|
|
|||
|
|
@ -893,3 +893,38 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
|
|||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
llvm::SmallVector<std::string, 12>
|
||||
ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
|
||||
const std::string &GPUArch) const {
|
||||
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
|
||||
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
|
||||
|
||||
std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
|
||||
if (LibDeviceFile.empty()) {
|
||||
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
|
||||
return {};
|
||||
}
|
||||
|
||||
// If --hip-device-lib is not set, add the default bitcode libraries.
|
||||
// TODO: There are way too many flags that change this. Do we need to check
|
||||
// them all?
|
||||
bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
|
||||
options::OPT_fno_gpu_flush_denormals_to_zero,
|
||||
getDefaultDenormsAreZeroForTarget(Kind));
|
||||
bool FiniteOnly = DriverArgs.hasFlag(
|
||||
options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
|
||||
bool UnsafeMathOpt =
|
||||
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
|
||||
options::OPT_fno_unsafe_math_optimizations, false);
|
||||
bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
|
||||
options::OPT_fno_fast_math, false);
|
||||
bool CorrectSqrt = DriverArgs.hasFlag(
|
||||
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
|
||||
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
|
||||
bool Wave64 = isWave64(DriverArgs, Kind);
|
||||
|
||||
return RocmInstallation.getCommonBitcodeLibs(
|
||||
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
|
||||
FastRelaxedMath, CorrectSqrt);
|
||||
}
|
||||
|
|
@ -136,6 +136,11 @@ public:
|
|||
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
|
||||
llvm::opt::ArgStringList &CC1Args,
|
||||
Action::OffloadKind DeviceOffloadKind) const override;
|
||||
|
||||
// Returns a list of device library names shared by different languages
|
||||
llvm::SmallVector<std::string, 12>
|
||||
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
|
||||
const std::string &GPUArch) const;
|
||||
};
|
||||
|
||||
} // end namespace toolchains
|
||||
|
|
|
|||
|
|
@ -9,12 +9,14 @@
|
|||
#include "AMDGPUOpenMP.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "CommonArgs.h"
|
||||
#include "ToolChains/ROCm.h"
|
||||
#include "clang/Basic/DiagnosticDriver.h"
|
||||
#include "clang/Driver/Compilation.h"
|
||||
#include "clang/Driver/Driver.h"
|
||||
#include "clang/Driver/DriverDiagnostic.h"
|
||||
#include "clang/Driver/InputInfo.h"
|
||||
#include "clang/Driver/Options.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/FormatAdapters.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
|
|
@ -84,14 +86,34 @@ static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
|
|||
} // namespace
|
||||
|
||||
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
|
||||
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
|
||||
const ArgList &Args, StringRef SubArchName,
|
||||
StringRef OutputFilePrefix) const {
|
||||
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
|
||||
const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
|
||||
StringRef SubArchName, StringRef OutputFilePrefix) const {
|
||||
ArgStringList CmdArgs;
|
||||
|
||||
for (const auto &II : Inputs)
|
||||
if (II.isFilename())
|
||||
CmdArgs.push_back(II.getFilename());
|
||||
|
||||
if (Args.hasArg(options::OPT_l)) {
|
||||
auto Lm = Args.getAllArgValues(options::OPT_l);
|
||||
bool HasLibm = false;
|
||||
for (auto &Lib : Lm) {
|
||||
if (Lib == "m") {
|
||||
HasLibm = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (HasLibm) {
|
||||
SmallVector<std::string, 12> BCLibs =
|
||||
AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
|
||||
llvm::for_each(BCLibs, [&](StringRef BCFile) {
|
||||
CmdArgs.push_back(Args.MakeArgString(BCFile));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add an intermediate output file.
|
||||
CmdArgs.push_back("-o");
|
||||
const char *OutputFileName =
|
||||
|
|
@ -180,8 +202,8 @@ void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
assert(Prefix.length() && "no linker inputs are files ");
|
||||
|
||||
// Each command outputs different files.
|
||||
const char *LLVMLinkCommand =
|
||||
constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
|
||||
const char *LLVMLinkCommand = constructLLVMLinkCommand(
|
||||
AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
|
||||
|
||||
// Produce readable assembly if save-temps is enabled.
|
||||
if (C.getDriver().isSaveTempsEnabled())
|
||||
|
|
|
|||
|
|
@ -16,6 +16,10 @@
|
|||
namespace clang {
|
||||
namespace driver {
|
||||
|
||||
namespace toolchains {
|
||||
class AMDGPUOpenMPToolChain;
|
||||
}
|
||||
|
||||
namespace tools {
|
||||
|
||||
namespace AMDGCN {
|
||||
|
|
@ -35,11 +39,11 @@ public:
|
|||
|
||||
private:
|
||||
/// \return llvm-link output file name.
|
||||
const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA,
|
||||
const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &Args,
|
||||
llvm::StringRef SubArchName,
|
||||
llvm::StringRef OutputFilePrefix) const;
|
||||
const char *constructLLVMLinkCommand(
|
||||
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
|
||||
const JobAction &JA, const InputInfoList &Inputs,
|
||||
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
|
||||
llvm::StringRef OutputFilePrefix) const;
|
||||
|
||||
/// \return llc output file name.
|
||||
const char *constructLlcCommand(Compilation &C, const JobAction &JA,
|
||||
|
|
|
|||
|
|
@ -1255,7 +1255,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
|
|||
// If we are offloading to a target via OpenMP we need to include the
|
||||
// openmp_wrappers folder which contains alternative system headers.
|
||||
if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
|
||||
getToolChain().getTriple().isNVPTX()){
|
||||
(getToolChain().getTriple().isNVPTX() ||
|
||||
getToolChain().getTriple().isAMDGCN())) {
|
||||
if (!Args.hasArg(options::OPT_nobuiltininc)) {
|
||||
// Add openmp_wrappers/* to our system include path. This lets us wrap
|
||||
// standard library headers.
|
||||
|
|
|
|||
|
|
@ -775,7 +775,8 @@ void tools::linkSanitizerRuntimeDeps(const ToolChain &TC,
|
|||
CmdArgs.push_back("-ldl");
|
||||
// Required for backtrace on some OSes
|
||||
if (TC.getTriple().isOSFreeBSD() ||
|
||||
TC.getTriple().isOSNetBSD())
|
||||
TC.getTriple().isOSNetBSD() ||
|
||||
TC.getTriple().isOSOpenBSD())
|
||||
CmdArgs.push_back("-lexecinfo");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -395,35 +395,8 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
|
|||
}
|
||||
StringRef GpuArch = getGPUArch(DriverArgs);
|
||||
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
|
||||
(void)GpuArch;
|
||||
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
|
||||
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
|
||||
|
||||
std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
|
||||
if (LibDeviceFile.empty()) {
|
||||
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
|
||||
return {};
|
||||
}
|
||||
|
||||
// If --hip-device-lib is not set, add the default bitcode libraries.
|
||||
// TODO: There are way too many flags that change this. Do we need to check
|
||||
// them all?
|
||||
bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
|
||||
options::OPT_fno_gpu_flush_denormals_to_zero,
|
||||
getDefaultDenormsAreZeroForTarget(Kind));
|
||||
bool FiniteOnly =
|
||||
DriverArgs.hasFlag(options::OPT_ffinite_math_only,
|
||||
options::OPT_fno_finite_math_only, false);
|
||||
bool UnsafeMathOpt =
|
||||
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
|
||||
options::OPT_fno_unsafe_math_optimizations, false);
|
||||
bool FastRelaxedMath = DriverArgs.hasFlag(
|
||||
options::OPT_ffast_math, options::OPT_fno_fast_math, false);
|
||||
bool CorrectSqrt = DriverArgs.hasFlag(
|
||||
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
|
||||
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
|
||||
bool Wave64 = isWave64(DriverArgs, Kind);
|
||||
|
||||
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
|
||||
options::OPT_fno_gpu_sanitize, false)) {
|
||||
auto AsanRTL = RocmInstallation.getAsanRTLPath();
|
||||
|
|
@ -442,10 +415,8 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
|
|||
// Add the HIP specific bitcode library.
|
||||
BCLibs.push_back(RocmInstallation.getHIPPath().str());
|
||||
|
||||
// Add the generic set of libraries.
|
||||
BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
|
||||
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
|
||||
FastRelaxedMath, CorrectSqrt));
|
||||
// Add common device libraries like ocml etc.
|
||||
BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
|
||||
|
||||
// Add instrument lib.
|
||||
auto InstLib =
|
||||
|
|
|
|||
|
|
@ -174,6 +174,11 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
|
||||
|
||||
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
|
||||
// Use the static OpenMP runtime with -static-openmp
|
||||
bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
|
||||
!Args.hasArg(options::OPT_static);
|
||||
addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP);
|
||||
|
||||
if (D.CCCIsCXX()) {
|
||||
if (ToolChain.ShouldLinkCXXStdlib(Args))
|
||||
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
|
||||
|
|
@ -221,6 +226,8 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
|
||||
}
|
||||
|
||||
ToolChain.addProfileRTLibs(Args, CmdArgs);
|
||||
|
||||
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
|
||||
C.addCommand(std::make_unique<Command>(JA, *this,
|
||||
ResponseFileSupport::AtFileCurCP(),
|
||||
|
|
|
|||
|
|
@ -34,10 +34,12 @@ __DEVICE__ unsigned long long __brevll(unsigned long long __a) {
|
|||
return __nv_brevll(__a);
|
||||
}
|
||||
#if defined(__cplusplus)
|
||||
__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
|
||||
__DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); }
|
||||
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
|
||||
#else
|
||||
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
|
||||
__DEVICE__ void __attribute__((overloadable)) __brkpt(void) {
|
||||
__asm__ __volatile__("brkpt;");
|
||||
}
|
||||
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
|
||||
#endif
|
||||
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
|
||||
|
|
@ -507,7 +509,7 @@ __DEVICE__ float __powf(float __a, float __b) {
|
|||
}
|
||||
|
||||
// Parameter must have a known integer value.
|
||||
#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a))
|
||||
#define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a))
|
||||
__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
|
||||
__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
|
||||
return __nv_sad(__a, __b, __c);
|
||||
|
|
@ -526,7 +528,7 @@ __DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }
|
|||
__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
|
||||
__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
|
||||
__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
|
||||
__DEVICE__ void __trap(void) { asm volatile("trap;"); }
|
||||
__DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); }
|
||||
__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
|
||||
return __nvvm_atom_add_gen_i((int *)__p, __v);
|
||||
}
|
||||
|
|
@ -1051,122 +1053,136 @@ __DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vabs2(unsigned int __a) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
__asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vabs4(unsigned int __a) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
__asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
|
||||
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
__asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
__asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(0), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.u32.u32.eq %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1174,7 +1190,9 @@ __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.u32.u32.eq %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1182,7 +1200,9 @@ __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.s32.s32.ge %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1190,7 +1210,9 @@ __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.s32.s32.ge %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1198,7 +1220,9 @@ __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.u32.u32.ge %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1206,7 +1230,9 @@ __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.u32.u32.ge %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1214,7 +1240,9 @@ __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.s32.s32.gt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1222,7 +1250,9 @@ __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.s32.s32.gt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1230,7 +1260,9 @@ __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.u32.u32.gt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1238,7 +1270,9 @@ __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.u32.u32.gt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1246,7 +1280,9 @@ __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.s32.s32.le %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1254,7 +1290,9 @@ __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.s32.s32.le %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1262,7 +1300,9 @@ __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.u32.u32.le %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1270,7 +1310,9 @@ __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.u32.u32.le %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1278,7 +1320,9 @@ __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.s32.s32.lt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1286,7 +1330,9 @@ __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.s32.s32.lt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1294,7 +1340,9 @@ __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.u32.u32.lt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1302,7 +1350,9 @@ __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.u32.u32.lt %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1310,7 +1360,9 @@ __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset2.u32.u32.ne %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1318,7 +1370,9 @@ __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vset4.u32.u32.ne %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
|
||||
|
|
@ -1345,94 +1399,112 @@ __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
|
|||
unsigned mask = __vcmpgts2(__a, __b);
|
||||
r = (__a & mask) | (__b & ~mask);
|
||||
} else {
|
||||
asm("vmax2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
}
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
|
||||
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }
|
||||
|
||||
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }
|
||||
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vnegss2(unsigned int __a) {
|
||||
|
|
@ -1440,9 +1512,9 @@ __DEVICE__ unsigned int __vnegss2(unsigned int __a) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vnegss4(unsigned int __a) {
|
||||
|
|
@ -1450,16 +1522,16 @@ __DEVICE__ unsigned int __vnegss4(unsigned int __a) {
|
|||
}
|
||||
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
|
||||
unsigned int r;
|
||||
asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
__asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
|
||||
: "=r"(r)
|
||||
: "r"(__a), "r"(__b), "r"(0));
|
||||
return r;
|
||||
}
|
||||
#endif // CUDA_VERSION >= 9020
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
#ifndef __CLANG_HIP_CMATH_H__
|
||||
#define __CLANG_HIP_CMATH_H__
|
||||
|
||||
#if !defined(__HIP__)
|
||||
#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
|
||||
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
|
||||
#endif
|
||||
|
||||
|
|
@ -25,31 +25,43 @@
|
|||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#pragma push_macro("__DEVICE__")
|
||||
#pragma push_macro("__CONSTEXPR__")
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#define __DEVICE__ static __attribute__((always_inline, nothrow))
|
||||
#define __CONSTEXPR__ constexpr
|
||||
#else
|
||||
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
||||
#define __CONSTEXPR__
|
||||
#endif // __OPENMP_AMDGCN__
|
||||
|
||||
// Start with functions that cannot be defined by DEF macros below.
|
||||
#if defined(__cplusplus)
|
||||
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
|
||||
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
|
||||
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
|
||||
__DEVICE__ long abs(long __n) { return ::labs(__n); }
|
||||
__DEVICE__ float fma(float __x, float __y, float __z) {
|
||||
#if defined __OPENMP_AMDGCN__
|
||||
__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); }
|
||||
#endif
|
||||
__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
|
||||
__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
|
||||
__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
|
||||
return ::fmaf(__x, __y, __z);
|
||||
}
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
// The value returned by fpclassify is platform dependent, therefore it is not
|
||||
// supported by hipRTC.
|
||||
__DEVICE__ int fpclassify(float __x) {
|
||||
__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
|
||||
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
||||
FP_ZERO, __x);
|
||||
}
|
||||
__DEVICE__ int fpclassify(double __x) {
|
||||
__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
|
||||
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
|
||||
FP_ZERO, __x);
|
||||
}
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
__DEVICE__ float frexp(float __arg, int *__exp) {
|
||||
__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
|
||||
return ::frexpf(__arg, __exp);
|
||||
}
|
||||
|
||||
|
|
@ -71,93 +83,101 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
|
|||
// of the variants inside the inner region and avoid the clash.
|
||||
#pragma omp begin declare variant match(implementation = {vendor(llvm)})
|
||||
|
||||
__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
|
||||
__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
|
||||
__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
|
||||
__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
|
||||
__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
|
||||
__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }
|
||||
|
||||
#pragma omp end declare variant
|
||||
#endif // defined(__OPENMP_AMDGCN__)
|
||||
|
||||
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
|
||||
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
|
||||
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
|
||||
__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
|
||||
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
|
||||
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
|
||||
|
||||
#if defined(__OPENMP_AMDGCN__)
|
||||
#pragma omp end declare variant
|
||||
#endif // defined(__OPENMP_AMDGCN__)
|
||||
|
||||
__DEVICE__ bool isgreater(float __x, float __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
|
||||
return __builtin_isgreater(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool isgreater(double __x, double __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
|
||||
return __builtin_isgreater(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool isgreaterequal(float __x, float __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
|
||||
return __builtin_isgreaterequal(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool isgreaterequal(double __x, double __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
|
||||
return __builtin_isgreaterequal(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool isless(float __x, float __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
|
||||
return __builtin_isless(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool isless(double __x, double __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
|
||||
return __builtin_isless(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool islessequal(float __x, float __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
|
||||
return __builtin_islessequal(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool islessequal(double __x, double __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
|
||||
return __builtin_islessequal(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool islessgreater(float __x, float __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
|
||||
return __builtin_islessgreater(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool islessgreater(double __x, double __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
|
||||
return __builtin_islessgreater(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
|
||||
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
|
||||
__DEVICE__ bool isunordered(float __x, float __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
|
||||
return __builtin_isnormal(__x);
|
||||
}
|
||||
__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
|
||||
return __builtin_isnormal(__x);
|
||||
}
|
||||
__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
|
||||
return __builtin_isunordered(__x, __y);
|
||||
}
|
||||
__DEVICE__ bool isunordered(double __x, double __y) {
|
||||
__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
|
||||
return __builtin_isunordered(__x, __y);
|
||||
}
|
||||
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
|
||||
__DEVICE__ float pow(float __base, int __iexp) {
|
||||
__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
|
||||
return ::modff(__x, __iptr);
|
||||
}
|
||||
__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
|
||||
return ::powif(__base, __iexp);
|
||||
}
|
||||
__DEVICE__ double pow(double __base, int __iexp) {
|
||||
__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
|
||||
return ::powi(__base, __iexp);
|
||||
}
|
||||
__DEVICE__ float remquo(float __x, float __y, int *__quo) {
|
||||
__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
|
||||
return ::remquof(__x, __y, __quo);
|
||||
}
|
||||
__DEVICE__ float scalbln(float __x, long int __n) {
|
||||
__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
|
||||
return ::scalblnf(__x, __n);
|
||||
}
|
||||
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
|
||||
__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }
|
||||
|
||||
// Notably missing above is nexttoward. We omit it because
|
||||
// ocml doesn't provide an implementation, and we don't want to be in the
|
||||
// business of implementing tricky libm functions in this header.
|
||||
|
||||
// Other functions.
|
||||
__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
|
||||
__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
|
||||
_Float16 __z) {
|
||||
return __ocml_fma_f16(__x, __y, __z);
|
||||
}
|
||||
__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
|
||||
__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
|
||||
return __ocml_pown_f16(__base, __iexp);
|
||||
}
|
||||
|
||||
#ifndef __OPENMP_AMDGCN__
|
||||
// BEGIN DEF_FUN and HIP_OVERLOAD
|
||||
|
||||
// BEGIN DEF_FUN
|
||||
|
|
@ -168,18 +188,19 @@ __DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
|
|||
|
||||
// Define cmath functions with float argument and returns __retty.
|
||||
#define __DEF_FUN1(__retty, __func) \
|
||||
__DEVICE__ \
|
||||
__retty __func(float __x) { return __func##f(__x); }
|
||||
__DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }
|
||||
|
||||
// Define cmath functions with two float arguments and returns __retty.
|
||||
#define __DEF_FUN2(__retty, __func) \
|
||||
__DEVICE__ \
|
||||
__retty __func(float __x, float __y) { return __func##f(__x, __y); }
|
||||
__DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \
|
||||
return __func##f(__x, __y); \
|
||||
}
|
||||
|
||||
// Define cmath functions with a float and an int argument and returns __retty.
|
||||
#define __DEF_FUN2_FI(__retty, __func) \
|
||||
__DEVICE__ \
|
||||
__retty __func(float __x, int __y) { return __func##f(__x, __y); }
|
||||
__DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \
|
||||
return __func##f(__x, __y); \
|
||||
}
|
||||
|
||||
__DEF_FUN1(float, acos)
|
||||
__DEF_FUN1(float, acosh)
|
||||
|
|
@ -426,7 +447,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
|
|||
// floor(double).
|
||||
#define __HIP_OVERLOAD1(__retty, __fn) \
|
||||
template <typename __T> \
|
||||
__DEVICE__ \
|
||||
__DEVICE__ __CONSTEXPR__ \
|
||||
typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \
|
||||
__fn(__T __x) { \
|
||||
return ::__fn((double)__x); \
|
||||
|
|
@ -438,7 +459,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
|
|||
#if __cplusplus >= 201103L
|
||||
#define __HIP_OVERLOAD2(__retty, __fn) \
|
||||
template <typename __T1, typename __T2> \
|
||||
__DEVICE__ typename __hip_enable_if< \
|
||||
__DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \
|
||||
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \
|
||||
typename __hip::__promote<__T1, __T2>::type>::type \
|
||||
__fn(__T1 __x, __T2 __y) { \
|
||||
|
|
@ -448,10 +469,11 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
|
|||
#else
|
||||
#define __HIP_OVERLOAD2(__retty, __fn) \
|
||||
template <typename __T1, typename __T2> \
|
||||
__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
|
||||
__hip::is_arithmetic<__T2>::value, \
|
||||
__retty>::type \
|
||||
__fn(__T1 __x, __T2 __y) { \
|
||||
__DEVICE__ __CONSTEXPR__ \
|
||||
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
|
||||
__hip::is_arithmetic<__T2>::value, \
|
||||
__retty>::type \
|
||||
__fn(__T1 __x, __T2 __y) { \
|
||||
return __fn((double)__x, (double)__y); \
|
||||
}
|
||||
#endif
|
||||
|
|
@ -526,7 +548,7 @@ __HIP_OVERLOAD2(double, min)
|
|||
// Additional Overloads that don't quite match HIP_OVERLOAD.
|
||||
#if __cplusplus >= 201103L
|
||||
template <typename __T1, typename __T2, typename __T3>
|
||||
__DEVICE__ typename __hip_enable_if<
|
||||
__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
|
||||
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
|
||||
__hip::is_arithmetic<__T3>::value,
|
||||
typename __hip::__promote<__T1, __T2, __T3>::type>::type
|
||||
|
|
@ -536,31 +558,32 @@ fma(__T1 __x, __T2 __y, __T3 __z) {
|
|||
}
|
||||
#else
|
||||
template <typename __T1, typename __T2, typename __T3>
|
||||
__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||
__hip::is_arithmetic<__T2>::value &&
|
||||
__hip::is_arithmetic<__T3>::value,
|
||||
double>::type
|
||||
fma(__T1 __x, __T2 __y, __T3 __z) {
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||
__hip::is_arithmetic<__T2>::value &&
|
||||
__hip::is_arithmetic<__T3>::value,
|
||||
double>::type
|
||||
fma(__T1 __x, __T2 __y, __T3 __z) {
|
||||
return ::fma((double)__x, (double)__y, (double)__z);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||
frexp(__T __x, int *__exp) {
|
||||
return ::frexp((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||
ldexp(__T __x, int __exp) {
|
||||
return ::ldexp((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||
modf(__T __x, double *__exp) {
|
||||
return ::modf((double)__x, __exp);
|
||||
|
|
@ -568,7 +591,7 @@ __DEVICE__
|
|||
|
||||
#if __cplusplus >= 201103L
|
||||
template <typename __T1, typename __T2>
|
||||
__DEVICE__
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||
__hip::is_arithmetic<__T2>::value,
|
||||
typename __hip::__promote<__T1, __T2>::type>::type
|
||||
|
|
@ -578,23 +601,24 @@ __DEVICE__
|
|||
}
|
||||
#else
|
||||
template <typename __T1, typename __T2>
|
||||
__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||
__hip::is_arithmetic<__T2>::value,
|
||||
double>::type
|
||||
remquo(__T1 __x, __T2 __y, int *__quo) {
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
|
||||
__hip::is_arithmetic<__T2>::value,
|
||||
double>::type
|
||||
remquo(__T1 __x, __T2 __y, int *__quo) {
|
||||
return ::remquo((double)__x, (double)__y, __quo);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||
scalbln(__T __x, long int __exp) {
|
||||
return ::scalbln((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__
|
||||
__DEVICE__ __CONSTEXPR__
|
||||
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
|
||||
scalbn(__T __x, int __exp) {
|
||||
return ::scalbn((double)__x, __exp);
|
||||
|
|
@ -607,8 +631,10 @@ __DEVICE__
|
|||
|
||||
// END DEF_FUN and HIP_OVERLOAD
|
||||
|
||||
#endif // ifndef __OPENMP_AMDGCN__
|
||||
#endif // defined(__cplusplus)
|
||||
|
||||
#ifndef __OPENMP_AMDGCN__
|
||||
// Define these overloads inside the namespace our standard library uses.
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
|
@ -781,22 +807,26 @@ _GLIBCXX_END_NAMESPACE_VERSION
|
|||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif // defined(__cplusplus)
|
||||
__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) {
|
||||
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
|
||||
double y) {
|
||||
return cosh(x) * y;
|
||||
}
|
||||
__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) {
|
||||
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
|
||||
float y) {
|
||||
return coshf(x) * y;
|
||||
}
|
||||
__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) {
|
||||
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
|
||||
return fpclassify(*p);
|
||||
}
|
||||
__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) {
|
||||
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
|
||||
return fpclassify(*p);
|
||||
}
|
||||
__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) {
|
||||
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
|
||||
double y) {
|
||||
return sinh(x) * y;
|
||||
}
|
||||
__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
|
||||
__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
|
||||
float y) {
|
||||
return sinhf(x) * y;
|
||||
}
|
||||
#if defined(__cplusplus)
|
||||
|
|
@ -804,7 +834,9 @@ __DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
|
|||
#endif // defined(__cplusplus)
|
||||
#endif // defined(_MSC_VER)
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
#endif // ifndef __OPENMP_AMDGCN__
|
||||
|
||||
#pragma pop_macro("__DEVICE__")
|
||||
#pragma pop_macro("__CONSTEXPR__")
|
||||
|
||||
#endif // __CLANG_HIP_CMATH_H__
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
#ifndef __CLANG_HIP_MATH_H__
|
||||
#define __CLANG_HIP_MATH_H__
|
||||
|
||||
#if !defined(__HIP__)
|
||||
#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
|
||||
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
|
||||
#endif
|
||||
|
||||
|
|
@ -19,18 +19,30 @@
|
|||
#endif
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#endif // __HIPCC_RTC__
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#pragma push_macro("__DEVICE__")
|
||||
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
|
||||
#else
|
||||
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
// A few functions return bool type starting only in C++11.
|
||||
#pragma push_macro("__RETURN_TYPE")
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#define __RETURN_TYPE int
|
||||
#else
|
||||
#if defined(__cplusplus)
|
||||
#define __RETURN_TYPE bool
|
||||
#else
|
||||
#define __RETURN_TYPE int
|
||||
#endif
|
||||
#endif // __OPENMP_AMDGCN__
|
||||
|
||||
#if defined (__cplusplus) && __cplusplus < 201103L
|
||||
// emulate static_assert on type sizes
|
||||
|
|
@ -249,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
|
|||
__DEVICE__
|
||||
float frexpf(float __x, int *__nptr) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
float __r =
|
||||
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__nptr = __tmp;
|
||||
|
|
@ -334,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); }
|
|||
__DEVICE__
|
||||
float modff(float __x, float *__iptr) {
|
||||
float __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
float __r =
|
||||
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__iptr = __tmp;
|
||||
|
|
@ -414,6 +432,9 @@ float remainderf(float __x, float __y) {
|
|||
__DEVICE__
|
||||
float remquof(float __x, float __y, int *__quo) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
float __r = __ocml_remquo_f32(
|
||||
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__quo = __tmp;
|
||||
|
|
@ -470,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
|
|||
__DEVICE__
|
||||
void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
float __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr =
|
||||
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
|
@ -478,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
|||
__DEVICE__
|
||||
void sincospif(float __x, float *__sinptr, float *__cosptr) {
|
||||
float __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr = __ocml_sincospi_f32(
|
||||
__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
|
@ -790,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
|
|||
__DEVICE__
|
||||
double frexp(double __x, int *__nptr) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
double __r =
|
||||
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__nptr = __tmp;
|
||||
|
|
@ -874,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); }
|
|||
__DEVICE__
|
||||
double modf(double __x, double *__iptr) {
|
||||
double __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
double __r =
|
||||
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||
*__iptr = __tmp;
|
||||
|
|
@ -962,6 +995,9 @@ double remainder(double __x, double __y) {
|
|||
__DEVICE__
|
||||
double remquo(double __x, double __y, int *__quo) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
double __r = __ocml_remquo_f64(
|
||||
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__quo = __tmp;
|
||||
|
|
@ -1020,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); }
|
|||
__DEVICE__
|
||||
void sincos(double __x, double *__sinptr, double *__cosptr) {
|
||||
double __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr = __ocml_sincos_f64(
|
||||
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
|
@ -1028,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
|
|||
__DEVICE__
|
||||
void sincospi(double __x, double *__sinptr, double *__cosptr) {
|
||||
double __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr = __ocml_sincospi_f64(
|
||||
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
|
@ -1262,7 +1304,7 @@ float min(float __x, float __y) { return fminf(__x, __y); }
|
|||
__DEVICE__
|
||||
double min(double __x, double __y) { return fmin(__x, __y); }
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||
__host__ inline static int min(int __arg1, int __arg2) {
|
||||
return std::min(__arg1, __arg2);
|
||||
}
|
||||
|
|
@ -1270,7 +1312,7 @@ __host__ inline static int min(int __arg1, int __arg2) {
|
|||
__host__ inline static int max(int __arg1, int __arg2) {
|
||||
return std::max(__arg1, __arg2);
|
||||
}
|
||||
#endif // __HIPCC_RTC__
|
||||
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
|
||||
#endif
|
||||
|
||||
#pragma pop_macro("__DEVICE__")
|
||||
|
|
|
|||
|
|
@ -14,13 +14,13 @@
|
|||
#error "This file is for OpenMP compilation only."
|
||||
#endif
|
||||
|
||||
#pragma omp begin declare variant match( \
|
||||
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#pragma omp begin declare variant match( \
|
||||
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
|
||||
|
||||
#define __CUDA__
|
||||
#define __OPENMP_NVPTX__
|
||||
|
||||
|
|
@ -33,12 +33,34 @@ extern "C" {
|
|||
#undef __OPENMP_NVPTX__
|
||||
#undef __CUDA__
|
||||
|
||||
#pragma omp end declare variant
|
||||
|
||||
#ifdef __AMDGCN__
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
|
||||
// Import types which will be used by __clang_hip_libdevice_declares.h
|
||||
#ifndef __cplusplus
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#define __OPENMP_AMDGCN__
|
||||
#pragma push_macro("__device__")
|
||||
#define __device__
|
||||
|
||||
/// Include declarations for libdevice functions.
|
||||
#include <__clang_hip_libdevice_declares.h>
|
||||
|
||||
#pragma pop_macro("__device__")
|
||||
#undef __OPENMP_AMDGCN__
|
||||
|
||||
#pragma omp end declare variant
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#pragma omp end declare variant
|
||||
|
||||
// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
|
||||
// need to `include <new>` in C++ mode.
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -75,4 +75,58 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
|
|||
|
||||
#pragma omp end declare variant
|
||||
|
||||
#ifdef __AMDGCN__
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
|
||||
#pragma push_macro("__constant__")
|
||||
#define __constant__ __attribute__((constant))
|
||||
#define __OPENMP_AMDGCN__
|
||||
|
||||
#include <__clang_hip_cmath.h>
|
||||
|
||||
#pragma pop_macro("__constant__")
|
||||
#undef __OPENMP_AMDGCN__
|
||||
|
||||
// Define overloads otherwise which are absent
|
||||
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
|
||||
|
||||
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
|
||||
__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
|
||||
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
|
||||
__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
|
||||
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
|
||||
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
|
||||
__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
|
||||
__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
|
||||
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
|
||||
__DEVICE__ float erf(float __x) { return ::erff(__x); }
|
||||
__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
|
||||
__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
|
||||
__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
|
||||
__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
|
||||
__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
|
||||
__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
|
||||
__DEVICE__ float ldexp(float __arg, int __exp) {
|
||||
return ::ldexpf(__arg, __exp);
|
||||
}
|
||||
__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
|
||||
__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
|
||||
__DEVICE__ float logb(float __x) { return ::logbf(__x); }
|
||||
__DEVICE__ float nextafter(float __x, float __y) {
|
||||
return ::nextafterf(__x, __y);
|
||||
}
|
||||
__DEVICE__ float remainder(float __x, float __y) {
|
||||
return ::remainderf(__x, __y);
|
||||
}
|
||||
__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
|
||||
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
|
||||
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
|
||||
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
|
||||
__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
|
||||
|
||||
#undef __DEVICE__
|
||||
|
||||
#pragma omp end declare variant
|
||||
#endif // __AMDGCN__
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -48,4 +48,14 @@
|
|||
|
||||
#pragma omp end declare variant
|
||||
|
||||
#ifdef __AMDGCN__
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
|
||||
#define __OPENMP_AMDGCN__
|
||||
#include <__clang_hip_math.h>
|
||||
#undef __OPENMP_AMDGCN__
|
||||
|
||||
#pragma omp end declare variant
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1087,7 +1087,7 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
|
|||
|
||||
SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
|
||||
StartingScope, InstantiatingVarTemplate);
|
||||
if (D->isNRVOVariable()) {
|
||||
if (D->isNRVOVariable() && !Var->isInvalidDecl()) {
|
||||
QualType RT;
|
||||
if (auto *F = dyn_cast<FunctionDecl>(DC))
|
||||
RT = F->getReturnType();
|
||||
|
|
|
|||
|
|
@ -6578,7 +6578,7 @@ QualType TreeTransform<Derived>::TransformAutoType(TypeLocBuilder &TLB,
|
|||
NewTL.setFoundDecl(TL.getFoundDecl());
|
||||
NewTL.setLAngleLoc(TL.getLAngleLoc());
|
||||
NewTL.setRAngleLoc(TL.getRAngleLoc());
|
||||
for (unsigned I = 0; I < TL.getNumArgs(); ++I)
|
||||
for (unsigned I = 0; I < NewTL.getNumArgs(); ++I)
|
||||
NewTL.setArgLocInfo(I, NewTemplateArgs.arguments()[I].getLocInfo());
|
||||
|
||||
return Result;
|
||||
|
|
|
|||
|
|
@ -8456,6 +8456,8 @@ void ASTReader::ReadLateParsedTemplates(
|
|||
LPTMap.insert(std::make_pair(FD, std::move(LT)));
|
||||
}
|
||||
}
|
||||
|
||||
LateParsedTemplates.clear();
|
||||
}
|
||||
|
||||
void ASTReader::LoadSelector(Selector Sel) {
|
||||
|
|
|
|||
|
|
@ -592,11 +592,17 @@ intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR = 0;
|
|||
|
||||
/* This variable is a weak external reference which could be used to detect
|
||||
* whether or not the compiler defined this symbol. */
|
||||
#if defined(_WIN32)
|
||||
#if defined(_MSC_VER)
|
||||
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
|
||||
#pragma comment(linker, "/alternatename:" \
|
||||
INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" \
|
||||
INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
|
||||
#if defined(_M_IX86) || defined(__i386__)
|
||||
#define WIN_SYM_PREFIX "_"
|
||||
#else
|
||||
#define WIN_SYM_PREFIX
|
||||
#endif
|
||||
#pragma comment( \
|
||||
linker, "/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \
|
||||
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \
|
||||
INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
|
||||
#else
|
||||
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR
|
||||
__attribute__((weak, alias(INSTR_PROF_QUOTE(
|
||||
|
|
@ -651,8 +657,9 @@ static void initializeProfileForContinuousMode(void) {
|
|||
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
|
||||
const uint64_t *CountersEnd = __llvm_profile_end_counters();
|
||||
uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
|
||||
const uint64_t CountersOffset =
|
||||
sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
|
||||
const uint64_t CountersOffset = sizeof(__llvm_profile_header) +
|
||||
__llvm_write_binary_ids(NULL) +
|
||||
(DataSize * sizeof(__llvm_profile_data));
|
||||
|
||||
int Length = getCurFilenameLength();
|
||||
char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
|
||||
|
|
|
|||
|
|
@ -119,8 +119,9 @@ void __llvm_profile_initialize(void) {
|
|||
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
|
||||
const uint64_t *CountersEnd = __llvm_profile_end_counters();
|
||||
const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
|
||||
const uint64_t CountersOffset =
|
||||
sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
|
||||
const uint64_t CountersOffset = sizeof(__llvm_profile_header) +
|
||||
__llvm_write_binary_ids(NULL) +
|
||||
(DataSize * sizeof(__llvm_profile_data));
|
||||
uint64_t CountersSize = CountersEnd - CountersBegin;
|
||||
|
||||
/* Don't publish a VMO if there are no counters. */
|
||||
|
|
|
|||
|
|
@ -94,8 +94,8 @@ static size_t RoundUp(size_t size, size_t align) {
|
|||
* Write binary id length and then its data, because binary id does not
|
||||
* have a fixed length.
|
||||
*/
|
||||
int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
|
||||
const uint8_t *BinaryIdData) {
|
||||
static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
|
||||
const uint8_t *BinaryIdData) {
|
||||
ProfDataIOVec BinaryIdIOVec[] = {
|
||||
{&BinaryIdLen, sizeof(uint64_t), 1, 0},
|
||||
{BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0}};
|
||||
|
|
@ -119,7 +119,8 @@ int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
|
|||
* Note sections like .note.ABI-tag and .note.gnu.build-id are aligned
|
||||
* to 4 bytes, so round n_namesz and n_descsz to the nearest 4 bytes.
|
||||
*/
|
||||
int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) {
|
||||
static int WriteBinaryIdForNote(ProfDataWriter *Writer,
|
||||
const ElfW(Nhdr) * Note) {
|
||||
int BinaryIdSize = 0;
|
||||
|
||||
const char *NoteName = (const char *)Note + sizeof(ElfW(Nhdr));
|
||||
|
|
@ -144,8 +145,8 @@ int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) {
|
|||
* If writer is given, write binary ids into profiles.
|
||||
* If an error happens while writing, return -1.
|
||||
*/
|
||||
int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
|
||||
const ElfW(Nhdr) * NotesEnd) {
|
||||
static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
|
||||
const ElfW(Nhdr) * NotesEnd) {
|
||||
int TotalBinaryIdsSize = 0;
|
||||
while (Note < NotesEnd) {
|
||||
int Result = WriteBinaryIdForNote(Writer, Note);
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ wctrans_t wctrans(const char* property);
|
|||
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
#if defined(_LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H)
|
||||
using ::wint_t _LIBCPP_USING_IF_EXISTS;
|
||||
using ::wctrans_t _LIBCPP_USING_IF_EXISTS;
|
||||
using ::wctype_t _LIBCPP_USING_IF_EXISTS;
|
||||
|
|
@ -80,6 +81,7 @@ using ::towlower _LIBCPP_USING_IF_EXISTS;
|
|||
using ::towupper _LIBCPP_USING_IF_EXISTS;
|
||||
using ::towctrans _LIBCPP_USING_IF_EXISTS;
|
||||
using ::wctrans _LIBCPP_USING_IF_EXISTS;
|
||||
#endif // _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
|
||||
|
||||
_LIBCPP_END_NAMESPACE_STD
|
||||
|
||||
|
|
|
|||
|
|
@ -522,6 +522,7 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
|
|||
#include <algorithm>
|
||||
#include <compare>
|
||||
#include <cstdio> // EOF
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cwchar>
|
||||
#include <initializer_list>
|
||||
|
|
@ -1714,6 +1715,24 @@ private:
|
|||
return data() <= __p && __p <= data() + size();
|
||||
}
|
||||
|
||||
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||
void __throw_length_error() const {
|
||||
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||
__basic_string_common<true>::__throw_length_error();
|
||||
#else
|
||||
_VSTD::abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||
void __throw_out_of_range() const {
|
||||
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||
__basic_string_common<true>::__throw_out_of_range();
|
||||
#else
|
||||
_VSTD::abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
friend basic_string operator+<>(const basic_string&, const basic_string&);
|
||||
friend basic_string operator+<>(const value_type*, const basic_string&);
|
||||
friend basic_string operator+<>(value_type, const basic_string&);
|
||||
|
|
|
|||
|
|
@ -281,6 +281,7 @@ erase_if(vector<T, Allocator>& c, Predicate pred); // C++20
|
|||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <compare>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <initializer_list>
|
||||
#include <iosfwd> // for forward declaration of vector
|
||||
|
|
@ -390,6 +391,25 @@ protected:
|
|||
is_nothrow_move_assignable<allocator_type>::value)
|
||||
{__move_assign_alloc(__c, integral_constant<bool,
|
||||
__alloc_traits::propagate_on_container_move_assignment::value>());}
|
||||
|
||||
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||
void __throw_length_error() const {
|
||||
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||
__vector_base_common<true>::__throw_length_error();
|
||||
#else
|
||||
_VSTD::abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
|
||||
void __throw_out_of_range() const {
|
||||
#ifndef _LIBCPP_NO_EXCEPTIONS
|
||||
__vector_base_common<true>::__throw_out_of_range();
|
||||
#else
|
||||
_VSTD::abort();
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
void __copy_assign_alloc(const __vector_base& __c, true_type)
|
||||
|
|
|
|||
|
|
@ -50,8 +50,18 @@ wctrans_t wctrans(const char* property);
|
|||
#pragma GCC system_header
|
||||
#endif
|
||||
|
||||
// TODO:
|
||||
// In the future, we should unconditionally include_next <wctype.h> here and instead
|
||||
// have a mode under which the library does not need libc++'s <wctype.h> or <cwctype>
|
||||
// at all (i.e. a mode without wchar_t). As it stands, we need to do that to completely
|
||||
// bypass the using declarations in <cwctype> when we did not include <wctype.h>.
|
||||
// Otherwise, a using declaration like `using ::wint_t` in <cwctype> will refer to
|
||||
// nothing (with using_if_exists), and if we include another header that defines one
|
||||
// of these declarations (e.g. <wchar.h>), the second `using ::wint_t` with using_if_exists
|
||||
// will fail because it does not refer to the same declaration.
|
||||
#if __has_include_next(<wctype.h>)
|
||||
# include_next <wctype.h>
|
||||
# define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -97,9 +97,11 @@ _Unwind_Reason_Code ProcessDescriptors(
|
|||
case Descriptor::LU32:
|
||||
descriptor = getNextWord(descriptor, &length);
|
||||
descriptor = getNextWord(descriptor, &offset);
|
||||
break;
|
||||
case Descriptor::LU16:
|
||||
descriptor = getNextNibble(descriptor, &length);
|
||||
descriptor = getNextNibble(descriptor, &offset);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
return _URC_FAILURE;
|
||||
|
|
|
|||
|
|
@ -419,7 +419,7 @@ public:
|
|||
/// outer structure. SCCs do not support mutation of the call graph, that
|
||||
/// must be done through the containing \c RefSCC in order to fully reason
|
||||
/// about the ordering and connections of the graph.
|
||||
class SCC {
|
||||
class LLVM_EXTERNAL_VISIBILITY SCC {
|
||||
friend class LazyCallGraph;
|
||||
friend class LazyCallGraph::Node;
|
||||
|
||||
|
|
|
|||
|
|
@ -527,7 +527,7 @@ extern template class LoopBase<BasicBlock, Loop>;
|
|||
|
||||
/// Represents a single loop in the control flow graph. Note that not all SCCs
|
||||
/// in the CFG are necessarily loops.
|
||||
class Loop : public LoopBase<BasicBlock, Loop> {
|
||||
class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
|
||||
public:
|
||||
/// A range representing the start and end location of a loop.
|
||||
class LocRange {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ using LoopVectorTy = SmallVector<Loop *, 8>;
|
|||
class LPMUpdater;
|
||||
|
||||
/// This class represents a loop nest and can be used to query its properties.
|
||||
class LoopNest {
|
||||
class LLVM_EXTERNAL_VISIBILITY LoopNest {
|
||||
public:
|
||||
/// Construct a loop nest rooted by loop \p Root.
|
||||
LoopNest(Loop &Root, ScalarEvolution &SE);
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ struct HardwareLoopInfo {
|
|||
Loop *L = nullptr;
|
||||
BasicBlock *ExitBlock = nullptr;
|
||||
BranchInst *ExitBranch = nullptr;
|
||||
const SCEV *TripCount = nullptr;
|
||||
const SCEV *ExitCount = nullptr;
|
||||
IntegerType *CountType = nullptr;
|
||||
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
|
||||
// value in every iteration.
|
||||
|
|
|
|||
|
|
@ -227,7 +227,7 @@ struct LandingPadInfo {
|
|||
: LandingPadBlock(MBB) {}
|
||||
};
|
||||
|
||||
class MachineFunction {
|
||||
class LLVM_EXTERNAL_VISIBILITY MachineFunction {
|
||||
Function &F;
|
||||
const LLVMTargetMachine &Target;
|
||||
const TargetSubtargetInfo *STI;
|
||||
|
|
|
|||
|
|
@ -58,7 +58,8 @@ class User;
|
|||
class BranchProbabilityInfo;
|
||||
class BlockFrequencyInfo;
|
||||
|
||||
class Function : public GlobalObject, public ilist_node<Function> {
|
||||
class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
|
||||
public ilist_node<Function> {
|
||||
public:
|
||||
using BasicBlockListType = SymbolTableList<BasicBlock>;
|
||||
|
||||
|
|
|
|||
|
|
@ -64,9 +64,9 @@ class VersionTuple;
|
|||
/// constant references to global variables in the module. When a global
|
||||
/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
|
||||
/// The main container class for the LLVM Intermediate Representation.
|
||||
class Module {
|
||||
/// @name Types And Enumerations
|
||||
/// @{
|
||||
class LLVM_EXTERNAL_VISIBILITY Module {
|
||||
/// @name Types And Enumerations
|
||||
/// @{
|
||||
public:
|
||||
/// The type for the list of global variables.
|
||||
using GlobalListType = SymbolTableList<GlobalVariable>;
|
||||
|
|
|
|||
|
|
@ -13969,7 +13969,7 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
|
|||
if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
|
||||
return false;
|
||||
auto I = RewriteMap.find(LHSUnknown->getValue());
|
||||
const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
|
||||
const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
|
||||
RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
|
||||
getConstant(ExactRegion.getUnsignedMin()),
|
||||
getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
|
||||
|
|
|
|||
|
|
@ -167,11 +167,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
|
|||
// Note that this block may not be the loop latch block, even if the loop
|
||||
// has a latch block.
|
||||
ExitBlock = BB;
|
||||
TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType()));
|
||||
|
||||
if (!EC->getType()->isPointerTy() && EC->getType() != CountType)
|
||||
TripCount = SE.getZeroExtendExpr(TripCount, CountType);
|
||||
|
||||
ExitCount = EC;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1162,7 +1162,7 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
|
|||
}
|
||||
|
||||
DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
|
||||
DIE *CalleeDIE,
|
||||
const DISubprogram *CalleeSP,
|
||||
bool IsTail,
|
||||
const MCSymbol *PCAddr,
|
||||
const MCSymbol *CallAddr,
|
||||
|
|
@ -1176,7 +1176,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
|
|||
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
|
||||
MachineLocation(CallReg));
|
||||
} else {
|
||||
assert(CalleeDIE && "No DIE for call site entry origin");
|
||||
DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
|
||||
assert(CalleeDIE && "Could not create DIE for call site entry origin");
|
||||
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
|
||||
*CalleeDIE);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -249,16 +249,14 @@ public:
|
|||
dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
|
||||
|
||||
/// Construct a call site entry DIE describing a call within \p Scope to a
|
||||
/// callee described by \p CalleeDIE.
|
||||
/// \p CalleeDIE is a declaration or definition subprogram DIE for the callee.
|
||||
/// For indirect calls \p CalleeDIE is set to nullptr.
|
||||
/// callee described by \p CalleeSP.
|
||||
/// \p IsTail specifies whether the call is a tail call.
|
||||
/// \p PCAddr points to the PC value after the call instruction.
|
||||
/// \p CallAddr points to the PC value at the call instruction (or is null).
|
||||
/// \p CallReg is a register location for an indirect call. For direct calls
|
||||
/// the \p CallReg is set to 0.
|
||||
DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail,
|
||||
const MCSymbol *PCAddr,
|
||||
DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
|
||||
bool IsTail, const MCSymbol *PCAddr,
|
||||
const MCSymbol *CallAddr, unsigned CallReg);
|
||||
/// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
|
||||
/// were collected by the \ref collectCallSiteParameters.
|
||||
|
|
|
|||
|
|
@ -587,14 +587,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
|
|||
}
|
||||
}
|
||||
|
||||
DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
|
||||
DICompileUnit *Unit = SP->getUnit();
|
||||
assert(SP->isDefinition() && "Subprogram not a definition");
|
||||
assert(Unit && "Subprogram definition without parent unit");
|
||||
auto &CU = getOrCreateDwarfCompileUnit(Unit);
|
||||
return *CU.getOrCreateSubprogramDIE(SP);
|
||||
}
|
||||
|
||||
/// Represents a parameter whose call site value can be described by applying a
|
||||
/// debug expression to a register in the forwarded register worklist.
|
||||
struct FwdRegParamInfo {
|
||||
|
|
@ -945,7 +937,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
|
|||
continue;
|
||||
|
||||
unsigned CallReg = 0;
|
||||
DIE *CalleeDIE = nullptr;
|
||||
const DISubprogram *CalleeSP = nullptr;
|
||||
const Function *CalleeDecl = nullptr;
|
||||
if (CalleeOp.isReg()) {
|
||||
CallReg = CalleeOp.getReg();
|
||||
|
|
@ -955,19 +947,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
|
|||
CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
|
||||
if (!CalleeDecl || !CalleeDecl->getSubprogram())
|
||||
continue;
|
||||
const DISubprogram *CalleeSP = CalleeDecl->getSubprogram();
|
||||
|
||||
if (CalleeSP->isDefinition()) {
|
||||
// Ensure that a subprogram DIE for the callee is available in the
|
||||
// appropriate CU.
|
||||
CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP);
|
||||
} else {
|
||||
// Create the declaration DIE if it is missing. This is required to
|
||||
// support compilation of old bitcode with an incomplete list of
|
||||
// retained metadata.
|
||||
CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP);
|
||||
}
|
||||
assert(CalleeDIE && "Must have a DIE for the callee");
|
||||
CalleeSP = CalleeDecl->getSubprogram();
|
||||
}
|
||||
|
||||
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
|
||||
|
|
@ -1004,7 +984,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
|
|||
<< (IsTail ? " [IsTail]" : "") << "\n");
|
||||
|
||||
DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
|
||||
ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg);
|
||||
ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg);
|
||||
|
||||
// Optionally emit call-site-param debug info.
|
||||
if (emitDebugEntryValues()) {
|
||||
|
|
@ -1121,6 +1101,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
|
|||
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
|
||||
}
|
||||
|
||||
// Create DIEs for function declarations used for call site debug info.
|
||||
for (auto Scope : DIUnit->getRetainedTypes())
|
||||
if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
|
||||
NewCU.getOrCreateSubprogramDIE(SP);
|
||||
|
||||
CUMap.insert({DIUnit, &NewCU});
|
||||
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
|
||||
return NewCU;
|
||||
|
|
|
|||
|
|
@ -471,9 +471,6 @@ private:
|
|||
/// Construct a DIE for this abstract scope.
|
||||
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
|
||||
|
||||
/// Construct a DIE for the subprogram definition \p SP and return it.
|
||||
DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
|
||||
|
||||
/// Construct DIEs for call site entries describing the calls in \p MF.
|
||||
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
|
||||
DIE &ScopeDIE, const MachineFunction &MF);
|
||||
|
|
|
|||
|
|
@ -186,9 +186,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
|
|||
|
||||
/// Check whether the DIE for this MDNode can be shared across CUs.
|
||||
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
|
||||
// When the MDNode can be part of the type system (this includes subprogram
|
||||
// declarations *and* subprogram definitions, even local definitions), the
|
||||
// DIE must be shared across CUs.
|
||||
// When the MDNode can be part of the type system, the DIE can be shared
|
||||
// across CUs.
|
||||
// Combining type units and cross-CU DIE sharing is lower value (since
|
||||
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
|
||||
// level already) but may be implementable for some value in projects
|
||||
|
|
@ -196,7 +195,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
|
|||
// together.
|
||||
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
|
||||
return false;
|
||||
return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
|
||||
return (isa<DIType>(D) ||
|
||||
(isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
|
||||
!DD->generateTypeUnits();
|
||||
}
|
||||
|
||||
DIE *DwarfUnit::getDIE(const DINode *D) const {
|
||||
|
|
|
|||
|
|
@ -187,7 +187,7 @@ namespace {
|
|||
const DataLayout &DL,
|
||||
OptimizationRemarkEmitter *ORE) :
|
||||
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
|
||||
TripCount(Info.TripCount),
|
||||
ExitCount(Info.ExitCount),
|
||||
CountType(Info.CountType),
|
||||
ExitBranch(Info.ExitBranch),
|
||||
LoopDecrement(Info.LoopDecrement),
|
||||
|
|
@ -202,7 +202,7 @@ namespace {
|
|||
OptimizationRemarkEmitter *ORE = nullptr;
|
||||
Loop *L = nullptr;
|
||||
Module *M = nullptr;
|
||||
const SCEV *TripCount = nullptr;
|
||||
const SCEV *ExitCount = nullptr;
|
||||
Type *CountType = nullptr;
|
||||
BranchInst *ExitBranch = nullptr;
|
||||
Value *LoopDecrement = nullptr;
|
||||
|
|
@ -296,7 +296,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
|
|||
}
|
||||
|
||||
assert(
|
||||
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) &&
|
||||
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
|
||||
"Hardware Loop must have set exit info.");
|
||||
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
|
|
@ -381,13 +381,18 @@ Value *HardwareLoop::InitLoopCount() {
|
|||
// loop counter and tests that is not zero?
|
||||
|
||||
SCEVExpander SCEVE(SE, DL, "loopcnt");
|
||||
if (!ExitCount->getType()->isPointerTy() &&
|
||||
ExitCount->getType() != CountType)
|
||||
ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
|
||||
|
||||
ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
|
||||
|
||||
// If we're trying to use the 'test and set' form of the intrinsic, we need
|
||||
// to replace a conditional branch that is controlling entry to the loop. It
|
||||
// is likely (guaranteed?) that the preheader has an unconditional branch to
|
||||
// the loop header, so also check if it has a single predecessor.
|
||||
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount,
|
||||
SE.getZero(TripCount->getType()))) {
|
||||
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
|
||||
SE.getZero(ExitCount->getType()))) {
|
||||
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
|
||||
UseLoopGuard |= ForceGuardLoopEntry;
|
||||
} else
|
||||
|
|
@ -399,19 +404,19 @@ Value *HardwareLoop::InitLoopCount() {
|
|||
BasicBlock *Predecessor = BB->getSinglePredecessor();
|
||||
// If it's not safe to create a while loop then don't force it and create a
|
||||
// do-while loop instead
|
||||
if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE))
|
||||
if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
|
||||
UseLoopGuard = false;
|
||||
else
|
||||
BB = Predecessor;
|
||||
}
|
||||
|
||||
if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
|
||||
LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount
|
||||
<< "\n");
|
||||
if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
|
||||
LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
|
||||
<< *ExitCount << "\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Value *Count = SCEVE.expandCodeFor(TripCount, CountType,
|
||||
Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
|
||||
BB->getTerminator());
|
||||
|
||||
// FIXME: We've expanded Count where we hope to insert the counter setting
|
||||
|
|
|
|||
|
|
@ -5133,8 +5133,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
|
|||
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
|
||||
return V;
|
||||
|
||||
// TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
|
||||
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
|
||||
VT.getSizeInBits() <= 64) {
|
||||
VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
|
||||
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
||||
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
|
||||
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
|
||||
|
|
|
|||
|
|
@ -3464,8 +3464,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
|
|||
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
|
||||
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
|
||||
SDValue Zero = DAG.getConstant(0, dl, VT);
|
||||
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
|
||||
Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
|
||||
// Xor the inputs, if resulting sign bit is 0 the product will be
|
||||
// positive, else negative.
|
||||
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
|
||||
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
|
||||
Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
|
||||
Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
|
||||
} else {
|
||||
// For unsigned multiplication, we only need to check the max since we
|
||||
|
|
|
|||
|
|
@ -8677,8 +8677,10 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
|
|||
RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
|
||||
Register TiedReg = R->getReg();
|
||||
MVT RegVT = R->getSimpleValueType(0);
|
||||
const TargetRegisterClass *RC = TiedReg.isVirtual() ?
|
||||
MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);
|
||||
const TargetRegisterClass *RC =
|
||||
TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
|
||||
: RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
|
||||
: TRI.getMinimalPhysRegClass(TiedReg);
|
||||
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
|
||||
for (unsigned i = 0; i != NumRegs; ++i)
|
||||
Regs.push_back(MRI.createVirtualRegister(RC));
|
||||
|
|
|
|||
|
|
@ -8155,8 +8155,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
|
|||
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
|
||||
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
|
||||
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
|
||||
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
|
||||
Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
|
||||
// Xor the inputs, if resulting sign bit is 0 the product will be
|
||||
// positive, else negative.
|
||||
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
|
||||
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
|
||||
Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
|
||||
return DAG.getSelect(dl, VT, Overflow, Result, Product);
|
||||
} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
|
||||
SDValue Result =
|
||||
|
|
|
|||
|
|
@ -177,9 +177,25 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
|
|||
// Go with Dst.
|
||||
LinkFromSrc = false;
|
||||
break;
|
||||
case Comdat::SelectionKind::NoDeduplicate:
|
||||
return emitError("Linking COMDATs named '" + ComdatName +
|
||||
"': nodeduplicate has been violated!");
|
||||
case Comdat::SelectionKind::NoDeduplicate: {
|
||||
const GlobalVariable *DstGV;
|
||||
const GlobalVariable *SrcGV;
|
||||
if (getComdatLeader(DstM, ComdatName, DstGV) ||
|
||||
getComdatLeader(*SrcM, ComdatName, SrcGV))
|
||||
return true;
|
||||
|
||||
if (SrcGV->isWeakForLinker()) {
|
||||
// Go with Dst.
|
||||
LinkFromSrc = false;
|
||||
} else if (DstGV->isWeakForLinker()) {
|
||||
// Go with Src.
|
||||
LinkFromSrc = true;
|
||||
} else {
|
||||
return emitError("Linking COMDATs named '" + ComdatName +
|
||||
"': nodeduplicate has been violated!");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Comdat::SelectionKind::ExactMatch:
|
||||
case Comdat::SelectionKind::Largest:
|
||||
case Comdat::SelectionKind::SameSize: {
|
||||
|
|
|
|||
|
|
@ -1784,9 +1784,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
|
|||
MPM.addPass(GlobalOptPass());
|
||||
|
||||
// Garbage collect dead functions.
|
||||
// FIXME: Add ArgumentPromotion pass after once it's ported.
|
||||
MPM.addPass(GlobalDCEPass());
|
||||
|
||||
// If we didn't decide to inline a function, check to see if we can
|
||||
// transform it to pass arguments by value instead of by reference.
|
||||
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
|
||||
|
||||
FunctionPassManager FPM;
|
||||
// The IPO Passes may leave cruft around. Clean up after them.
|
||||
FPM.addPass(InstCombinePass());
|
||||
|
|
|
|||
|
|
@ -13680,6 +13680,8 @@ static bool isEssentiallyExtractHighSubvector(SDValue N) {
|
|||
N = N.getOperand(0);
|
||||
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
|
||||
return false;
|
||||
if (N.getOperand(0).getValueType().isScalableVector())
|
||||
return false;
|
||||
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
|
||||
N.getOperand(0).getValueType().getVectorNumElements() / 2;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -480,7 +480,7 @@ multiclass sme_vector_to_tile_aliases<Instruction inst,
|
|||
MatrixTileVectorOperand tile_ty,
|
||||
ZPRRegOp zpr_ty, Operand imm_ty> {
|
||||
def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
|
||||
(inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
|
||||
(inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
|
||||
}
|
||||
|
||||
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
|
||||
|
|
|
|||
|
|
@ -117,14 +117,14 @@ struct M68kMemOp {
|
|||
class M68kOperand : public MCParsedAsmOperand {
|
||||
typedef MCParsedAsmOperand Base;
|
||||
|
||||
enum class Kind {
|
||||
enum class KindTy {
|
||||
Invalid,
|
||||
Token,
|
||||
Imm,
|
||||
MemOp,
|
||||
};
|
||||
|
||||
Kind Kind;
|
||||
KindTy Kind;
|
||||
SMLoc Start, End;
|
||||
union {
|
||||
StringRef Token;
|
||||
|
|
@ -134,7 +134,7 @@ class M68kOperand : public MCParsedAsmOperand {
|
|||
};
|
||||
|
||||
public:
|
||||
M68kOperand(enum Kind Kind, SMLoc Start, SMLoc End)
|
||||
M68kOperand(KindTy Kind, SMLoc Start, SMLoc End)
|
||||
: Base(), Kind(Kind), Start(Start), End(End) {}
|
||||
|
||||
SMLoc getStartLoc() const override { return Start; }
|
||||
|
|
@ -143,7 +143,7 @@ public:
|
|||
void print(raw_ostream &OS) const override;
|
||||
|
||||
bool isMem() const override { return false; }
|
||||
bool isMemOp() const { return Kind == Kind::MemOp; }
|
||||
bool isMemOp() const { return Kind == KindTy::MemOp; }
|
||||
|
||||
static void addExpr(MCInst &Inst, const MCExpr *Expr);
|
||||
|
||||
|
|
@ -248,7 +248,7 @@ void M68kOperand::addExpr(MCInst &Inst, const MCExpr *Expr) {
|
|||
|
||||
// Reg
|
||||
bool M68kOperand::isReg() const {
|
||||
return Kind == Kind::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
|
||||
return Kind == KindTy::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
|
||||
}
|
||||
|
||||
unsigned M68kOperand::getReg() const {
|
||||
|
|
@ -265,13 +265,13 @@ void M68kOperand::addRegOperands(MCInst &Inst, unsigned N) const {
|
|||
|
||||
std::unique_ptr<M68kOperand> M68kOperand::createMemOp(M68kMemOp MemOp,
|
||||
SMLoc Start, SMLoc End) {
|
||||
auto Op = std::make_unique<M68kOperand>(Kind::MemOp, Start, End);
|
||||
auto Op = std::make_unique<M68kOperand>(KindTy::MemOp, Start, End);
|
||||
Op->MemOp = MemOp;
|
||||
return Op;
|
||||
}
|
||||
|
||||
// Token
|
||||
bool M68kOperand::isToken() const { return Kind == Kind::Token; }
|
||||
bool M68kOperand::isToken() const { return Kind == KindTy::Token; }
|
||||
StringRef M68kOperand::getToken() const {
|
||||
assert(isToken());
|
||||
return Token;
|
||||
|
|
@ -279,13 +279,13 @@ StringRef M68kOperand::getToken() const {
|
|||
|
||||
std::unique_ptr<M68kOperand> M68kOperand::createToken(StringRef Token,
|
||||
SMLoc Start, SMLoc End) {
|
||||
auto Op = std::make_unique<M68kOperand>(Kind::Token, Start, End);
|
||||
auto Op = std::make_unique<M68kOperand>(KindTy::Token, Start, End);
|
||||
Op->Token = Token;
|
||||
return Op;
|
||||
}
|
||||
|
||||
// Imm
|
||||
bool M68kOperand::isImm() const { return Kind == Kind::Imm; }
|
||||
bool M68kOperand::isImm() const { return Kind == KindTy::Imm; }
|
||||
void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
|
||||
assert(isImm() && "wrong oeprand kind");
|
||||
assert((N == 1) && "can only handle one register operand");
|
||||
|
|
@ -295,7 +295,7 @@ void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
|
|||
|
||||
std::unique_ptr<M68kOperand> M68kOperand::createImm(const MCExpr *Expr,
|
||||
SMLoc Start, SMLoc End) {
|
||||
auto Op = std::make_unique<M68kOperand>(Kind::Imm, Start, End);
|
||||
auto Op = std::make_unique<M68kOperand>(KindTy::Imm, Start, End);
|
||||
Op->Expr = Expr;
|
||||
return Op;
|
||||
}
|
||||
|
|
@ -842,19 +842,19 @@ bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
|
|||
|
||||
void M68kOperand::print(raw_ostream &OS) const {
|
||||
switch (Kind) {
|
||||
case Kind::Invalid:
|
||||
case KindTy::Invalid:
|
||||
OS << "invalid";
|
||||
break;
|
||||
|
||||
case Kind::Token:
|
||||
case KindTy::Token:
|
||||
OS << "token '" << Token << "'";
|
||||
break;
|
||||
|
||||
case Kind::Imm:
|
||||
case KindTy::Imm:
|
||||
OS << "immediate " << Imm;
|
||||
break;
|
||||
|
||||
case Kind::MemOp:
|
||||
case KindTy::MemOp:
|
||||
MemOp.print(OS);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,10 +49,14 @@ std::string computeDataLayout(const Triple &TT, StringRef CPU,
|
|||
// FIXME how to wire it with the used object format?
|
||||
Ret += "-m:e";
|
||||
|
||||
// M68k pointers are always 32 bit wide even for 16 bit cpus
|
||||
Ret += "-p:32:32";
|
||||
// M68k pointers are always 32 bit wide even for 16-bit CPUs.
|
||||
// The ABI only specifies 16-bit alignment.
|
||||
// On at least the 68020+ with a 32-bit bus, there is a performance benefit
|
||||
// to having 32-bit alignment.
|
||||
Ret += "-p:32:16:32";
|
||||
|
||||
// M68k requires i8 to align on 2 byte boundry
|
||||
// Bytes do not require special alignment, words are word aligned and
|
||||
// long words are word aligned at minimum.
|
||||
Ret += "-i8:8:8-i16:16:16-i32:16:32";
|
||||
|
||||
// FIXME no floats at the moment
|
||||
|
|
|
|||
|
|
@ -1223,7 +1223,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
|
|||
// Both of operands are not fixed. Set one of commutable
|
||||
// operands to the tied source.
|
||||
CommutableOpIdx1 = 1;
|
||||
} else if (SrcOpIdx1 == CommutableOpIdx1) {
|
||||
} else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
|
||||
// Only one of the operands is not fixed.
|
||||
CommutableOpIdx1 = SrcOpIdx2;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ private:
|
|||
void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB,
|
||||
MachineMemOperand *MMO);
|
||||
unsigned maskI1Value(unsigned Reg, const Value *V);
|
||||
unsigned getRegForI1Value(const Value *V, bool &Not);
|
||||
unsigned getRegForI1Value(const Value *V, const BasicBlock *BB, bool &Not);
|
||||
unsigned zeroExtendToI32(unsigned Reg, const Value *V,
|
||||
MVT::SimpleValueType From);
|
||||
unsigned signExtendToI32(unsigned Reg, const Value *V,
|
||||
|
|
@ -418,20 +418,17 @@ unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) {
|
|||
return zeroExtendToI32(Reg, V, MVT::i1);
|
||||
}
|
||||
|
||||
unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
|
||||
unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V,
|
||||
const BasicBlock *BB,
|
||||
bool &Not) {
|
||||
if (const auto *ICmp = dyn_cast<ICmpInst>(V))
|
||||
if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1)))
|
||||
if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) {
|
||||
if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32) &&
|
||||
ICmp->getParent() == BB) {
|
||||
Not = ICmp->isTrueWhenEqual();
|
||||
return getRegForValue(ICmp->getOperand(0));
|
||||
}
|
||||
|
||||
Value *NotV;
|
||||
if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
|
||||
Not = true;
|
||||
return getRegForValue(NotV);
|
||||
}
|
||||
|
||||
Not = false;
|
||||
unsigned Reg = getRegForValue(V);
|
||||
if (Reg == 0)
|
||||
|
|
@ -912,7 +909,8 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
|
|||
const auto *Select = cast<SelectInst>(I);
|
||||
|
||||
bool Not;
|
||||
unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
|
||||
unsigned CondReg =
|
||||
getRegForI1Value(Select->getCondition(), I->getParent(), Not);
|
||||
if (CondReg == 0)
|
||||
return false;
|
||||
|
||||
|
|
@ -1312,7 +1310,7 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) {
|
|||
MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)];
|
||||
|
||||
bool Not;
|
||||
unsigned CondReg = getRegForI1Value(Br->getCondition(), Not);
|
||||
unsigned CondReg = getRegForI1Value(Br->getCondition(), Br->getParent(), Not);
|
||||
if (CondReg == 0)
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,19 @@ using namespace llvm;
|
|||
|
||||
namespace {
|
||||
|
||||
// Determine if a promotion alias should be created for a symbol name.
|
||||
static bool allowPromotionAlias(const std::string &Name) {
|
||||
// Promotion aliases are used only in inline assembly. It's safe to
|
||||
// simply skip unusual names. Subset of MCAsmInfo::isAcceptableChar()
|
||||
// and MCAsmInfoXCOFF::isAcceptableChar().
|
||||
for (const char &C : Name) {
|
||||
if (isAlnum(C) || C == '_' || C == '.')
|
||||
continue;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Promote each local-linkage entity defined by ExportM and used by ImportM by
|
||||
// changing visibility and appending the given ModuleId.
|
||||
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
|
||||
|
|
@ -55,6 +68,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
|
|||
}
|
||||
}
|
||||
|
||||
std::string OldName = Name.str();
|
||||
std::string NewName = (Name + ModuleId).str();
|
||||
|
||||
if (const auto *C = ExportGV.getComdat())
|
||||
|
|
@ -69,6 +83,13 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
|
|||
ImportGV->setName(NewName);
|
||||
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
|
||||
}
|
||||
|
||||
if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) {
|
||||
// Create a local alias with the original name to avoid breaking
|
||||
// references from inline assembly.
|
||||
std::string Alias = ".set " + OldName + "," + NewName + "\n";
|
||||
ExportM.appendModuleInlineAsm(Alias);
|
||||
}
|
||||
}
|
||||
|
||||
if (!RenamedComdats.empty())
|
||||
|
|
|
|||
|
|
@ -221,6 +221,10 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
|
|||
AAPtr = AAPtr->stripPointerCastsSameRepresentation();
|
||||
AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
|
||||
AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
|
||||
if (!isa<SCEVConstant>(AlignSCEV))
|
||||
// Added to suppress a crash because consumer doesn't expect non-constant
|
||||
// alignments in the assume bundle. TODO: Consider generalizing caller.
|
||||
return false;
|
||||
if (AlignOB.Inputs.size() == 3)
|
||||
OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
|
||||
else
|
||||
|
|
|
|||
|
|
@ -1247,6 +1247,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
|
|||
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
|
||||
StoreSize, *AA, Stores);
|
||||
if (UseMemMove) {
|
||||
// For memmove case it's not enough to guarantee that loop doesn't access
|
||||
// TheStore and TheLoad. Additionally we need to make sure that TheStore is
|
||||
// the only user of TheLoad.
|
||||
if (!TheLoad->hasOneUse())
|
||||
return Changed;
|
||||
Stores.insert(TheLoad);
|
||||
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
|
||||
BECount, StoreSize, *AA, Stores)) {
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
|
|||
|
||||
void renderBranchSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
|
||||
OS << "BRF:" << Summary.BranchCoverage.getNumBranches() << '\n'
|
||||
<< "BFH:" << Summary.BranchCoverage.getCovered() << '\n';
|
||||
<< "BRH:" << Summary.BranchCoverage.getCovered() << '\n';
|
||||
}
|
||||
|
||||
void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
|
||||
|
|
|
|||
|
|
@ -1286,6 +1286,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
|
|||
if (shouldAdjustVA(Section))
|
||||
VMAAdjustment = AdjustVMA;
|
||||
|
||||
// In executable and shared objects, r_offset holds a virtual address.
|
||||
// Subtract SectionAddr from the r_offset field of a relocation to get
|
||||
// the section offset.
|
||||
uint64_t RelAdjustment = Obj->isRelocatableObject() ? 0 : SectionAddr;
|
||||
uint64_t Size;
|
||||
uint64_t Index;
|
||||
bool PrintedSection = false;
|
||||
|
|
@ -1432,7 +1436,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
|
|||
// For --reloc: print zero blocks patched by relocations, so that
|
||||
// relocations can be shown in the dump.
|
||||
if (RelCur != RelEnd)
|
||||
MaxOffset = RelCur->getOffset() - Index;
|
||||
MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
|
||||
MaxOffset);
|
||||
|
||||
if (size_t N =
|
||||
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
|
||||
|
|
@ -1581,7 +1586,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
|
|||
if (Obj->getArch() != Triple::hexagon) {
|
||||
// Print relocation for instruction and data.
|
||||
while (RelCur != RelEnd) {
|
||||
uint64_t Offset = RelCur->getOffset();
|
||||
uint64_t Offset = RelCur->getOffset() - RelAdjustment;
|
||||
// If this relocation is hidden, skip it.
|
||||
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
|
||||
++RelCur;
|
||||
|
|
|
|||
Loading…
Reference in a new issue