Vendor import of llvm-project branch release/19.x llvmorg-19.1.0-0-ga4bf6cd7cfb1, a.k.a. 19.1.0 release.

This commit is contained in:
Dimitry Andric 2024-09-22 11:37:02 +02:00
parent 7432c96084
commit 1de139fdd5
31 changed files with 227 additions and 176 deletions

View file

@ -723,6 +723,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_ZNVER4:
defineCPUMacros(Builder, "znver4");
break;
case CK_ZNVER5:
defineCPUMacros(Builder, "znver5");
break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
@ -1613,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_ZNVER2:
case CK_ZNVER3:
case CK_ZNVER4:
case CK_ZNVER5:
// Deprecated
case CK_x86_64:
case CK_x86_64_v2:

View file

@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
return CGF.Builder.CreateLoad(Tmp);
}
/// Store the first-class aggregate \p Val into the memory at \p Dest.
///
/// A struct-typed value is written out one element at a time (an
/// extractvalue followed by a store into the matching struct GEP) rather than
/// as a single aggregate store, because element-wise stores are friendlier to
/// fast-isel. A value of any other type is stored directly. \p DestIsVolatile
/// is forwarded to every store that is emitted.
// FIXME: Do we need to recurse here?
void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
                                         bool DestIsVolatile) {
  auto *StructTy = dyn_cast<llvm::StructType>(Val->getType());

  // Not a struct: a single plain store is all that is needed.
  if (!StructTy) {
    Builder.CreateStore(Val, Dest, DestIsVolatile);
    return;
  }

  // Prefer scalar stores to first-class aggregate stores.
  const unsigned NumFields = StructTy->getNumElements();
  for (unsigned Idx = 0; Idx != NumFields; ++Idx) {
    Address FieldAddr = Builder.CreateStructGEP(Dest, Idx);
    llvm::Value *Field = Builder.CreateExtractValue(Val, Idx);
    Builder.CreateStore(Field, FieldAddr, DestIsVolatile);
  }
}
void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
llvm::TypeSize DstSize,
bool DstIsVolatile) {
if (!DstSize)
return;
/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
/// where the source and destination may have different types. The
/// destination is known to be aligned to \arg DstAlign bytes.
///
/// This safely handles the case when the src type is larger than the
/// destination type; the upper bits of the src will be lost.
static void CreateCoercedStore(llvm::Value *Src,
Address Dst,
bool DstIsVolatile,
CodeGenFunction &CGF) {
llvm::Type *SrcTy = Src->getType();
llvm::Type *DstTy = Dst.getElementType();
if (SrcTy == DstTy) {
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
return;
llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
// GEP into structs to try to make types match.
// FIXME: This isn't really that useful with opaque types, but it impacts a
// lot of regression tests.
if (SrcTy != Dst.getElementType()) {
if (llvm::StructType *DstSTy =
dyn_cast<llvm::StructType>(Dst.getElementType())) {
assert(!SrcSize.isScalable());
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
SrcSize.getFixedValue(), *this);
}
}
llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
SrcSize.getFixedValue(), CGF);
DstTy = Dst.getElementType();
}
llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
if (SrcPtrTy && DstPtrTy &&
SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
return;
}
// If the source and destination are integer or pointer types, just do an
// extension or truncation to the desired type.
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
(isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
return;
}
llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
// If store is legal, just bitcast the src pointer.
if (isa<llvm::ScalableVectorType>(SrcTy) ||
isa<llvm::ScalableVectorType>(DstTy) ||
SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
Dst = Dst.withElementType(SrcTy);
CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
if (SrcSize.isScalable() || SrcSize <= DstSize) {
if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
// If the value is supposed to be a pointer, convert it before storing it.
Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
Builder.CreateStore(Src, Dst, DstIsVolatile);
} else if (llvm::StructType *STy =
dyn_cast<llvm::StructType>(Src->getType())) {
// Prefer scalar stores to first-class aggregate stores.
Dst = Dst.withElementType(SrcTy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Address EltPtr = Builder.CreateStructGEP(Dst, i);
llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
}
} else {
Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
}
} else if (SrcTy->isIntegerTy()) {
// If the source is a simple integer, coerce it directly.
llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
// simple.
@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
// FIXME: Assert that we aren't truncating non-padding bits when we have access
// to that information.
RawAddress Tmp =
CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
CGF.Builder.CreateStore(Src, Tmp);
CGF.Builder.CreateMemCpy(
Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
Builder.CreateStore(Src, Tmp);
Builder.CreateMemCpy(Dst.emitRawPointer(*this),
Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
Tmp.getAlignment().getAsAlign(),
Builder.CreateTypeSize(IntPtrTy, DstSize));
}
}
@ -3309,7 +3284,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
assert(NumIRArgs == 1);
auto AI = Fn->getArg(FirstIRArg);
AI->setName(Arg->getName() + ".coerce");
CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
CreateCoercedStore(
AI, Ptr,
llvm::TypeSize::getFixed(
getContext().getTypeSizeInChars(Ty).getQuantity() -
ArgI.getDirectOffset()),
/*DstIsVolatile=*/false);
}
// Match to what EmitParmDecl is expecting for this type.
@ -5939,17 +5919,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
return RValue::getComplex(std::make_pair(Real, Imag));
}
case TEK_Aggregate: {
Address DestPtr = ReturnValue.getAddress();
bool DestIsVolatile = ReturnValue.isVolatile();
if (!DestPtr.isValid()) {
DestPtr = CreateMemTemp(RetTy, "agg.tmp");
DestIsVolatile = false;
}
EmitAggregateStore(CI, DestPtr, DestIsVolatile);
return RValue::getAggregate(DestPtr);
}
case TEK_Aggregate:
break;
case TEK_Scalar: {
// If the argument doesn't match, perform a bitcast to coerce it.
// This can happen due to trivial type mismatches.
@ -5959,7 +5930,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
return RValue::get(V);
}
}
llvm_unreachable("bad evaluation kind");
}
// If coercing a fixed vector from a scalable vector for ABI
@ -5981,10 +5951,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
Address DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
uint64_t DestSize =
getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();
if (!DestPtr.isValid()) {
DestPtr = CreateMemTemp(RetTy, "coerce");
DestIsVolatile = false;
DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
}
// An empty record can overlap other data (if declared with
@ -5993,7 +5966,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!isEmptyRecord(getContext(), RetTy, true)) {
// If the value is offset in memory, apply the offset now.
Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
CreateCoercedStore(
CI, StorePtr,
llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
DestIsVolatile);
}
return convertTempToRValue(DestPtr, RetTy, SourceLocation());

View file

@ -131,15 +131,12 @@ public:
EnsureDest(E->getType());
if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
Address StoreDest = Dest.getAddress();
// The emitted value is guaranteed to have the same size as the
// destination but can have a different type. Just do a bitcast in this
// case to avoid incorrect GEPs.
if (Result->getType() != StoreDest.getType())
StoreDest = StoreDest.withElementType(Result->getType());
CGF.EmitAggregateStore(Result, StoreDest,
E->getType().isVolatileQualified());
CGF.CreateCoercedStore(
Result, Dest.getAddress(),
llvm::TypeSize::getFixed(
Dest.getPreferredSize(CGF.getContext(), E->getType())
.getQuantity()),
E->getType().isVolatileQualified());
return;
}
return Visit(E->getSubExpr());
@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
return AggValueSlot::DoesNotOverlap;
// Empty fields can overlap earlier fields.
if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
return AggValueSlot::MayOverlap;
// If the field lies entirely within the enclosing class's nvsize, its tail
// padding cannot overlap any already-initialized object. (The only subobjects
// with greater addresses that might already be initialized are vbases.)
@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
if (IsVirtual)
return AggValueSlot::MayOverlap;
// Empty bases can overlap earlier bases.
if (BaseRD->isEmpty())
return AggValueSlot::MayOverlap;
// If the base class is laid out entirely within the nvsize of the derived
// class, its tail padding cannot yet be initialized, so we can issue
// stores at the full width of the base class.

View file

@ -745,7 +745,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
} break;
case attr::CXXAssume: {
const Expr *Assumption = cast<CXXAssumeAttr>(A)->getAssumption();
if (getLangOpts().CXXAssumptions &&
if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() &&
!Assumption->HasSideEffects(getContext())) {
llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption);
Builder.CreateAssumption(AssumptionVal);

View file

@ -4838,9 +4838,10 @@ public:
void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
ExprValueKind SrcKind);
/// Build all the stores needed to initialize an aggregate at Dest with the
/// value Val.
void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
/// Create a store to \arg DstPtr from \arg Src, truncating the stored value
/// to at most \arg DstSize bytes.
void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
bool DstIsVolatile);
/// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
/// make sure it survives garbage collection until this point.

View file

@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
Keywords.kw_as));
ProbablyBracedList =
ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
NextTok->is(tok::l_paren)));
// If there is a comma, semicolon or right paren after the closing
// brace, we assume this is a braced initializer list.
@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
ProbablyBracedList = NextTok->isNot(tok::l_square);
}
// Cpp macro definition body containing nonempty braced list or block:
// Cpp macro definition body that is a nonempty braced list or block:
if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
!FormatTok->Previous && NextTok->is(tok::eof) &&
// A statement can end with only `;` (simple statement), a block
// closing brace (compound statement), or `:` (label statement).
// If PrevTok is a block opening brace, Tok ends an empty block.

View file

@ -5430,11 +5430,24 @@ struct EnsureImmediateInvocationInDefaultArgs
// Rewrite to source location to refer to the context in which they are used.
ExprResult TransformSourceLocExpr(SourceLocExpr *E) {
if (E->getParentContext() == SemaRef.CurContext)
DeclContext *DC = E->getParentContext();
if (DC == SemaRef.CurContext)
return E;
return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getType(),
E->getBeginLoc(), E->getEndLoc(),
SemaRef.CurContext);
// FIXME: During instantiation, because the rebuild of default arguments
// is not always done in the context of the template instantiator,
// we run the risk of producing a dependent source location
// that would never be rebuilt.
// This usually happens during overload resolution, or in contexts
// where the value of the source location does not matter.
// However, we should find a better way to deal with source location
// of function templates.
if (!SemaRef.CurrentInstantiationScope ||
!SemaRef.CurContext->isDependentContext() || DC->isDependentContext())
DC = SemaRef.CurContext;
return getDerived().RebuildSourceLocExpr(
E->getIdentKind(), E->getType(), E->getBeginLoc(), E->getEndLoc(), DC);
}
};

View file

@ -5140,7 +5140,8 @@ static bool HasNonDeletedDefaultedEqualityComparison(Sema &S,
// const ClassT& obj;
OpaqueValueExpr Operand(
{}, Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
KeyLoc,
Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
ExprValueKind::VK_LValue);
UnresolvedSet<16> Functions;
// obj == obj;

View file

@ -1318,7 +1318,6 @@ void Sema::ActOnLambdaExpressionAfterIntroducer(LambdaIntroducer &Intro,
if (C->Init.isUsable()) {
addInitCapture(LSI, cast<VarDecl>(Var), C->Kind == LCK_ByRef);
PushOnScopeChains(Var, CurScope, false);
} else {
TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef
: TryCapture_ExplicitByVal;

View file

@ -570,7 +570,7 @@ void LookupResult::resolveKind() {
// For non-type declarations, check for a prior lookup result naming this
// canonical declaration.
if (!D->isPlaceholderVar(getSema().getLangOpts()) && !ExistingI) {
if (!ExistingI) {
auto UniqueResult = Unique.insert(std::make_pair(D, I));
if (!UniqueResult.second) {
// We've seen this entity before.

View file

@ -1928,6 +1928,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::CXXRewrittenBinaryOperatorClass:
case Stmt::RequiresExprClass:
case Expr::CXXParenListInitExprClass:
case Stmt::EmbedExprClass:
// Fall through.
// Cases we intentionally don't evaluate, since they don't need
@ -2430,10 +2431,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
Bldr.addNodes(Dst);
break;
}
case Stmt::EmbedExprClass:
llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
break;
}
}

View file

@ -59,6 +59,7 @@ enum ProcessorTypes {
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
AMDFAM1AH,
CPU_TYPE_MAX
};
@ -97,6 +98,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
AMDFAM1AH_ZNVER5,
CPU_SUBTYPE_MAX
};
@ -803,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
case 26:
CPU = "znver5";
*Type = AMDFAM1AH;
if (Model <= 0x77) {
// Models 00h-0Fh (Breithorn).
// Models 10h-1Fh (Breithorn-Dense).
// Models 20h-2Fh (Strix 1).
// Models 30h-37h (Strix 2).
// Models 38h-3Fh (Strix 3).
// Models 40h-4Fh (Granite Ridge).
// Models 50h-5Fh (Weisshorn).
// Models 60h-6Fh (Krackan1).
// Models 70h-77h (Sarlak).
CPU = "znver5";
*Subtype = AMDFAM1AH_ZNVER5;
break; // "znver5"
}
break;
default:
break; // Unknown AMD CPU.
}

View file

@ -13,7 +13,7 @@
#define QUAD_PRECISION
#include "fp_lib.h"
#if defined(CRT_HAS_F128)
#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the quotient of (a + ib) / (c + id)

View file

@ -15,7 +15,7 @@
#include "int_lib.h"
#include "int_math.h"
#if defined(CRT_HAS_F128)
#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the product of a + ib and c + id

View file

@ -1015,8 +1015,8 @@ constexpr chrono::year operator ""y(unsigned lo
# include <charconv>
# if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
# include <locale>
# include <ostream>
# endif
# include <ostream>
#endif
#endif // _LIBCPP_CHRONO

View file

@ -60,17 +60,15 @@ Hexagon::Hexagon() {
}
uint32_t Hexagon::calcEFlags() const {
assert(!ctx.objectFiles.empty());
// The architecture revision must always be equal to or greater than
// greatest revision in the list of inputs.
uint32_t ret = 0;
std::optional<uint32_t> ret;
for (InputFile *f : ctx.objectFiles) {
uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
if (eflags > ret)
if (!ret || eflags > *ret)
ret = eflags;
}
return ret;
return ret.value_or(/* Default Arch Rev: */ 0x60);
}
static uint32_t applyMask(uint32_t mask, uint32_t data) {

View file

@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H, "zhaoxin_fam7h")
X86_CPU_TYPE(INTEL_SIERRAFOREST, "sierraforest")
X86_CPU_TYPE(INTEL_GRANDRIDGE, "grandridge")
X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
X86_CPU_TYPE(AMDFAM1AH, "amdfam1ah")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom")
X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10")
X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15")
X86_CPU_TYPE_ALIAS(AMDFAM1AH, "amdfam1a")
X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")
#undef X86_CPU_TYPE_ALIAS
@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")

View file

@ -147,6 +147,7 @@ enum CPUKind {
CK_x86_64_v3,
CK_x86_64_v4,
CK_Geode,
CK_ZNVER5,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if

View file

@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Generate the prolog instructions that set up the pipeline.
generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
MF.insert(BB->getIterator(), KernelBB);
LIS.insertMBBInMaps(KernelBB);
// Rearrange the instructions to generate the new, pipelined loop,
// and update register names as needed.
@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
NewBB->transferSuccessors(PredBB);
PredBB->addSuccessor(NewBB);
PredBB = NewBB;
LIS.insertMBBInMaps(NewBB);
// Generate instructions for each appropriate stage. Process instructions
// in original program order.
@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog(
PredBB->replaceSuccessor(LoopExitBB, NewBB);
NewBB->addSuccessor(LoopExitBB);
LIS.insertMBBInMaps(NewBB);
if (EpilogStart == LoopExitBB)
EpilogStart = NewBB;

View file

@ -1453,6 +1453,10 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
if (BB->getFirstMayFaultInst()) {
// Report IP range only for blocks with Faulty inst
auto MBBb = MBB.getFirstNonPHI();
if (MBBb == MBB.end())
continue;
MachineInstr *MIb = &*MBBb;
if (MIb->isTerminator())
continue;

View file

@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
if (ReadFromTail && Src->getMarker(Last)) {
DbgMarker *FromLast = Src->getMarker(Last);
if (LastIsEnd) {
Dest->adoptDbgRecords(Src, Last, true);
// adoptDbgRecords will release any trailers.
if (Dest == end()) {
// Absorb the trailing markers from Src.
assert(FromLast == Src->getTrailingDbgRecords());
createMarker(Dest)->absorbDebugValues(*FromLast, true);
FromLast->eraseFromParent();
Src->deleteTrailingDbgRecords();
} else {
// adoptDbgRecords will release any trailers.
Dest->adoptDbgRecords(Src, Last, true);
}
assert(!Src->getTrailingDbgRecords());
} else {
// FIXME: can we use adoptDbgRecords here to reduce allocations?

View file

@ -2931,16 +2931,6 @@ struct RegPairInfo {
} // end anonymous namespace
/// Scan the predicate registers P8 through P15 in ascending order and return
/// the PN register with the same index as the first one whose bit is set in
/// \p SavedRegs. Returns AArch64::NoRegister when none of them is set.
unsigned findFreePredicateReg(BitVector &SavedRegs) {
  unsigned Candidate = AArch64::P8;
  while (Candidate <= AArch64::P15) {
    // Translate the P-register number to its PN counterpart by offset.
    if (SavedRegs.test(Candidate))
      return Candidate - AArch64::P0 + AArch64::PN0;
    ++Candidate;
  }
  return AArch64::NoRegister;
}
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@ -3645,7 +3635,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned ExtraCSSpill = 0;
bool HasUnpairedGPR64 = false;
bool HasPairZReg = false;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
@ -3699,28 +3688,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
!RegInfo->isReservedReg(MF, PairedReg))
ExtraCSSpill = PairedReg;
}
// Check if there is a pair of ZRegs, so it can select PReg for spill/fill
HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
SavedRegs.test(CSRegs[i ^ 1]));
}
if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// Find a suitable predicate register for the multi-vector spill/fill
// instructions.
unsigned PnReg = findFreePredicateReg(SavedRegs);
if (PnReg != AArch64::NoRegister)
AFI->setPredicateRegForFillSpill(PnReg);
// If no free callee-save has been found assign one.
if (!AFI->getPredicateRegForFillSpill() &&
MF.getFunction().getCallingConv() ==
CallingConv::AArch64_SVE_VectorCall) {
SavedRegs.set(AArch64::P8);
AFI->setPredicateRegForFillSpill(AArch64::PN8);
}
assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
"Predicate cannot be a reserved register");
}
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&

View file

@ -5144,10 +5144,6 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (PNRReg.isValid() && !PNRReg.isVirtual())
MI.addDef(PNRReg, RegState::Implicit);
MI.addMemOperand(MMO);
if (PNRReg.isValid() && PNRReg.isVirtual())
BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
.addReg(DestReg);
}
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,

View file

@ -4349,6 +4349,7 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@ -4363,20 +4364,25 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
// Try to use two fast 24-bit multiplies (one for each half of the result)
// instead of one slow extending multiply.
unsigned LoOpcode, HiOpcode;
if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
LoOpcode = AMDGPUISD::MUL_U24;
HiOpcode = AMDGPUISD::MULHI_U24;
} else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
LoOpcode = AMDGPUISD::MUL_I24;
HiOpcode = AMDGPUISD::MULHI_I24;
unsigned LoOpcode = 0;
unsigned HiOpcode = 0;
if (Signed) {
if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
LoOpcode = AMDGPUISD::MUL_I24;
HiOpcode = AMDGPUISD::MULHI_I24;
}
} else {
return SDValue();
if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
LoOpcode = AMDGPUISD::MUL_U24;
HiOpcode = AMDGPUISD::MULHI_U24;
}
}
if (!LoOpcode)
return SDValue();
SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);

View file

@ -9338,12 +9338,13 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Op0 = Op->getOperand(0);
if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
(Op.getValueType() != MVT::f128))
return SDValue();
SDValue Lo = Op0.getOperand(0);
SDValue Hi = Op0.getOperand(1);
if ((Op.getValueType() != MVT::f128) ||
(Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
(Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
return SDValue();
if (!Subtarget.isLittleEndian())

View file

@ -2902,7 +2902,7 @@ RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
// if any possible.
if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
(MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
F.hasSection()))
F.hasSection() || F.getSectionPrefix()))
return outliner::InstrType::Illegal;
}

View file

@ -1543,6 +1543,19 @@ def ProcessorFeatures {
FeatureVPOPCNTDQ];
list<SubtargetFeature> ZN4Features =
!listconcat(ZN3Features, ZN4AdditionalFeatures);
list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureVP2INTERSECT,
FeaturePREFETCHI,
FeatureAVXVNNI
];
list<SubtargetFeature> ZN5Features =
!listconcat(ZN4Features, ZN5AdditionalFeatures);
}
//===----------------------------------------------------------------------===//
@ -1892,6 +1905,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;
def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
ProcessorFeatures.ZN5Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

View file

@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;

View file

@ -1213,6 +1213,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
case 26:
CPU = "znver5";
*Type = X86::AMDFAM1AH;
if (Model <= 0x77) {
// Models 00h-0Fh (Breithorn).
// Models 10h-1Fh (Breithorn-Dense).
// Models 20h-2Fh (Strix 1).
// Models 30h-37h (Strix 2).
// Models 38h-3Fh (Strix 3).
// Models 40h-4Fh (Granite Ridge).
// Models 50h-5Fh (Weisshorn).
// Models 60h-6Fh (Krackan1).
// Models 70h-77h (Sarlak).
CPU = "znver5";
*Subtype = X86::AMDFAM1AH_ZNVER5;
break; // "znver5"
}
break;
default:
break; // Unknown AMD CPU.
}

View file

@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 =
FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
FeatureGFNI | FeatureSHSTK;
// Feature set for "znver5": everything in FeaturesZNVER4 plus AVX-VNNI,
// MOVDIRI, MOVDIR64B, AVX512-VP2INTERSECT and PREFETCHI. Each additional
// feature is listed exactly once; OR-ing the same bit in twice is a no-op.
static constexpr FeatureBitset FeaturesZNVER5 =
    FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
    FeatureAVX512VP2INTERSECT | FeaturePREFETCHI;
// D151696 transplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned in the following ways:
// 1. Copy the mangling from the original CPU_SPECIFIC MACROs. If no, assign
@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = {
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
{ {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
{ {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },

View file

@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
if (any_of(E.Scalars, [&](Value *V) {
return !all_of(V->users(), [=](User *U) {
return getTreeEntry(U) ||
(UserIgnoreList && UserIgnoreList->contains(U)) ||
(E.Idx == 0 && UserIgnoreList &&
UserIgnoreList->contains(U)) ||
(!isa<CmpInst>(U) && U->getType()->isSized() &&
!U->getType()->isScalableTy() &&
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
@ -15539,6 +15540,11 @@ void BoUpSLP::computeMinimumValueSizes() {
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
if (TE == UserTE || !TE)
return false;
if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
SelectInst>(U) ||
!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
SelectInst>(UserTE->getMainOp()))
return true;
unsigned UserTESz = DL->getTypeSizeInBits(
UserTE->Scalars.front()->getType());
auto It = MinBWs.find(TE);