mirror of
https://github.com/opnsense/src.git
synced 2026-06-04 22:32:43 -04:00
Vendor import of llvm-project branch release/19.x llvmorg-19.1.0-0-ga4bf6cd7cfb1, a.k.a. 19.1.0 release.
This commit is contained in:
parent
7432c96084
commit
1de139fdd5
31 changed files with 227 additions and 176 deletions
|
|
@ -723,6 +723,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
case CK_ZNVER4:
|
||||
defineCPUMacros(Builder, "znver4");
|
||||
break;
|
||||
case CK_ZNVER5:
|
||||
defineCPUMacros(Builder, "znver5");
|
||||
break;
|
||||
case CK_Geode:
|
||||
defineCPUMacros(Builder, "geode");
|
||||
break;
|
||||
|
|
@ -1613,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
|
|||
case CK_ZNVER2:
|
||||
case CK_ZNVER3:
|
||||
case CK_ZNVER4:
|
||||
case CK_ZNVER5:
|
||||
// Deprecated
|
||||
case CK_x86_64:
|
||||
case CK_x86_64_v2:
|
||||
|
|
|
|||
|
|
@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
|
|||
return CGF.Builder.CreateLoad(Tmp);
|
||||
}
|
||||
|
||||
// Function to store a first-class aggregate into memory. We prefer to
|
||||
// store the elements rather than the aggregate to be more friendly to
|
||||
// fast-isel.
|
||||
// FIXME: Do we need to recurse here?
|
||||
void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
|
||||
bool DestIsVolatile) {
|
||||
// Prefer scalar stores to first-class aggregate stores.
|
||||
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
||||
Address EltPtr = Builder.CreateStructGEP(Dest, i);
|
||||
llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
|
||||
Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
|
||||
}
|
||||
} else {
|
||||
Builder.CreateStore(Val, Dest, DestIsVolatile);
|
||||
}
|
||||
}
|
||||
void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
|
||||
llvm::TypeSize DstSize,
|
||||
bool DstIsVolatile) {
|
||||
if (!DstSize)
|
||||
return;
|
||||
|
||||
/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
|
||||
/// where the source and destination may have different types. The
|
||||
/// destination is known to be aligned to \arg DstAlign bytes.
|
||||
///
|
||||
/// This safely handles the case when the src type is larger than the
|
||||
/// destination type; the upper bits of the src will be lost.
|
||||
static void CreateCoercedStore(llvm::Value *Src,
|
||||
Address Dst,
|
||||
bool DstIsVolatile,
|
||||
CodeGenFunction &CGF) {
|
||||
llvm::Type *SrcTy = Src->getType();
|
||||
llvm::Type *DstTy = Dst.getElementType();
|
||||
if (SrcTy == DstTy) {
|
||||
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
|
||||
return;
|
||||
llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
|
||||
|
||||
// GEP into structs to try to make types match.
|
||||
// FIXME: This isn't really that useful with opaque types, but it impacts a
|
||||
// lot of regression tests.
|
||||
if (SrcTy != Dst.getElementType()) {
|
||||
if (llvm::StructType *DstSTy =
|
||||
dyn_cast<llvm::StructType>(Dst.getElementType())) {
|
||||
assert(!SrcSize.isScalable());
|
||||
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
|
||||
SrcSize.getFixedValue(), *this);
|
||||
}
|
||||
}
|
||||
|
||||
llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
|
||||
|
||||
if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
|
||||
Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
|
||||
SrcSize.getFixedValue(), CGF);
|
||||
DstTy = Dst.getElementType();
|
||||
}
|
||||
|
||||
llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
|
||||
llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
|
||||
if (SrcPtrTy && DstPtrTy &&
|
||||
SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
|
||||
Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
|
||||
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
|
||||
return;
|
||||
}
|
||||
|
||||
// If the source and destination are integer or pointer types, just do an
|
||||
// extension or truncation to the desired type.
|
||||
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
|
||||
(isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
|
||||
Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
|
||||
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
|
||||
return;
|
||||
}
|
||||
|
||||
llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
|
||||
|
||||
// If store is legal, just bitcast the src pointer.
|
||||
if (isa<llvm::ScalableVectorType>(SrcTy) ||
|
||||
isa<llvm::ScalableVectorType>(DstTy) ||
|
||||
SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
|
||||
Dst = Dst.withElementType(SrcTy);
|
||||
CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
|
||||
if (SrcSize.isScalable() || SrcSize <= DstSize) {
|
||||
if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
|
||||
SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
|
||||
// If the value is supposed to be a pointer, convert it before storing it.
|
||||
Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
|
||||
Builder.CreateStore(Src, Dst, DstIsVolatile);
|
||||
} else if (llvm::StructType *STy =
|
||||
dyn_cast<llvm::StructType>(Src->getType())) {
|
||||
// Prefer scalar stores to first-class aggregate stores.
|
||||
Dst = Dst.withElementType(SrcTy);
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
||||
Address EltPtr = Builder.CreateStructGEP(Dst, i);
|
||||
llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
|
||||
Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
|
||||
}
|
||||
} else {
|
||||
Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
|
||||
}
|
||||
} else if (SrcTy->isIntegerTy()) {
|
||||
// If the source is a simple integer, coerce it directly.
|
||||
llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
|
||||
Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
|
||||
Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
|
||||
} else {
|
||||
// Otherwise do coercion through memory. This is stupid, but
|
||||
// simple.
|
||||
|
|
@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
|
|||
// FIXME: Assert that we aren't truncating non-padding bits when we have access
|
||||
// to that information.
|
||||
RawAddress Tmp =
|
||||
CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
|
||||
CGF.Builder.CreateStore(Src, Tmp);
|
||||
CGF.Builder.CreateMemCpy(
|
||||
Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
|
||||
Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
|
||||
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
|
||||
CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
|
||||
Builder.CreateStore(Src, Tmp);
|
||||
Builder.CreateMemCpy(Dst.emitRawPointer(*this),
|
||||
Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
|
||||
Tmp.getAlignment().getAsAlign(),
|
||||
Builder.CreateTypeSize(IntPtrTy, DstSize));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3309,7 +3284,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
|
|||
assert(NumIRArgs == 1);
|
||||
auto AI = Fn->getArg(FirstIRArg);
|
||||
AI->setName(Arg->getName() + ".coerce");
|
||||
CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
|
||||
CreateCoercedStore(
|
||||
AI, Ptr,
|
||||
llvm::TypeSize::getFixed(
|
||||
getContext().getTypeSizeInChars(Ty).getQuantity() -
|
||||
ArgI.getDirectOffset()),
|
||||
/*DstIsVolatile=*/false);
|
||||
}
|
||||
|
||||
// Match to what EmitParmDecl is expecting for this type.
|
||||
|
|
@ -5939,17 +5919,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
|||
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
|
||||
return RValue::getComplex(std::make_pair(Real, Imag));
|
||||
}
|
||||
case TEK_Aggregate: {
|
||||
Address DestPtr = ReturnValue.getAddress();
|
||||
bool DestIsVolatile = ReturnValue.isVolatile();
|
||||
|
||||
if (!DestPtr.isValid()) {
|
||||
DestPtr = CreateMemTemp(RetTy, "agg.tmp");
|
||||
DestIsVolatile = false;
|
||||
}
|
||||
EmitAggregateStore(CI, DestPtr, DestIsVolatile);
|
||||
return RValue::getAggregate(DestPtr);
|
||||
}
|
||||
case TEK_Aggregate:
|
||||
break;
|
||||
case TEK_Scalar: {
|
||||
// If the argument doesn't match, perform a bitcast to coerce it.
|
||||
// This can happen due to trivial type mismatches.
|
||||
|
|
@ -5959,7 +5930,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
|||
return RValue::get(V);
|
||||
}
|
||||
}
|
||||
llvm_unreachable("bad evaluation kind");
|
||||
}
|
||||
|
||||
// If coercing a fixed vector from a scalable vector for ABI
|
||||
|
|
@ -5981,10 +5951,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
|||
|
||||
Address DestPtr = ReturnValue.getValue();
|
||||
bool DestIsVolatile = ReturnValue.isVolatile();
|
||||
uint64_t DestSize =
|
||||
getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();
|
||||
|
||||
if (!DestPtr.isValid()) {
|
||||
DestPtr = CreateMemTemp(RetTy, "coerce");
|
||||
DestIsVolatile = false;
|
||||
DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
|
||||
}
|
||||
|
||||
// An empty record can overlap other data (if declared with
|
||||
|
|
@ -5993,7 +5966,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
|||
if (!isEmptyRecord(getContext(), RetTy, true)) {
|
||||
// If the value is offset in memory, apply the offset now.
|
||||
Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
|
||||
CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
|
||||
CreateCoercedStore(
|
||||
CI, StorePtr,
|
||||
llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
|
||||
DestIsVolatile);
|
||||
}
|
||||
|
||||
return convertTempToRValue(DestPtr, RetTy, SourceLocation());
|
||||
|
|
|
|||
|
|
@ -131,15 +131,12 @@ public:
|
|||
EnsureDest(E->getType());
|
||||
|
||||
if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
|
||||
Address StoreDest = Dest.getAddress();
|
||||
// The emitted value is guaranteed to have the same size as the
|
||||
// destination but can have a different type. Just do a bitcast in this
|
||||
// case to avoid incorrect GEPs.
|
||||
if (Result->getType() != StoreDest.getType())
|
||||
StoreDest = StoreDest.withElementType(Result->getType());
|
||||
|
||||
CGF.EmitAggregateStore(Result, StoreDest,
|
||||
E->getType().isVolatileQualified());
|
||||
CGF.CreateCoercedStore(
|
||||
Result, Dest.getAddress(),
|
||||
llvm::TypeSize::getFixed(
|
||||
Dest.getPreferredSize(CGF.getContext(), E->getType())
|
||||
.getQuantity()),
|
||||
E->getType().isVolatileQualified());
|
||||
return;
|
||||
}
|
||||
return Visit(E->getSubExpr());
|
||||
|
|
@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
|
|||
if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
|
||||
return AggValueSlot::DoesNotOverlap;
|
||||
|
||||
// Empty fields can overlap earlier fields.
|
||||
if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
|
||||
return AggValueSlot::MayOverlap;
|
||||
|
||||
// If the field lies entirely within the enclosing class's nvsize, its tail
|
||||
// padding cannot overlap any already-initialized object. (The only subobjects
|
||||
// with greater addresses that might already be initialized are vbases.)
|
||||
|
|
@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
|
|||
if (IsVirtual)
|
||||
return AggValueSlot::MayOverlap;
|
||||
|
||||
// Empty bases can overlap earlier bases.
|
||||
if (BaseRD->isEmpty())
|
||||
return AggValueSlot::MayOverlap;
|
||||
|
||||
// If the base class is laid out entirely within the nvsize of the derived
|
||||
// class, its tail padding cannot yet be initialized, so we can issue
|
||||
// stores at the full width of the base class.
|
||||
|
|
|
|||
|
|
@ -745,7 +745,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
|
|||
} break;
|
||||
case attr::CXXAssume: {
|
||||
const Expr *Assumption = cast<CXXAssumeAttr>(A)->getAssumption();
|
||||
if (getLangOpts().CXXAssumptions &&
|
||||
if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() &&
|
||||
!Assumption->HasSideEffects(getContext())) {
|
||||
llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption);
|
||||
Builder.CreateAssumption(AssumptionVal);
|
||||
|
|
|
|||
|
|
@ -4838,9 +4838,10 @@ public:
|
|||
void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
|
||||
ExprValueKind SrcKind);
|
||||
|
||||
/// Build all the stores needed to initialize an aggregate at Dest with the
|
||||
/// value Val.
|
||||
void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
|
||||
/// Create a store to \arg Dst from \arg Src, truncating the stored value
|
||||
/// to at most \arg DstSize bytes.
|
||||
void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
|
||||
bool DstIsVolatile);
|
||||
|
||||
/// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
|
||||
/// make sure it survives garbage collection until this point.
|
||||
|
|
|
|||
|
|
@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
|
|||
NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
|
||||
Keywords.kw_as));
|
||||
ProbablyBracedList =
|
||||
ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
|
||||
ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
|
||||
NextTok->is(tok::l_paren)));
|
||||
|
||||
// If there is a comma, semicolon or right paren after the closing
|
||||
// brace, we assume this is a braced initializer list.
|
||||
|
|
@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
|
|||
ProbablyBracedList = NextTok->isNot(tok::l_square);
|
||||
}
|
||||
|
||||
// Cpp macro definition body containing nonempty braced list or block:
|
||||
// Cpp macro definition body that is a nonempty braced list or block:
|
||||
if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
|
||||
!FormatTok->Previous && NextTok->is(tok::eof) &&
|
||||
// A statement can end with only `;` (simple statement), a block
|
||||
// closing brace (compound statement), or `:` (label statement).
|
||||
// If PrevTok is a block opening brace, Tok ends an empty block.
|
||||
|
|
|
|||
|
|
@ -5430,11 +5430,24 @@ struct EnsureImmediateInvocationInDefaultArgs
|
|||
|
||||
// Rewrite to source location to refer to the context in which they are used.
|
||||
ExprResult TransformSourceLocExpr(SourceLocExpr *E) {
|
||||
if (E->getParentContext() == SemaRef.CurContext)
|
||||
DeclContext *DC = E->getParentContext();
|
||||
if (DC == SemaRef.CurContext)
|
||||
return E;
|
||||
return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getType(),
|
||||
E->getBeginLoc(), E->getEndLoc(),
|
||||
SemaRef.CurContext);
|
||||
|
||||
// FIXME: During instantiation, because the rebuild of default arguments
|
||||
// is not always done in the context of the template instantiator,
|
||||
// we run the risk of producing a dependent source location
|
||||
// that would never be rebuilt.
|
||||
// This usually happens during overload resolution, or in contexts
|
||||
// where the value of the source location does not matter.
|
||||
// However, we should find a better way to deal with source location
|
||||
// of function templates.
|
||||
if (!SemaRef.CurrentInstantiationScope ||
|
||||
!SemaRef.CurContext->isDependentContext() || DC->isDependentContext())
|
||||
DC = SemaRef.CurContext;
|
||||
|
||||
return getDerived().RebuildSourceLocExpr(
|
||||
E->getIdentKind(), E->getType(), E->getBeginLoc(), E->getEndLoc(), DC);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -5140,7 +5140,8 @@ static bool HasNonDeletedDefaultedEqualityComparison(Sema &S,
|
|||
|
||||
// const ClassT& obj;
|
||||
OpaqueValueExpr Operand(
|
||||
{}, Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
|
||||
KeyLoc,
|
||||
Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
|
||||
ExprValueKind::VK_LValue);
|
||||
UnresolvedSet<16> Functions;
|
||||
// obj == obj;
|
||||
|
|
|
|||
|
|
@ -1318,7 +1318,6 @@ void Sema::ActOnLambdaExpressionAfterIntroducer(LambdaIntroducer &Intro,
|
|||
|
||||
if (C->Init.isUsable()) {
|
||||
addInitCapture(LSI, cast<VarDecl>(Var), C->Kind == LCK_ByRef);
|
||||
PushOnScopeChains(Var, CurScope, false);
|
||||
} else {
|
||||
TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef
|
||||
: TryCapture_ExplicitByVal;
|
||||
|
|
|
|||
|
|
@ -570,7 +570,7 @@ void LookupResult::resolveKind() {
|
|||
|
||||
// For non-type declarations, check for a prior lookup result naming this
|
||||
// canonical declaration.
|
||||
if (!D->isPlaceholderVar(getSema().getLangOpts()) && !ExistingI) {
|
||||
if (!ExistingI) {
|
||||
auto UniqueResult = Unique.insert(std::make_pair(D, I));
|
||||
if (!UniqueResult.second) {
|
||||
// We've seen this entity before.
|
||||
|
|
|
|||
|
|
@ -1928,6 +1928,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
|
|||
case Stmt::CXXRewrittenBinaryOperatorClass:
|
||||
case Stmt::RequiresExprClass:
|
||||
case Expr::CXXParenListInitExprClass:
|
||||
case Stmt::EmbedExprClass:
|
||||
// Fall through.
|
||||
|
||||
// Cases we intentionally don't evaluate, since they don't need
|
||||
|
|
@ -2430,10 +2431,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
|
|||
Bldr.addNodes(Dst);
|
||||
break;
|
||||
}
|
||||
|
||||
case Stmt::EmbedExprClass:
|
||||
llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ enum ProcessorTypes {
|
|||
INTEL_SIERRAFOREST,
|
||||
INTEL_GRANDRIDGE,
|
||||
INTEL_CLEARWATERFOREST,
|
||||
AMDFAM1AH,
|
||||
CPU_TYPE_MAX
|
||||
};
|
||||
|
||||
|
|
@ -97,6 +98,7 @@ enum ProcessorSubtypes {
|
|||
INTEL_COREI7_ARROWLAKE,
|
||||
INTEL_COREI7_ARROWLAKE_S,
|
||||
INTEL_COREI7_PANTHERLAKE,
|
||||
AMDFAM1AH_ZNVER5,
|
||||
CPU_SUBTYPE_MAX
|
||||
};
|
||||
|
||||
|
|
@ -803,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
|
|||
break; // "znver4"
|
||||
}
|
||||
break; // family 19h
|
||||
case 26:
|
||||
CPU = "znver5";
|
||||
*Type = AMDFAM1AH;
|
||||
if (Model <= 0x77) {
|
||||
// Models 00h-0Fh (Breithorn).
|
||||
// Models 10h-1Fh (Breithorn-Dense).
|
||||
// Models 20h-2Fh (Strix 1).
|
||||
// Models 30h-37h (Strix 2).
|
||||
// Models 38h-3Fh (Strix 3).
|
||||
// Models 40h-4Fh (Granite Ridge).
|
||||
// Models 50h-5Fh (Weisshorn).
|
||||
// Models 60h-6Fh (Krackan1).
|
||||
// Models 70h-77h (Sarlak).
|
||||
CPU = "znver5";
|
||||
*Subtype = AMDFAM1AH_ZNVER5;
|
||||
break; // "znver5"
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break; // Unknown AMD CPU.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@
|
|||
#define QUAD_PRECISION
|
||||
#include "fp_lib.h"
|
||||
|
||||
#if defined(CRT_HAS_F128)
|
||||
#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
|
||||
|
||||
// Returns: the quotient of (a + ib) / (c + id)
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
#include "int_lib.h"
|
||||
#include "int_math.h"
|
||||
|
||||
#if defined(CRT_HAS_F128)
|
||||
#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
|
||||
|
||||
// Returns: the product of a + ib and c + id
|
||||
|
||||
|
|
|
|||
|
|
@ -1015,8 +1015,8 @@ constexpr chrono::year operator ""y(unsigned lo
|
|||
# include <charconv>
|
||||
# if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
|
||||
# include <locale>
|
||||
# include <ostream>
|
||||
# endif
|
||||
# include <ostream>
|
||||
#endif
|
||||
|
||||
#endif // _LIBCPP_CHRONO
|
||||
|
|
|
|||
|
|
@ -60,17 +60,15 @@ Hexagon::Hexagon() {
|
|||
}
|
||||
|
||||
uint32_t Hexagon::calcEFlags() const {
|
||||
assert(!ctx.objectFiles.empty());
|
||||
|
||||
// The architecture revision must always be equal to or greater than
|
||||
// greatest revision in the list of inputs.
|
||||
uint32_t ret = 0;
|
||||
std::optional<uint32_t> ret;
|
||||
for (InputFile *f : ctx.objectFiles) {
|
||||
uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
|
||||
if (eflags > ret)
|
||||
if (!ret || eflags > *ret)
|
||||
ret = eflags;
|
||||
}
|
||||
return ret;
|
||||
return ret.value_or(/* Default Arch Rev: */ 0x60);
|
||||
}
|
||||
|
||||
static uint32_t applyMask(uint32_t mask, uint32_t data) {
|
||||
|
|
|
|||
|
|
@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H, "zhaoxin_fam7h")
|
|||
X86_CPU_TYPE(INTEL_SIERRAFOREST, "sierraforest")
|
||||
X86_CPU_TYPE(INTEL_GRANDRIDGE, "grandridge")
|
||||
X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
|
||||
X86_CPU_TYPE(AMDFAM1AH, "amdfam1ah")
|
||||
|
||||
// Alternate names supported by __builtin_cpu_is and target multiversioning.
|
||||
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom")
|
||||
X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10")
|
||||
X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15")
|
||||
X86_CPU_TYPE_ALIAS(AMDFAM1AH, "amdfam1a")
|
||||
X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")
|
||||
|
||||
#undef X86_CPU_TYPE_ALIAS
|
||||
|
|
@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
|
|||
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
|
||||
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
|
||||
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
|
||||
X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
|
||||
|
||||
// Alternate names supported by __builtin_cpu_is and target multiversioning.
|
||||
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
|
||||
|
|
|
|||
|
|
@ -147,6 +147,7 @@ enum CPUKind {
|
|||
CK_x86_64_v3,
|
||||
CK_x86_64_v4,
|
||||
CK_Geode,
|
||||
CK_ZNVER5,
|
||||
};
|
||||
|
||||
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
|
||||
|
|
|
|||
|
|
@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
|
|||
// Generate the prolog instructions that set up the pipeline.
|
||||
generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
|
||||
MF.insert(BB->getIterator(), KernelBB);
|
||||
LIS.insertMBBInMaps(KernelBB);
|
||||
|
||||
// Rearrange the instructions to generate the new, pipelined loop,
|
||||
// and update register names as needed.
|
||||
|
|
@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
|
|||
NewBB->transferSuccessors(PredBB);
|
||||
PredBB->addSuccessor(NewBB);
|
||||
PredBB = NewBB;
|
||||
LIS.insertMBBInMaps(NewBB);
|
||||
|
||||
// Generate instructions for each appropriate stage. Process instructions
|
||||
// in original program order.
|
||||
|
|
@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog(
|
|||
|
||||
PredBB->replaceSuccessor(LoopExitBB, NewBB);
|
||||
NewBB->addSuccessor(LoopExitBB);
|
||||
LIS.insertMBBInMaps(NewBB);
|
||||
|
||||
if (EpilogStart == LoopExitBB)
|
||||
EpilogStart = NewBB;
|
||||
|
|
|
|||
|
|
@ -1453,6 +1453,10 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
|
|||
if (BB->getFirstMayFaultInst()) {
|
||||
// Report the IP range only for blocks that contain a may-fault instruction
|
||||
auto MBBb = MBB.getFirstNonPHI();
|
||||
|
||||
if (MBBb == MBB.end())
|
||||
continue;
|
||||
|
||||
MachineInstr *MIb = &*MBBb;
|
||||
if (MIb->isTerminator())
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
|
|||
if (ReadFromTail && Src->getMarker(Last)) {
|
||||
DbgMarker *FromLast = Src->getMarker(Last);
|
||||
if (LastIsEnd) {
|
||||
Dest->adoptDbgRecords(Src, Last, true);
|
||||
// adoptDbgRecords will release any trailers.
|
||||
if (Dest == end()) {
|
||||
// Absorb the trailing markers from Src.
|
||||
assert(FromLast == Src->getTrailingDbgRecords());
|
||||
createMarker(Dest)->absorbDebugValues(*FromLast, true);
|
||||
FromLast->eraseFromParent();
|
||||
Src->deleteTrailingDbgRecords();
|
||||
} else {
|
||||
// adoptDbgRecords will release any trailers.
|
||||
Dest->adoptDbgRecords(Src, Last, true);
|
||||
}
|
||||
assert(!Src->getTrailingDbgRecords());
|
||||
} else {
|
||||
// FIXME: can we use adoptDbgRecords here to reduce allocations?
|
||||
|
|
|
|||
|
|
@ -2931,16 +2931,6 @@ struct RegPairInfo {
|
|||
|
||||
} // end anonymous namespace
|
||||
|
||||
unsigned findFreePredicateReg(BitVector &SavedRegs) {
|
||||
for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
|
||||
if (SavedRegs.test(PReg)) {
|
||||
unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
|
||||
return PNReg;
|
||||
}
|
||||
}
|
||||
return AArch64::NoRegister;
|
||||
}
|
||||
|
||||
static void computeCalleeSaveRegisterPairs(
|
||||
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
|
||||
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
|
||||
|
|
@ -3645,7 +3635,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
|
||||
unsigned ExtraCSSpill = 0;
|
||||
bool HasUnpairedGPR64 = false;
|
||||
bool HasPairZReg = false;
|
||||
// Figure out which callee-saved registers to save/restore.
|
||||
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||
const unsigned Reg = CSRegs[i];
|
||||
|
|
@ -3699,28 +3688,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
!RegInfo->isReservedReg(MF, PairedReg))
|
||||
ExtraCSSpill = PairedReg;
|
||||
}
|
||||
// Check if there is a pair of ZRegs, so it can select PReg for spill/fill
|
||||
HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
|
||||
SavedRegs.test(CSRegs[i ^ 1]));
|
||||
}
|
||||
|
||||
if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
|
||||
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
||||
// Find a suitable predicate register for the multi-vector spill/fill
|
||||
// instructions.
|
||||
unsigned PnReg = findFreePredicateReg(SavedRegs);
|
||||
if (PnReg != AArch64::NoRegister)
|
||||
AFI->setPredicateRegForFillSpill(PnReg);
|
||||
// If no free callee-save has been found assign one.
|
||||
if (!AFI->getPredicateRegForFillSpill() &&
|
||||
MF.getFunction().getCallingConv() ==
|
||||
CallingConv::AArch64_SVE_VectorCall) {
|
||||
SavedRegs.set(AArch64::P8);
|
||||
AFI->setPredicateRegForFillSpill(AArch64::PN8);
|
||||
}
|
||||
|
||||
assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
|
||||
"Predicate cannot be a reserved register");
|
||||
}
|
||||
|
||||
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
|
||||
|
|
|
|||
|
|
@ -5144,10 +5144,6 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||
if (PNRReg.isValid() && !PNRReg.isVirtual())
|
||||
MI.addDef(PNRReg, RegState::Implicit);
|
||||
MI.addMemOperand(MMO);
|
||||
|
||||
if (PNRReg.isValid() && PNRReg.isVirtual())
|
||||
BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
|
||||
.addReg(DestReg);
|
||||
}
|
||||
|
||||
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
|
||||
|
|
|
|||
|
|
@ -4349,6 +4349,7 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
|
|||
SelectionDAG &DAG = DCI.DAG;
|
||||
SDLoc DL(N);
|
||||
|
||||
bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
|
||||
|
|
@ -4363,20 +4364,25 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
|
|||
|
||||
// Try to use two fast 24-bit multiplies (one for each half of the result)
|
||||
// instead of one slow extending multiply.
|
||||
unsigned LoOpcode, HiOpcode;
|
||||
if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
|
||||
N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
|
||||
N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
|
||||
LoOpcode = AMDGPUISD::MUL_U24;
|
||||
HiOpcode = AMDGPUISD::MULHI_U24;
|
||||
} else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
|
||||
N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
|
||||
N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
|
||||
LoOpcode = AMDGPUISD::MUL_I24;
|
||||
HiOpcode = AMDGPUISD::MULHI_I24;
|
||||
unsigned LoOpcode = 0;
|
||||
unsigned HiOpcode = 0;
|
||||
if (Signed) {
|
||||
if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
|
||||
N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
|
||||
N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
|
||||
LoOpcode = AMDGPUISD::MUL_I24;
|
||||
HiOpcode = AMDGPUISD::MULHI_I24;
|
||||
}
|
||||
} else {
|
||||
return SDValue();
|
||||
if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
|
||||
N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
|
||||
N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
|
||||
LoOpcode = AMDGPUISD::MUL_U24;
|
||||
HiOpcode = AMDGPUISD::MULHI_U24;
|
||||
}
|
||||
}
|
||||
if (!LoOpcode)
|
||||
return SDValue();
|
||||
|
||||
SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
|
||||
SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
|
||||
|
|
|
|||
|
|
@ -9338,12 +9338,13 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
|
|||
SDLoc dl(Op);
|
||||
SDValue Op0 = Op->getOperand(0);
|
||||
|
||||
if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
|
||||
(Op.getValueType() != MVT::f128))
|
||||
return SDValue();
|
||||
|
||||
SDValue Lo = Op0.getOperand(0);
|
||||
SDValue Hi = Op0.getOperand(1);
|
||||
|
||||
if ((Op.getValueType() != MVT::f128) ||
|
||||
(Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
|
||||
(Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
|
||||
if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
|
||||
return SDValue();
|
||||
|
||||
if (!Subtarget.isLittleEndian())
|
||||
|
|
|
|||
|
|
@ -2902,7 +2902,7 @@ RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
|
|||
// if any possible.
|
||||
if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
|
||||
(MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
|
||||
F.hasSection()))
|
||||
F.hasSection() || F.getSectionPrefix()))
|
||||
return outliner::InstrType::Illegal;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1543,6 +1543,19 @@ def ProcessorFeatures {
|
|||
FeatureVPOPCNTDQ];
|
||||
list<SubtargetFeature> ZN4Features =
|
||||
!listconcat(ZN3Features, ZN4AdditionalFeatures);
|
||||
|
||||
|
||||
list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
|
||||
list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
|
||||
FeatureMOVDIRI,
|
||||
FeatureMOVDIR64B,
|
||||
FeatureVP2INTERSECT,
|
||||
FeaturePREFETCHI,
|
||||
FeatureAVXVNNI
|
||||
];
|
||||
list<SubtargetFeature> ZN5Features =
|
||||
!listconcat(ZN4Features, ZN5AdditionalFeatures);
|
||||
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
@ -1892,6 +1905,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
|
|||
ProcessorFeatures.ZN3Tuning>;
|
||||
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
|
||||
ProcessorFeatures.ZN4Tuning>;
|
||||
def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
|
||||
ProcessorFeatures.ZN5Tuning>;
|
||||
|
||||
def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
|
||||
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
|
||||
|
|
|
|||
|
|
@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
|
|||
let ValidationCounters = DefaultAMDPfmValidationCounters;
|
||||
}
|
||||
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
|
||||
def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;
|
||||
|
|
|
|||
|
|
@ -1213,6 +1213,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
|
|||
break; // "znver4"
|
||||
}
|
||||
break; // family 19h
|
||||
case 26:
|
||||
CPU = "znver5";
|
||||
*Type = X86::AMDFAM1AH;
|
||||
if (Model <= 0x77) {
|
||||
// Models 00h-0Fh (Breithorn).
|
||||
// Models 10h-1Fh (Breithorn-Dense).
|
||||
// Models 20h-2Fh (Strix 1).
|
||||
// Models 30h-37h (Strix 2).
|
||||
// Models 38h-3Fh (Strix 3).
|
||||
// Models 40h-4Fh (Granite Ridge).
|
||||
// Models 50h-5Fh (Weisshorn).
|
||||
// Models 60h-6Fh (Krackan1).
|
||||
// Models 70h-77h (Sarlak).
|
||||
CPU = "znver5";
|
||||
*Subtype = X86::AMDFAM1AH_ZNVER5;
|
||||
break; // "znver5"
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break; // Unknown AMD CPU.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 =
|
|||
FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
|
||||
FeatureGFNI | FeatureSHSTK;
|
||||
|
||||
static constexpr FeatureBitset FeaturesZNVER5 =
|
||||
FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
|
||||
FeatureAVX512VP2INTERSECT | FeaturePREFETCHI | FeatureAVXVNNI;
|
||||
|
||||
// D151696 transplanted Mangling and OnlyForCPUDispatchSpecific from
|
||||
// X86TargetParser.def to here. They are assigned by following ways:
|
||||
// 1. Copy the mangling from the original CPU_SPECIFIC MACROs. If no, assign
|
||||
|
|
@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = {
|
|||
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
|
||||
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
|
||||
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
|
||||
{ {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
|
||||
// Generic 64-bit processor.
|
||||
{ {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
|
||||
{ {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
|
||||
|
|
|
|||
|
|
@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
|
|||
if (any_of(E.Scalars, [&](Value *V) {
|
||||
return !all_of(V->users(), [=](User *U) {
|
||||
return getTreeEntry(U) ||
|
||||
(UserIgnoreList && UserIgnoreList->contains(U)) ||
|
||||
(E.Idx == 0 && UserIgnoreList &&
|
||||
UserIgnoreList->contains(U)) ||
|
||||
(!isa<CmpInst>(U) && U->getType()->isSized() &&
|
||||
!U->getType()->isScalableTy() &&
|
||||
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
|
||||
|
|
@ -15539,6 +15540,11 @@ void BoUpSLP::computeMinimumValueSizes() {
|
|||
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
|
||||
if (TE == UserTE || !TE)
|
||||
return false;
|
||||
if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
|
||||
SelectInst>(U) ||
|
||||
!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
|
||||
SelectInst>(UserTE->getMainOp()))
|
||||
return true;
|
||||
unsigned UserTESz = DL->getTypeSizeInBits(
|
||||
UserTE->Scalars.front()->getType());
|
||||
auto It = MinBWs.find(TE);
|
||||
|
|
|
|||
Loading…
Reference in a new issue