Vendor import of llvm-project branch release/19.x llvmorg-19.1.0-0-ga4bf6cd7cfb1, a.k.a. 19.1.0 release.

2026-06-04 22:32:43 -04:00 · 2024-09-22 11:37:02 +02:00 · 2024-09-22 11:37:02 +02:00 · 1de139fdd5
commit 1de139fdd5
parent 7432c96084
31 changed files with 227 additions and 176 deletions
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@ -723,6 +723,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
  case CK_ZNVER4:
    defineCPUMacros(Builder, "znver4");
    break;
+  case CK_ZNVER5:
+    defineCPUMacros(Builder, "znver5");
+    break;
  case CK_Geode:
    defineCPUMacros(Builder, "geode");
    break;
@ -1613,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
    case CK_ZNVER2:
    case CK_ZNVER3:
    case CK_ZNVER4:
+    case CK_ZNVER5:
    // Deprecated
    case CK_x86_64:
    case CK_x86_64_v2:
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
  return CGF.Builder.CreateLoad(Tmp);
 }

-// Function to store a first-class aggregate into memory.  We prefer to
-// store the elements rather than the aggregate to be more friendly to
-// fast-isel.
-// FIXME: Do we need to recurse here?
-void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
-                                         bool DestIsVolatile) {
-  // Prefer scalar stores to first-class aggregate stores.
-  if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
-    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      Address EltPtr = Builder.CreateStructGEP(Dest, i);
-      llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
-      Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
-    }
-  } else {
-    Builder.CreateStore(Val, Dest, DestIsVolatile);
-  }
-}
+void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
+                                         llvm::TypeSize DstSize,
+                                         bool DstIsVolatile) {
+  if (!DstSize)
+    return;

-/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types.  The
-/// destination is known to be aligned to \arg DstAlign bytes.
-///
-/// This safely handles the case when the src type is larger than the
-/// destination type; the upper bits of the src will be lost.
-static void CreateCoercedStore(llvm::Value *Src,
-                               Address Dst,
-                               bool DstIsVolatile,
-                               CodeGenFunction &CGF) {
  llvm::Type *SrcTy = Src->getType();
-  llvm::Type *DstTy = Dst.getElementType();
-  if (SrcTy == DstTy) {
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
-    return;
+  llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
+
+  // GEP into structs to try to make types match.
+  // FIXME: This isn't really that useful with opaque types, but it impacts a
+  // lot of regression tests.
+  if (SrcTy != Dst.getElementType()) {
+    if (llvm::StructType *DstSTy =
+            dyn_cast<llvm::StructType>(Dst.getElementType())) {
+      assert(!SrcSize.isScalable());
+      Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
+                                               SrcSize.getFixedValue(), *this);
+    }
  }

-  llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
-
-  if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
-    Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
-                                             SrcSize.getFixedValue(), CGF);
-    DstTy = Dst.getElementType();
-  }
-
-  llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
-  llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
-  if (SrcPtrTy && DstPtrTy &&
-      SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
-    Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
-    return;
-  }
-
-  // If the source and destination are integer or pointer types, just do an
-  // extension or truncation to the desired type.
-  if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
-      (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
-    Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
-    return;
-  }
-
-  llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
-
-  // If store is legal, just bitcast the src pointer.
-  if (isa<llvm::ScalableVectorType>(SrcTy) ||
-      isa<llvm::ScalableVectorType>(DstTy) ||
-      SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
-    Dst = Dst.withElementType(SrcTy);
-    CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
+  if (SrcSize.isScalable() || SrcSize <= DstSize) {
+    if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
+        SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
+      // If the value is supposed to be a pointer, convert it before storing it.
+      Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
+      Builder.CreateStore(Src, Dst, DstIsVolatile);
+    } else if (llvm::StructType *STy =
+                   dyn_cast<llvm::StructType>(Src->getType())) {
+      // Prefer scalar stores to first-class aggregate stores.
+      Dst = Dst.withElementType(SrcTy);
+      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+        Address EltPtr = Builder.CreateStructGEP(Dst, i);
+        llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
+        Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
+      }
+    } else {
+      Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
+    }
+  } else if (SrcTy->isIntegerTy()) {
+    // If the source is a simple integer, coerce it directly.
+    llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
+    Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
+    Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
  } else {
    // Otherwise do coercion through memory. This is stupid, but
    // simple.
@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
    // FIXME: Assert that we aren't truncating non-padding bits when have access
    // to that information.
    RawAddress Tmp =
-        CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
-    CGF.Builder.CreateStore(Src, Tmp);
-    CGF.Builder.CreateMemCpy(
-        Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
-        Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
-        llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
+        CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
+    Builder.CreateStore(Src, Tmp);
+    Builder.CreateMemCpy(Dst.emitRawPointer(*this),
+                         Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
+                         Tmp.getAlignment().getAsAlign(),
+                         Builder.CreateTypeSize(IntPtrTy, DstSize));
  }
 }

@ -3309,7 +3284,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
        assert(NumIRArgs == 1);
        auto AI = Fn->getArg(FirstIRArg);
        AI->setName(Arg->getName() + ".coerce");
-        CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
+        CreateCoercedStore(
+            AI, Ptr,
+            llvm::TypeSize::getFixed(
+                getContext().getTypeSizeInChars(Ty).getQuantity() -
+                ArgI.getDirectOffset()),
+            /*DstIsVolatile=*/false);
      }

      // Match to what EmitParmDecl is expecting for this type.
@ -5939,17 +5919,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
            llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
            return RValue::getComplex(std::make_pair(Real, Imag));
          }
-          case TEK_Aggregate: {
-            Address DestPtr = ReturnValue.getAddress();
-            bool DestIsVolatile = ReturnValue.isVolatile();
-
-            if (!DestPtr.isValid()) {
-              DestPtr = CreateMemTemp(RetTy, "agg.tmp");
-              DestIsVolatile = false;
-            }
-            EmitAggregateStore(CI, DestPtr, DestIsVolatile);
-            return RValue::getAggregate(DestPtr);
-          }
+          case TEK_Aggregate:
+            break;
          case TEK_Scalar: {
            // If the argument doesn't match, perform a bitcast to coerce it.
            // This can happen due to trivial type mismatches.
@ -5959,7 +5930,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
            return RValue::get(V);
          }
          }
-          llvm_unreachable("bad evaluation kind");
        }

        // If coercing a fixed vector from a scalable vector for ABI
@ -5981,10 +5951,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,

        Address DestPtr = ReturnValue.getValue();
        bool DestIsVolatile = ReturnValue.isVolatile();
+        uint64_t DestSize =
+            getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();

        if (!DestPtr.isValid()) {
          DestPtr = CreateMemTemp(RetTy, "coerce");
          DestIsVolatile = false;
+          DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
        }

        // An empty record can overlap other data (if declared with
@ -5993,7 +5966,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
        if (!isEmptyRecord(getContext(), RetTy, true)) {
          // If the value is offset in memory, apply the offset now.
          Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
-          CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+          CreateCoercedStore(
+              CI, StorePtr,
+              llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
+              DestIsVolatile);
        }

        return convertTempToRValue(DestPtr, RetTy, SourceLocation());
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@ -131,15 +131,12 @@ public:
    EnsureDest(E->getType());

    if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
-      Address StoreDest = Dest.getAddress();
-      // The emitted value is guaranteed to have the same size as the
-      // destination but can have a different type. Just do a bitcast in this
-      // case to avoid incorrect GEPs.
-      if (Result->getType() != StoreDest.getType())
-        StoreDest = StoreDest.withElementType(Result->getType());
-
-      CGF.EmitAggregateStore(Result, StoreDest,
-                             E->getType().isVolatileQualified());
+      CGF.CreateCoercedStore(
+          Result, Dest.getAddress(),
+          llvm::TypeSize::getFixed(
+              Dest.getPreferredSize(CGF.getContext(), E->getType())
+                  .getQuantity()),
+          E->getType().isVolatileQualified());
      return;
    }
    return Visit(E->getSubExpr());
@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
  if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
    return AggValueSlot::DoesNotOverlap;

+  // Empty fields can overlap earlier fields.
+  if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
+    return AggValueSlot::MayOverlap;
+
  // If the field lies entirely within the enclosing class's nvsize, its tail
  // padding cannot overlap any already-initialized object. (The only subobjects
  // with greater addresses that might already be initialized are vbases.)
@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
  if (IsVirtual)
    return AggValueSlot::MayOverlap;

+  // Empty bases can overlap earlier bases.
+  if (BaseRD->isEmpty())
+    return AggValueSlot::MayOverlap;
+
  // If the base class is laid out entirely within the nvsize of the derived
  // class, its tail padding cannot yet be initialized, so we can issue
  // stores at the full width of the base class.
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@ -745,7 +745,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
    } break;
    case attr::CXXAssume: {
      const Expr *Assumption = cast<CXXAssumeAttr>(A)->getAssumption();
-      if (getLangOpts().CXXAssumptions &&
+      if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() &&
          !Assumption->HasSideEffects(getContext())) {
        llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption);
        Builder.CreateAssumption(AssumptionVal);
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@ -4838,9 +4838,10 @@ public:
  void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
                            ExprValueKind SrcKind);

-  /// Build all the stores needed to initialize an aggregate at Dest with the
-  /// value Val.
-  void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
+  /// Create a store to \arg Dst from \arg Src, truncating the stored value
+  /// to at most \arg DstSize bytes.
+  void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
+                          bool DstIsVolatile);

  /// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
  /// make sure it survives garbage collection until this point.
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList =
-              ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+              ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+                                               NextTok->is(tok::l_paren)));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }

-          // Cpp macro definition body containing nonempty braced list or block:
+          // Cpp macro definition body that is a nonempty braced list or block:
          if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
+              !FormatTok->Previous && NextTok->is(tok::eof) &&
              // A statement can end with only `;` (simple statement), a block
              // closing brace (compound statement), or `:` (label statement).
              // If PrevTok is a block opening brace, Tok ends an empty block.
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@ -5430,11 +5430,24 @@ struct EnsureImmediateInvocationInDefaultArgs

  // Rewrite to source location to refer to the context in which they are used.
  ExprResult TransformSourceLocExpr(SourceLocExpr *E) {
-    if (E->getParentContext() == SemaRef.CurContext)
+    DeclContext *DC = E->getParentContext();
+    if (DC == SemaRef.CurContext)
      return E;
-    return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getType(),
-                                             E->getBeginLoc(), E->getEndLoc(),
-                                             SemaRef.CurContext);
+
+    // FIXME: During instantiation, because the rebuild of default arguments
+    // is not always done in the context of the template instantiator,
+    // we run the risk of producing a dependent source location
+    // that would never be rebuilt.
+    // This usually happens during overload resolution, or in contexts
+    // where the value of the source location does not matter.
+    // However, we should find a better way to deal with source location
+    // of function templates.
+    if (!SemaRef.CurrentInstantiationScope ||
+        !SemaRef.CurContext->isDependentContext() || DC->isDependentContext())
+      DC = SemaRef.CurContext;
+
+    return getDerived().RebuildSourceLocExpr(
+        E->getIdentKind(), E->getType(), E->getBeginLoc(), E->getEndLoc(), DC);
  }
 };

--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@ -5140,7 +5140,8 @@ static bool HasNonDeletedDefaultedEqualityComparison(Sema &S,

    // const ClassT& obj;
    OpaqueValueExpr Operand(
-        {}, Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
+        KeyLoc,
+        Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
        ExprValueKind::VK_LValue);
    UnresolvedSet<16> Functions;
    // obj == obj;
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@ -1318,7 +1318,6 @@ void Sema::ActOnLambdaExpressionAfterIntroducer(LambdaIntroducer &Intro,

    if (C->Init.isUsable()) {
      addInitCapture(LSI, cast<VarDecl>(Var), C->Kind == LCK_ByRef);
-      PushOnScopeChains(Var, CurScope, false);
    } else {
      TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef
                                                 : TryCapture_ExplicitByVal;
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@ -570,7 +570,7 @@ void LookupResult::resolveKind() {

    // For non-type declarations, check for a prior lookup result naming this
    // canonical declaration.
-    if (!D->isPlaceholderVar(getSema().getLangOpts()) && !ExistingI) {
+    if (!ExistingI) {
      auto UniqueResult = Unique.insert(std::make_pair(D, I));
      if (!UniqueResult.second) {
        // We've seen this entity before.
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@ -1928,6 +1928,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
    case Stmt::CXXRewrittenBinaryOperatorClass:
    case Stmt::RequiresExprClass:
    case Expr::CXXParenListInitExprClass:
+    case Stmt::EmbedExprClass:
      // Fall through.

    // Cases we intentionally don't evaluate, since they don't need
@ -2430,10 +2431,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
      Bldr.addNodes(Dst);
      break;
    }
-
-    case Stmt::EmbedExprClass:
-      llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
-      break;
  }
 }

--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@ -59,6 +59,7 @@ enum ProcessorTypes {
  INTEL_SIERRAFOREST,
  INTEL_GRANDRIDGE,
  INTEL_CLEARWATERFOREST,
+  AMDFAM1AH,
  CPU_TYPE_MAX
 };

@ -97,6 +98,7 @@ enum ProcessorSubtypes {
  INTEL_COREI7_ARROWLAKE,
  INTEL_COREI7_ARROWLAKE_S,
  INTEL_COREI7_PANTHERLAKE,
+  AMDFAM1AH_ZNVER5,
  CPU_SUBTYPE_MAX
 };

@ -803,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
      break; //  "znver4"
    }
    break; // family 19h
+  case 26:
+    CPU = "znver5";
+    *Type = AMDFAM1AH;
+    if (Model <= 0x77) {
+      // Models 00h-0Fh (Breithorn).
+      // Models 10h-1Fh (Breithorn-Dense).
+      // Models 20h-2Fh (Strix 1).
+      // Models 30h-37h (Strix 2).
+      // Models 38h-3Fh (Strix 3).
+      // Models 40h-4Fh (Granite Ridge).
+      // Models 50h-5Fh (Weisshorn).
+      // Models 60h-6Fh (Krackan1).
+      // Models 70h-77h (Sarlak).
+      CPU = "znver5";
+      *Subtype = AMDFAM1AH_ZNVER5;
+      break; //  "znver5"
+    }
+    break;
  default:
    break; // Unknown AMD CPU.
  }
--- a/compiler-rt/lib/builtins/divtc3.c
+++ b/compiler-rt/lib/builtins/divtc3.c
@ -13,7 +13,7 @@
 #define QUAD_PRECISION
 #include "fp_lib.h"

-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)

 // Returns: the quotient of (a + ib) / (c + id)

--- a/compiler-rt/lib/builtins/multc3.c
+++ b/compiler-rt/lib/builtins/multc3.c
@ -15,7 +15,7 @@
 #include "int_lib.h"
 #include "int_math.h"

-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)

 // Returns: the product of a + ib and c + id

--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@ -1015,8 +1015,8 @@ constexpr chrono::year                                  operator ""y(unsigned lo
 #  include <charconv>
 #  if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
 #    include <locale>
+#    include <ostream>
 #  endif
-#  include <ostream>
 #endif

 #endif // _LIBCPP_CHRONO
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@ -60,17 +60,15 @@ Hexagon::Hexagon() {
 }

 uint32_t Hexagon::calcEFlags() const {
-  assert(!ctx.objectFiles.empty());
-
  // The architecture revision must always be equal to or greater than
  // greatest revision in the list of inputs.
-  uint32_t ret = 0;
+  std::optional<uint32_t> ret;
  for (InputFile *f : ctx.objectFiles) {
    uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
-    if (eflags > ret)
+    if (!ret || eflags > *ret)
      ret = eflags;
  }
-  return ret;
+  return ret.value_or(/* Default Arch Rev: */ 0x60);
 }

 static uint32_t applyMask(uint32_t mask, uint32_t data) {
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H,       "zhaoxin_fam7h")
 X86_CPU_TYPE(INTEL_SIERRAFOREST,  "sierraforest")
 X86_CPU_TYPE(INTEL_GRANDRIDGE,    "grandridge")
 X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
+X86_CPU_TYPE(AMDFAM1AH,           "amdfam1ah")

 // Alternate names supported by __builtin_cpu_is and target multiversioning.
 X86_CPU_TYPE_ALIAS(INTEL_BONNELL,    "atom")
 X86_CPU_TYPE_ALIAS(AMDFAM10H,        "amdfam10")
 X86_CPU_TYPE_ALIAS(AMDFAM15H,        "amdfam15")
+X86_CPU_TYPE_ALIAS(AMDFAM1AH,        "amdfam1a")
 X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")

 #undef X86_CPU_TYPE_ALIAS
@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
 X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE,      "arrowlake")
 X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S,    "arrowlake-s")
 X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE,    "pantherlake")
+X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5,            "znver5")

 // Alternate names supported by __builtin_cpu_is and target multiversioning.
 X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@ -147,6 +147,7 @@ enum CPUKind {
  CK_x86_64_v3,
  CK_x86_64_v4,
  CK_Geode,
+  CK_ZNVER5,
 };

 /// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
  // Generate the prolog instructions that set up the pipeline.
  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
  MF.insert(BB->getIterator(), KernelBB);
+  LIS.insertMBBInMaps(KernelBB);

  // Rearrange the instructions to generate the new, pipelined loop,
  // and update register names as needed.
@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
    NewBB->transferSuccessors(PredBB);
    PredBB->addSuccessor(NewBB);
    PredBB = NewBB;
+    LIS.insertMBBInMaps(NewBB);

    // Generate instructions for each appropriate stage. Process instructions
    // in original program order.
@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog(

    PredBB->replaceSuccessor(LoopExitBB, NewBB);
    NewBB->addSuccessor(LoopExitBB);
+    LIS.insertMBBInMaps(NewBB);

    if (EpilogStart == LoopExitBB)
      EpilogStart = NewBB;
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@ -1453,6 +1453,10 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
    if (BB->getFirstMayFaultInst()) {
      // Report IP range only for blocks with Faulty inst
      auto MBBb = MBB.getFirstNonPHI();
+
+      if (MBBb == MBB.end())
+        continue;
+
      MachineInstr *MIb = &*MBBb;
      if (MIb->isTerminator())
        continue;
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
  if (ReadFromTail && Src->getMarker(Last)) {
    DbgMarker *FromLast = Src->getMarker(Last);
    if (LastIsEnd) {
-      Dest->adoptDbgRecords(Src, Last, true);
-      // adoptDbgRecords will release any trailers.
+      if (Dest == end()) {
+        // Absorb the trailing markers from Src.
+        assert(FromLast == Src->getTrailingDbgRecords());
+        createMarker(Dest)->absorbDebugValues(*FromLast, true);
+        FromLast->eraseFromParent();
+        Src->deleteTrailingDbgRecords();
+      } else {
+        // adoptDbgRecords will release any trailers.
+        Dest->adoptDbgRecords(Src, Last, true);
+      }
      assert(!Src->getTrailingDbgRecords());
    } else {
      // FIXME: can we use adoptDbgRecords here to reduce allocations?
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@ -2931,16 +2931,6 @@ struct RegPairInfo {

 } // end anonymous namespace

-unsigned findFreePredicateReg(BitVector &SavedRegs) {
-  for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
-    if (SavedRegs.test(PReg)) {
-      unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
-      return PNReg;
-    }
-  }
-  return AArch64::NoRegister;
-}
-
 static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@ -3645,7 +3635,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,

  unsigned ExtraCSSpill = 0;
  bool HasUnpairedGPR64 = false;
-  bool HasPairZReg = false;
  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];
@ -3699,28 +3688,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
          !RegInfo->isReservedReg(MF, PairedReg))
        ExtraCSSpill = PairedReg;
    }
-    // Check if there is a pair of ZRegs, so it can select PReg for spill/fill
-    HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
-                    SavedRegs.test(CSRegs[i ^ 1]));
-  }
-
-  if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
-    AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-    // Find a suitable predicate register for the multi-vector spill/fill
-    // instructions.
-    unsigned PnReg = findFreePredicateReg(SavedRegs);
-    if (PnReg != AArch64::NoRegister)
-      AFI->setPredicateRegForFillSpill(PnReg);
-    // If no free callee-save has been found assign one.
-    if (!AFI->getPredicateRegForFillSpill() &&
-        MF.getFunction().getCallingConv() ==
-            CallingConv::AArch64_SVE_VectorCall) {
-      SavedRegs.set(AArch64::P8);
-      AFI->setPredicateRegForFillSpill(AArch64::PN8);
-    }
-
-    assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
-           "Predicate cannot be a reserved register");
  }

  if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@ -5144,10 +5144,6 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
  if (PNRReg.isValid() && !PNRReg.isVirtual())
    MI.addDef(PNRReg, RegState::Implicit);
  MI.addMemOperand(MMO);
-
-  if (PNRReg.isValid() && PNRReg.isVirtual())
-    BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
-        .addReg(DestReg);
 }

 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@ -4349,6 +4349,7 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

+  bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

@ -4363,20 +4364,25 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,

  // Try to use two fast 24-bit multiplies (one for each half of the result)
  // instead of one slow extending multiply.
-  unsigned LoOpcode, HiOpcode;
-  if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
-    N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
-    N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
-    LoOpcode = AMDGPUISD::MUL_U24;
-    HiOpcode = AMDGPUISD::MULHI_U24;
-  } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
-    N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
-    N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
-    LoOpcode = AMDGPUISD::MUL_I24;
-    HiOpcode = AMDGPUISD::MULHI_I24;
+  unsigned LoOpcode = 0;
+  unsigned HiOpcode = 0;
+  if (Signed) {
+    if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+      N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+      N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+      LoOpcode = AMDGPUISD::MUL_I24;
+      HiOpcode = AMDGPUISD::MULHI_I24;
+    }
  } else {
-    return SDValue();
+    if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+      N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+      N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+      LoOpcode = AMDGPUISD::MUL_U24;
+      HiOpcode = AMDGPUISD::MULHI_U24;
+    }
  }
+  if (!LoOpcode)
+    return SDValue();

  SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
  SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -9338,12 +9338,13 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Op0 = Op->getOperand(0);

+  if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+      (Op.getValueType() != MVT::f128))
+    return SDValue();
+
  SDValue Lo = Op0.getOperand(0);
  SDValue Hi = Op0.getOperand(1);
-
-  if ((Op.getValueType() != MVT::f128) ||
-      (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
-      (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
+  if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
    return SDValue();

  if (!Subtarget.isLittleEndian())
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@ -2902,7 +2902,7 @@ RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
    // if any possible.
    if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
        (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
-         F.hasSection()))
+         F.hasSection() || F.getSectionPrefix()))
      return outliner::InstrType::Illegal;
  }

--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@ -1543,6 +1543,19 @@ def ProcessorFeatures {
                                                  FeatureVPOPCNTDQ];
  list<SubtargetFeature> ZN4Features =
    !listconcat(ZN3Features, ZN4AdditionalFeatures);
+
+
+  list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
+  list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
+                                                  FeatureMOVDIRI,
+                                                  FeatureMOVDIR64B,
+                                                  FeatureVP2INTERSECT,
+                                                  FeaturePREFETCHI,
+                                                  FeatureAVXVNNI
+                                                  ];
+  list<SubtargetFeature> ZN5Features =
+    !listconcat(ZN4Features, ZN5AdditionalFeatures);
+
 }

 //===----------------------------------------------------------------------===//
@ -1892,6 +1905,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
 def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
           ProcessorFeatures.ZN4Tuning>;
+// znver5 has no dedicated model here: it is bound to Znver4Model, but with
+// its own ZN5 feature and tuning lists.
+def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
+                ProcessorFeatures.ZN5Tuning>;

 def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                              [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
  let ValidationCounters = DefaultAMDPfmValidationCounters;
 }
 def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
+// znver5 is bound to the same counter definitions as znver4.
+def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@ -1213,6 +1213,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
      break; //  "znver4"
    }
    break; // family 19h
+  case 26: // Family 1Ah.
+    // Every family 1Ah part is reported as "znver5" with type AMDFAM1AH; only
+    // the model ranges listed below additionally receive the ZNVER5 subtype.
+    // Models above 77h keep the CPU name but leave *Subtype untouched.
+    CPU = "znver5";
+    *Type = X86::AMDFAM1AH;
+    if (Model <= 0x77) {
+      // Models 00h-0Fh (Breithorn).
+      // Models 10h-1Fh (Breithorn-Dense).
+      // Models 20h-2Fh (Strix 1).
+      // Models 30h-37h (Strix 2).
+      // Models 38h-3Fh (Strix 3).
+      // Models 40h-4Fh (Granite Ridge).
+      // Models 50h-5Fh (Weisshorn).
+      // Models 60h-6Fh (Krackan1).
+      // Models 70h-77h (Sarlak).
+      *Subtype = X86::AMDFAM1AH_ZNVER5;
+      break; //  "znver5"
+    }
+    break; // family 1Ah
+
  default:
    break; // Unknown AMD CPU.
  }
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 =
    FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
    FeatureGFNI | FeatureSHSTK;

+// Feature set for znver5: everything in FeaturesZNVER4 plus MOVDIRI,
+// MOVDIR64B, AVX512-VP2INTERSECT, PREFETCHI and AVX-VNNI.
+static constexpr FeatureBitset FeaturesZNVER5 =
+    FeaturesZNVER4 | FeatureMOVDIRI | FeatureMOVDIR64B |
+    FeatureAVX512VP2INTERSECT | FeaturePREFETCHI | FeatureAVXVNNI;
+
 // D151696 transplanted Mangling and OnlyForCPUDispatchSpecific from
 // X86TargetParser.def to here. They are assigned in the following ways:
 // 1. Copy the mangling from the original CPU_SPECIFIC MACROs. If no, assign
@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = {
  { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
  { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
  { {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
+  { {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
  // Generic 64-bit processor.
  { {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
  { {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
  if (any_of(E.Scalars, [&](Value *V) {
        return !all_of(V->users(), [=](User *U) {
          return getTreeEntry(U) ||
-                 (UserIgnoreList && UserIgnoreList->contains(U)) ||
+                 (E.Idx == 0 && UserIgnoreList &&
+                  UserIgnoreList->contains(U)) ||
                 (!isa<CmpInst>(U) && U->getType()->isSized() &&
                  !U->getType()->isScalableTy() &&
                  DL->getTypeSizeInBits(U->getType()) <= BitWidth);
@ -15539,6 +15540,11 @@ void BoUpSLP::computeMinimumValueSizes() {
                    const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
                    if (TE == UserTE || !TE)
                      return false;
+                    if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+                             SelectInst>(U) ||
+                        !isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+                             SelectInst>(UserTE->getMainOp()))
+                      return true;
                    unsigned UserTESz = DL->getTypeSizeInBits(
                        UserTE->Scalars.front()->getType());
                    auto It = MinBWs.find(TE);