diff --git a/promql/engine.go b/promql/engine.go
index fec2bbf761..888df311d0 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -2981,7 +2981,6 @@ type groupedAggregation struct {
 	hasHistogram           bool // Has at least 1 histogram sample aggregated.
 	incompatibleHistograms bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets.
 	groupAggrComplete      bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group.
-	incrementalMean        bool // True after reverting to incremental calculation of the mean value.
 }
 
 // aggregation evaluates sum, avg, count, stdvar, stddev or quantile at one timestep on inputMatrix.
@@ -3080,6 +3079,21 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 			}
 
 		case parser.AVG:
+			// For the average calculation, we use incremental mean
+			// calculation. In particular in combination with Kahan
+			// summation (which we do for floats, but not yet for
+			// histograms, see issue #14105), this is quite accurate
+			// and only breaks in extreme cases (see testdata for
+			// avg_over_time). One might assume that simple direct
+			// mean calculation works better in some cases, but so
+			// far, our conclusion is that we fare best with the
+			// incremental approach plus Kahan summation (for
+			// floats). For a relevant discussion, see
+			// https://stackoverflow.com/questions/61665473/is-it-beneficial-for-precision-to-calculate-the-incremental-mean-average
+			// Additional note: For even better numerical accuracy,
+			// we would need to process the values in a particular
+			// order, but that would be very hard to implement given
+			// how the PromQL engine works.
 			group.groupCount++
 			if h != nil {
 				group.hasHistogram = true
@@ -3104,45 +3118,11 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 				// point in copying the histogram in that case.
 			} else {
 				group.hasFloat = true
-				if !group.incrementalMean {
-					newV, newC := kahanSumInc(f, group.floatValue, group.floatKahanC)
-					if !math.IsInf(newV, 0) {
-						// The sum doesn't overflow, so we propagate it to the
-						// group struct and continue with the regular
-						// calculation of the mean value.
-						group.floatValue, group.floatKahanC = newV, newC
-						break
-					}
-					// If we are here, we know that the sum _would_ overflow. So
-					// instead of continue to sum up, we revert to incremental
-					// calculation of the mean value from here on.
-					group.incrementalMean = true
-					group.floatMean = group.floatValue / (group.groupCount - 1)
-					group.floatKahanC /= group.groupCount - 1
-				}
-				if math.IsInf(group.floatMean, 0) {
-					if math.IsInf(f, 0) && (group.floatMean > 0) == (f > 0) {
-						// The `floatMean` and `s.F` values are `Inf` of the same sign.  They
-						// can't be subtracted, but the value of `floatMean` is correct
-						// already.
-						break
-					}
-					if !math.IsInf(f, 0) && !math.IsNaN(f) {
-						// At this stage, the mean is an infinite. If the added
-						// value is neither an Inf or a Nan, we can keep that mean
-						// value.
-						// This is required because our calculation below removes
-						// the mean value, which would look like Inf += x - Inf and
-						// end up as a NaN.
-						break
-					}
-				}
-				currentMean := group.floatMean + group.floatKahanC
+				q := (group.groupCount - 1) / group.groupCount
 				group.floatMean, group.floatKahanC = kahanSumInc(
-					// Divide each side of the `-` by `group.groupCount` to avoid float64 overflows.
-					f/group.groupCount-currentMean/group.groupCount,
-					group.floatMean,
-					group.floatKahanC,
+					f/group.groupCount,
+					q*group.floatMean,
+					q*group.floatKahanC,
 				)
 			}
 
@@ -3215,10 +3195,8 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 				continue
 			case aggr.hasHistogram:
 				aggr.histogramValue = aggr.histogramValue.Compact(0)
-			case aggr.incrementalMean:
-				aggr.floatValue = aggr.floatMean + aggr.floatKahanC
 			default:
-				aggr.floatValue = (aggr.floatValue + aggr.floatKahanC) / aggr.groupCount
+				aggr.floatValue = aggr.floatMean + aggr.floatKahanC
 			}
 
 		case parser.COUNT:
diff --git a/promql/functions.go b/promql/functions.go
index e4699f3914..7e5205844a 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -671,6 +671,20 @@ func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode
 		metricName := firstSeries.Metric.Get(labels.MetricName)
 		return enh.Out, annotations.New().Add(annotations.NewMixedFloatsHistogramsWarning(metricName, args[0].PositionRange()))
 	}
+	// For the average calculation, we use incremental mean calculation. In
+	// particular in combination with Kahan summation (which we do for
+	// floats, but not yet for histograms, see issue #14105), this is quite
+	// accurate and only breaks in extreme cases (see testdata). One might
+	// assume that simple direct mean calculation works better in some
+	// cases, but so far, our conclusion is that we fare best with the
+	// incremental approach plus Kahan summation (for floats). For a
+	// relevant discussion, see
+	// https://stackoverflow.com/questions/61665473/is-it-beneficial-for-precision-to-calculate-the-incremental-mean-average
+	// Additional note: For even better numerical accuracy, we would need to
+	// process the values in a particular order. For avg_over_time, that
+	// would be more or less feasible, but it would be more expensivo, and
+	// it would also be much harder for the avg aggregator, given how the
+	// PromQL engine works.
 	if len(firstSeries.Floats) == 0 {
 		// The passed values only contain histograms.
 		vec, err := aggrHistOverTime(vals, enh, func(s Series) (*histogram.FloatHistogram, error) {
@@ -702,52 +716,13 @@ func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode
 		return vec, nil
 	}
 	return aggrOverTime(vals, enh, func(s Series) float64 {
-		var (
-			sum, mean, count, kahanC float64
-			incrementalMean          bool
-		)
-		for _, f := range s.Floats {
-			count++
-			if !incrementalMean {
-				newSum, newC := kahanSumInc(f.F, sum, kahanC)
-				// Perform regular mean calculation as long as
-				// the sum doesn't overflow and (in any case)
-				// for the first iteration (even if we start
-				// with ±Inf) to not run into division-by-zero
-				// problems below.
-				if count == 1 || !math.IsInf(newSum, 0) {
-					sum, kahanC = newSum, newC
-					continue
-				}
-				// Handle overflow by reverting to incremental calculation of the mean value.
-				incrementalMean = true
-				mean = sum / (count - 1)
-				kahanC /= count - 1
-			}
-			if math.IsInf(mean, 0) {
-				if math.IsInf(f.F, 0) && (mean > 0) == (f.F > 0) {
-					// The `mean` and `f.F` values are `Inf` of the same sign.  They
-					// can't be subtracted, but the value of `mean` is correct
-					// already.
-					continue
-				}
-				if !math.IsInf(f.F, 0) && !math.IsNaN(f.F) {
-					// At this stage, the mean is an infinite. If the added
-					// value is neither an Inf or a Nan, we can keep that mean
-					// value.
-					// This is required because our calculation below removes
-					// the mean value, which would look like Inf += x - Inf and
-					// end up as a NaN.
-					continue
-				}
-			}
-			correctedMean := mean + kahanC
-			mean, kahanC = kahanSumInc(f.F/count-correctedMean/count, mean, kahanC)
+		var mean, kahanC float64
+		for i, f := range s.Floats {
+			count := float64(i + 1)
+			q := float64(i) / count
+			mean, kahanC = kahanSumInc(f.F/count, q*mean, q*kahanC)
 		}
-		if incrementalMean {
-			return mean + kahanC
-		}
-		return (sum + kahanC) / count
+		return mean + kahanC
 	}), nil
 }