Improve overflow/underflow handling in regr_intercept().

As with corr() and regr_r2(), improve regr_intercept()'s final function to cope with overflow/underflow in the final calculation. Here, instead of using sqrt(), we use frexp() and ldexp() to recover, if an overflow or underflow is detected, so that the multiplication and division steps operate on normalised mantissas, and cannot overflow or underflow. As with 6498287696, and the previous commit improving regr_r2(), this is arguably a bug fix, but given the lack of prior complaints, refrain from back-patching. Reported-by: Tom Lane <tgl@sss.pgh.pa.us> Author: Dean Rasheed <dean.a.rasheed@gmail.com> Reviewed-by: Chengpeng Yan <chengpeng_yan@outlook.com> Discussion: https://postgr.es/m/33E01656-BB3B-46E9-A41F-24A01A7C35F4@outlook.com
2026-06-09 00:32:10 -04:00 · 2026-06-03 09:20:21 +01:00 · 2026-06-03 09:20:21 +01:00 · eb8e76e130
commit eb8e76e130
parent d58ec50e0f
3 changed files with 51 additions and 2 deletions
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@ -4010,7 +4010,8 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
 				Sx,
 				Sxx,
 				Sy,
-				Sxy;
+				Sxy,
+				dy;

 	transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
 	N = transvalues[0];
@ -4029,7 +4030,41 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
 	if (Sxx == 0)
 		PG_RETURN_NULL();

-	PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
+	/*
+	 * The intercept is given by (Sy - dy) / N, where dy = Sx * Sxy / Sxx.
+	 * However, when computing dy, the intermediate product Sx * Sxy might
+	 * underflow or overflow.  If so, we can recover by decomposing Sx, Sxy,
+	 * and Sxx into normalized mantissa and integer power-of-two components,
+	 * computing the corresponding components of dy, and then recomposing dy.
+	 * We avoid doing this if Sx, Sxy, or Sxx are infinite or NaN, since the
+	 * exponent returned by frexp() is unspecified in those cases (and the
+	 * final result would be the same in any case).
+	 */
+	dy = Sx * Sxy / Sxx;
+	if ((dy == 0 || isinf(dy)) &&
+		!(isinf(Sx) || isinf(Sxy) || isinf(Sxx) ||
+		  isnan(Sx) || isnan(Sxy) || isnan(Sxx)))
+	{
+		float8		m_Sx,
+					m_Sxy,
+					m_Sxx,
+					m_dy;
+		int			n_Sx,
+					n_Sxy,
+					n_Sxx,
+					n_dy;
+
+		m_Sx = frexp(Sx, &n_Sx);
+		m_Sxy = frexp(Sxy, &n_Sxy);
+		m_Sxx = frexp(Sxx, &n_Sxx);
+
+		m_dy = m_Sx * m_Sxy / m_Sxx;
+		n_dy = n_Sx + n_Sxy - n_Sxx;
+
+		dy = ldexp(m_dy, n_dy);
+	}
+
+	PG_RETURN_FLOAT8((Sy - dy) / N);
 }


--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@ -563,6 +563,18 @@ SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95),
    1 |       1
 (1 row)

+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
+ regr_intercept 
+----------------
+         1e+150
+(1 row)
+
+SELECT regr_intercept(y, x) FROM (VALUES (-1e-131, 0), (2e-131, 3e-131)) v(x, y);
+ regr_intercept 
+----------------
+         1e-131
+(1 row)
+
 -- these examples pose definitional questions for NaN inputs,
 -- which we resolve by saying that an all-NaN input column is not all equal
 SELECT corr(g, 'NaN'), regr_r2(g, 'NaN') FROM generate_series(1, 30) g;
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@ -159,6 +159,8 @@ SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105),
 SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95),
       regr_r2(1e100 + g * 1e95, 1e100 + g * 1e95)
  FROM generate_series(1, 2) g;
+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
+SELECT regr_intercept(y, x) FROM (VALUES (-1e-131, 0), (2e-131, 3e-131)) v(x, y);

 -- these examples pose definitional questions for NaN inputs,
 -- which we resolve by saying that an all-NaN input column is not all equal