diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index cc00c10c0d4..262ea2b73ba 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -4010,7 +4010,8 @@ float8_regr_intercept(PG_FUNCTION_ARGS) Sx, Sxx, Sy, - Sxy; + Sxy, + dy; transvalues = check_float8_array(transarray, "float8_regr_intercept", 8); N = transvalues[0]; @@ -4029,7 +4030,41 @@ float8_regr_intercept(PG_FUNCTION_ARGS) if (Sxx == 0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N); + /* + * The intercept is given by (Sy - dy) / N, where dy = Sx * Sxy / Sxx. + * However, when computing dy, the intermediate product Sx * Sxy might + * underflow or overflow. If so, we can recover by decomposing Sx, Sxy, + * and Sxx into normalized mantissa and integer power-of-two components, + * computing the corresponding components of dy, and then recomposing dy. + * We avoid doing this if Sx, Sxy, or Sxx are infinite or NaN, since the + * exponent returned by frexp() is unspecified in those cases (and the + * final result would be the same in any case). + */ + dy = Sx * Sxy / Sxx; + if ((dy == 0 || isinf(dy)) && + !(isinf(Sx) || isinf(Sxy) || isinf(Sxx) || + isnan(Sx) || isnan(Sxy) || isnan(Sxx))) + { + float8 m_Sx, + m_Sxy, + m_Sxx, + m_dy; + int n_Sx, + n_Sxy, + n_Sxx, + n_dy; + + m_Sx = frexp(Sx, &n_Sx); + m_Sxy = frexp(Sxy, &n_Sxy); + m_Sxx = frexp(Sxx, &n_Sxx); + + m_dy = m_Sx * m_Sxy / m_Sxx; + n_dy = n_Sx + n_Sxy - n_Sxx; + + dy = ldexp(m_dy, n_dy); + } + + PG_RETURN_FLOAT8((Sy - dy) / N); } diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 1ccdf7dfdd7..89e051ee824 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -563,6 +563,18 @@ SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95), 1 | 1 (1 row) +SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y); + regr_intercept +---------------- + 1e+150 +(1 row) + +SELECT regr_intercept(y, x) FROM (VALUES (-1e-131, 0), (2e-131, 3e-131)) v(x, y); + regr_intercept +---------------- + 1e-131 +(1 row) + -- these examples pose definitional questions for NaN inputs, -- which we resolve by saying that an all-NaN input column is not all equal SELECT corr(g, 'NaN'), regr_r2(g, 'NaN') FROM generate_series(1, 30) g; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index a310b39e7b8..916383db927 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -159,6 +159,8 @@ SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105), SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95), regr_r2(1e100 + g * 1e95, 1e100 + g * 1e95) FROM generate_series(1, 2) g; +SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y); +SELECT regr_intercept(y, x) FROM (VALUES (-1e-131, 0), (2e-131, 3e-131)) v(x, y); -- these examples pose definitional questions for NaN inputs, -- which we resolve by saying that an all-NaN input column is not all equal