Add kernel functions for 80-bit long doubles. Many thanks to Steve and

Bruce for putting lots of effort into these; getting them right isn't easy, and they went through many iterations. Submitted by: Steve Kargl <sgk@apl.washington.edu> with revisions from bde
2026-06-09 00:32:25 -04:00 · 2008-02-17 07:32:14 +00:00 · 2008-02-17 07:32:14 +00:00 · de336b0c5e
commit de336b0c5e
parent 079299f710
3 changed files with 264 additions and 0 deletions
--- a/lib/msun/ld80/k_cosl.c
+++ b/lib/msun/ld80/k_cosl.c
@ -0,0 +1,78 @@
+/* From: @(#)k_cos.c 1.3 95/01/18 */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice 
+ * is preserved.
+ * ====================================================
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * ld80 version of k_cos.c.  See ../src/k_cos.c for most comments.
+ */
+
+#include "math_private.h"
+
+/*
+ * Domain [-0.7854, 0.7854], range ~[-2.43e-23, 2.425e-23]:
+ * |cos(x) - c(x)| < 2**-75.1
+ *
+ * The coefficients of c(x) were generated by a pari-gp script using
+ * a Remez algorithm that searches for the best higher coefficients
+ * after rounding leading coefficients to a specified precision.
+ *
+ * Simpler methods like Chebyshev or basic Remez barely suffice for
+ * cos() in 64-bit precision, because we want the coefficient of x^2
+ * to be precisely -0.5 so that multiplying by it is exact, and plain
+ * rounding of the coefficients of a good polynomial approximation only
+ * gives this up to about 64-bit precision.  Plain rounding also gives
+ * a mediocre approximation for the coefficient of x^4, but a rounding
+ * error of 0.5 ulps for this coefficient would only contribute ~0.01
+ * ulps to the final error, so this is unimportant.  Rounding errors in
+ * higher coefficients are even less important.
+ *
+ * In fact, coefficients above the x^4 one only need to have 53-bit
+ * precision, and this is more efficient.  We get this optimization
+ * almost for free from the complications needed to search for the best
+ * higher coefficients.
+ */
+static const double
+one = 1.0;
+
+#if defined(__amd64__) || defined(__i386__)
+/* Long double constants are slow on these arches, and broken on i386. */
+static const volatile double
+C1hi = 0.041666666666666664,		/*  0x15555555555555.0p-57 */
+C1lo = 2.2598839032744733e-18;		/*  0x14d80000000000.0p-111 */
+#define	C1	((long double)C1hi + C1lo)
+#else
+static const long double
+C1 =  0.0416666666666666666136L;	/*  0xaaaaaaaaaaaaaa9b.0p-68 */
+#endif
+
+static const double
+C2 = -0.0013888888888888874,		/* -0x16c16c16c16c10.0p-62 */
+C3 =  0.000024801587301571716,		/*  0x1a01a01a018e22.0p-68 */
+C4 = -0.00000027557319215507120,	/* -0x127e4fb7602f22.0p-74 */
+C5 =  0.0000000020876754400407278,	/*  0x11eed8caaeccf1.0p-81 */
+C6 = -1.1470297442401303e-11,		/* -0x19393412bd1529.0p-89 */
+C7 =  4.7383039476436467e-14;		/*  0x1aac9d9af5c43e.0p-97 */
+
+long double
+__kernel_cosl(long double x, long double y)
+{
+	long double hz,z,r,w;
+
+	z  = x*x;
+	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*(C6+z*C7))))));
+	hz = 0.5*z;
+	w  = one-hz;
+	return w + (((one-w)-hz) + (z*r-x*y));
+}
--- a/lib/msun/ld80/k_sinl.c
+++ b/lib/msun/ld80/k_sinl.c
@ -0,0 +1,62 @@
+/* From: @(#)k_sin.c 1.3 95/01/18 */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice 
+ * is preserved.
+ * ====================================================
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * ld80 version of k_sin.c.  See ../src/k_sin.c for most comments.
+ */
+
+#include "math_private.h"
+
+static const double
+half =  0.5;
+
+/*
+ * Domain [-0.7854, 0.7854], range ~[-1.89e-22, 1.915e-22]
+ * |sin(x)/x - s(x)| < 2**-72.1
+ *
+ * See ../ld80/k_cosl.c for more details about the polynomial.
+ */
+#if defined(__amd64__) || defined(__i386__)
+/* Long double constants are slow on these arches, and broken on i386. */
+static const volatile double
+S1hi = -0.16666666666666666,		/* -0x15555555555555.0p-55 */
+S1lo = -9.2563760475949941e-18;		/* -0x15580000000000.0p-109 */
+#define	S1	((long double)S1hi + S1lo)
+#else
+static const long double
+S1 = -0.166666666666666666671L;		/* -0xaaaaaaaaaaaaaaab.0p-66 */
+#endif
+
+static const double
+S2 =  0.0083333333333333332,		/*  0x11111111111111.0p-59 */
+S3 = -0.00019841269841269427,		/* -0x1a01a01a019f81.0p-65 */
+S4 =  0.0000027557319223597490,		/*  0x171de3a55560f7.0p-71 */
+S5 = -0.000000025052108218074604,	/* -0x1ae64564f16cad.0p-78 */
+S6 =  1.6059006598854211e-10,		/*  0x161242b90243b5.0p-85 */
+S7 = -7.6429779983024564e-13,		/* -0x1ae42ebd1b2e00.0p-93 */
+S8 =  2.6174587166648325e-15;		/*  0x179372ea0b3f64.0p-101 */
+
+long double
+__kernel_sinl(long double x, long double y, int iy)
+{
+	long double z,r,v;
+
+	z	=  x*x;
+	v	=  z*x;
+	r	=  S2+z*(S3+z*(S4+z*(S5+z*(S6+z*(S7+z*S8)))));
+	if(iy==0) return x+v*(S1+z*r);
+	else      return x-((z*(half*y-v*r)-y)-v*S1);
+}
--- a/lib/msun/ld80/k_tanl.c
+++ b/lib/msun/ld80/k_tanl.c
@ -0,0 +1,124 @@
+/* From: @(#)k_tan.c 1.5 04/04/22 SMI */
+
+/*
+ * ====================================================
+ * Copyright 2004 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright (c) 2008 Steven G. Kargl, David Schultz, Bruce D. Evans.
+ *
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * ld80 version of k_tan.c.  See ../src/k_tan.c for most comments.
+ */
+
+#include "math.h"
+#include "math_private.h"
+
+/*
+ * Domain [-0.67434, 0.67434], range ~[-2.25e-22, 1.921e-22]
+ * |tan(x)/x - t(x)| < 2**-71.9
+ *
+ * See k_cosl.c for more details about the polynomial.
+ */
+#if defined(__amd64__) || defined(__i386__)
+/* Long double constants are slow on these arches, and broken on i386. */
+static const volatile double
+T3hi =  0.33333333333333331,		/*  0x15555555555555.0p-54 */
+T3lo =  1.8350121769317163e-17,		/*  0x15280000000000.0p-108 */
+T5hi =  0.13333333333333336,		/*  0x11111111111112.0p-55 */
+T5lo =  1.3051083651294260e-17,		/*  0x1e180000000000.0p-109 */
+T7hi =  0.053968253968250494,		/*  0x1ba1ba1ba1b827.0p-57 */
+T7lo =  3.1509625637859973e-18,		/*  0x1d100000000000.0p-111 */
+pio4_hi =  0.78539816339744828,		/*  0x1921fb54442d18.0p-53 */
+pio4_lo =  3.0628711372715500e-17,	/*  0x11a80000000000.0p-107 */
+pio4lo_hi = -1.2541394031670831e-20,	/* -0x1d9cceba3f91f2.0p-119 */
+pio4lo_lo =  6.1493048227390915e-37;	/*  0x1a280000000000.0p-173 */
+#define	T3	((long double)T3hi + T3lo)
+#define	T5	((long double)T5hi + T5lo)
+#define	T7	((long double)T7hi + T7lo)
+#define	pio4	((long double)pio4_hi + pio4_lo)
+#define	pio4lo	((long double)pio4lo_hi + pio4lo_lo)
+#else
+static const long double
+T3 =   0.333333333333333333180L,	/*  0xaaaaaaaaaaaaaaa5.0p-65 */
+T5 =   0.133333333333333372290L,	/*  0x88888888888893c3.0p-66 */
+T7 =   0.0539682539682504975744L;	/*  0xdd0dd0dd0dc13ba2.0p-68 */
+pio4 = 0.785398163397448309628,		/*  0xc90fdaa22168c235.0p-64 */
+pio4lo = -1.25413940316708300586e-20;	/* -0xece675d1fc8f8cbb.0p-130 */
+#endif
+
+static const double
+T9  =  0.021869488536312216,		/*  0x1664f4882cc1c2.0p-58 */
+T11 =  0.0088632355256619590,		/*  0x1226e355c17612.0p-59 */
+T13 =  0.0035921281113786528,		/*  0x1d6d3d185d7ff8.0p-61 */
+T15 =  0.0014558334756312418,		/*  0x17da354aa3f96b.0p-62 */
+T17 =  0.00059003538700862256,		/*  0x13559358685b83.0p-63 */
+T19 =  0.00023907843576635544,		/*  0x1f56242026b5be.0p-65 */
+T21 =  0.000097154625656538905,		/*  0x1977efc26806f4.0p-66 */
+T23 =  0.000038440165747303162,		/*  0x14275a09b3ceac.0p-67 */
+T25 =  0.000018082171885432524,		/*  0x12f5e563e5487e.0p-68 */
+T27 =  0.0000024196006108814377,	/*  0x144c0d80cc6896.0p-71 */
+T29 =  0.0000078293456938132840,	/*  0x106b59141a6cb3.0p-69 */
+T31 = -0.0000032609076735050182,	/* -0x1b5abef3ba4b59.0p-71 */
+T33 =  0.0000023261313142559411;	/*  0x13835436c0c87f.0p-71 */
+
+long double
+__kernel_tanl(long double x, long double y, int iy) {
+	long double z, r, v, w, s;
+	long double osign;
+	int i;
+
+	iy = (iy == 1 ? -1 : 1);	/* XXX recover original interface */
+	osign = (x >= 0 ? 1.0 : -1.0);	/* XXX slow, probably wrong for -0 */
+	if (fabsl(x) >= 0.67434) {
+		if (x < 0) {
+			x = -x;
+			y = -y;
+		}
+		z = pio4 - x;
+		w = pio4lo - y;
+		x = z + w;
+		y = 0.0;
+		i = 1;
+	} else
+		i = 0;
+	z = x * x;
+	w = z * z;
+	r = T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
+	    w * (T25 + w * (T29 + w * T33))))));
+	v = z * (T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
+	    w * (T27 + w * T31))))));
+	s = z * x;
+	r = y + z * (s * (r + v) + y);
+	r += T3 * s;
+	w = x + r;
+	if (i == 1) {
+		v = (long double) iy;
+		return osign *
+			(v - 2.0 * (x - (w * w / (w + v) - r)));
+	}
+	if (iy == 1)
+		return w;
+	else {
+		/*
+		 * if allow error up to 2 ulp, simply return
+		 * -1.0 / (x+r) here
+		 */
+		/* compute -1.0 / (x+r) accurately */
+		long double a, t;
+		z = w;
+		z = z + 0x1p32 - 0x1p32;
+		v = r - (z - x);	/* z+v = r+x */
+		t = a = -1.0 / w;	/* a = -1.0/w */
+		t = t + 0x1p32 - 0x1p32;
+		s = 1.0 + t * z;
+		return t + a * (s + t * v);
+	}
+}