mirror of
https://github.com/opnsense/src.git
synced 2026-03-23 19:23:10 -04:00
This gives an average speedup of about 12 cycles or 17% for 9*pi/4 < |x| <= 2**19*pi/2 and a smaller speedup for larger x, and a small slowdown for |x| <= 9*pi/4 (only 1-2 cycles average, but that is 4%). Inlining this is less likely to bust caches than inlining the float version since it is much smaller (about 220 bytes text and rodata) and has many fewer branches. However, the float version was already large due to its manual inlining of the branches and also the polynomial evaluations.
72 lines
2 KiB
C
72 lines
2 KiB
C
/* s_tanf.c -- float version of s_tan.c.
|
|
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
|
* Optimized by Bruce D. Evans.
|
|
*/
|
|
|
|
/*
|
|
* ====================================================
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
*
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
* Permission to use, copy, modify, and distribute this
|
|
* software is freely granted, provided that this notice
|
|
* is preserved.
|
|
* ====================================================
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <float.h>
|
|
|
|
#include "math.h"
|
|
#define INLINE_KERNEL_TANDF
|
|
#define INLINE_REM_PIO2F
|
|
#include "math_private.h"
|
|
#include "e_rem_pio2f.c"
|
|
#include "k_tanf.c"
|
|
|
|
/* Small multiples of pi/2 rounded to double precision. */
|
|
static const double
|
|
t1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
|
|
t2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
|
|
t3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
|
|
t4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
|
|
|
|
float
|
|
tanf(float x)
|
|
{
|
|
double y;
|
|
int32_t n, hx, ix;
|
|
|
|
GET_FLOAT_WORD(hx,x);
|
|
ix = hx & 0x7fffffff;
|
|
|
|
if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
|
|
if(ix<0x39800000) /* |x| < 2**-12 */
|
|
if(((int)x)==0) return x; /* x with inexact if x != 0 */
|
|
return __kernel_tandf(x,1);
|
|
}
|
|
if(ix<=0x407b53d1) { /* |x| ~<= 5*pi/4 */
|
|
if(ix<=0x4016cbe3) /* |x| ~<= 3pi/4 */
|
|
return __kernel_tandf(x + (hx>0 ? -t1pio2 : t1pio2), -1);
|
|
else
|
|
return __kernel_tandf(x + (hx>0 ? -t2pio2 : t2pio2), 1);
|
|
}
|
|
if(ix<=0x40e231d5) { /* |x| ~<= 9*pi/4 */
|
|
if(ix<=0x40afeddf) /* |x| ~<= 7*pi/4 */
|
|
return __kernel_tandf(x + (hx>0 ? -t3pio2 : t3pio2), -1);
|
|
else
|
|
return __kernel_tandf(x + (hx>0 ? -t4pio2 : t4pio2), 1);
|
|
}
|
|
|
|
/* tan(Inf or NaN) is NaN */
|
|
else if (ix>=0x7f800000) return x-x;
|
|
|
|
/* general argument reduction needed */
|
|
else {
|
|
n = __ieee754_rem_pio2f(x,&y);
|
|
/* integer parameter: 1 -- n even; -1 -- n odd */
|
|
return __kernel_tandf(y,1-((n&1)<<1));
|
|
}
|
|
}
|