#include "math.h"
#include "fp_private.h"
#if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L)
#include "DD.h"
static const double recip = 0.1957294106339126123084757437350e-19;
static const double kBig = 4.504699138998272e+15 ;
uint32_t TanhTbl[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x3FB00000, 0x00030807, 0x3FAFF559, 0x97E63AD9, 0xBB6CF9C0, 0x2437818A,
0x3FC00000, 0x0000284A, 0x3FBFD599, 0x2BC5078A, 0xBB77F9D4, 0x704D912A,
0x3FC80000, 0x00005880, 0x3FC7B8FF, 0x903C4CEC, 0x3B47716F, 0xB54B60B5,
0x3FD00000, 0x00000CDC, 0x3FCF597E, 0xA69A34B3, 0xBB794C01, 0xC101435A,
0x3FD40000, 0x0000313C, 0x3FD35F98, 0xA0EA91C7, 0xBB904F63, 0x26BD63B6,
0x3FD80000, 0x000021EA, 0x3FD6EF53, 0xDE8CAD3F, 0xBB91172E, 0x8FAFB7A2,
0x3FDC0000, 0x00005214, 0x3FDA5729, 0xEE48C464, 0x3B9543D8, 0xC4943292,
0x3FE00000, 0x00001E86, 0x3FDD9353, 0xD756BAF6, 0xBB8F03A0, 0x34F7B469,
0x3FE20000, 0x00007115, 0x3FE05086, 0xF2F72867, 0xBB95D918, 0x3E3269EE,
0x3FE40000, 0x00005720, 0x3FE1BF47, 0xEABBCBE9, 0x3B918CCE, 0xA6F1FA3A,
0x3FE60000, 0x000021CF, 0x3FE3157D, 0xFE9F8724, 0xBBA1AC2C, 0x2B395168,
0x3FE80000, 0x00016908, 0x3FE45323, 0xE553C98B, 0xBBA14AF9, 0xB571AFA7,
0x3FEA0000, 0x00005202, 0x3FE5788F, 0xF10D56AF, 0xBBA3C229, 0xC063AD80,
0x3FEC0000, 0x00005742, 0x3FE68665, 0x0B8C4C1B, 0x3B8BE9FC, 0xF1B4428B,
0x3FEE0000, 0x00008582, 0x3FE77D83, 0x8E34430D, 0x3B829FA7, 0xF1623176,
0x3FF00000, 0x00005F76, 0x3FE85EFA, 0xB51543C3, 0xBB4B3592, 0x2498C785,
0x3FF10000, 0x00001F8D, 0x3FE92BFB, 0x370DB380, 0x3BA29188, 0x377121C3,
0x3FF20000, 0x0000130F, 0x3FE9E5CB, 0x5BA45A90, 0x3B7DFB4E, 0xF67C44E2,
0x3FF30000, 0x00004F18, 0x3FEA8DBC, 0xBC31BABE, 0x3B99C45D, 0xAE5D7827,
0x3FF40000, 0x00005131, 0x3FEB2523, 0xBB6B5B77, 0x3B85A025, 0x161129AB,
0x3FF50000, 0x000029D4, 0x3FEBAD50, 0xA4A6A0D5, 0xBB983007, 0x72D2D609,
0x3FF60000, 0x000007E1, 0x3FEC278A, 0x52A4E807, 0x3BA451F7, 0x31DE764B,
0x3FF70000, 0x0000185B, 0x3FEC950A, 0x3340D299, 0x3BAABF14, 0xE874463C,
0x3FF80000, 0x000024C4, 0x3FECF6F9, 0x786E02C1, 0x3B6B3F8E, 0x2446EA6B,
0x3FF90000, 0x0000BF0C, 0x3FED4E6F, 0x4642C44F, 0x3BA657E4, 0xDE96E741,
0x3FFA0000, 0x00006986, 0x3FED9C6F, 0xAFE63ACE, 0x3BA71F1B, 0xC2E3ED65,
0x3FFB0000, 0x00003690, 0x3FEDE1EB, 0x59375F86, 0x3B94D798, 0x08425B54,
0x3FFC0000, 0x000068CA, 0x3FEE1FBF, 0x97E34D01, 0x3BAD07C7, 0xA6BF9B47,
0x3FFD0000, 0x00008A24, 0x3FEE56B6, 0xF3EFC7EE, 0xBBA26658, 0xA2EB55F0,
0x3FFE0000, 0x0003C04A, 0x3FEE8789, 0xECECBA51, 0xBBA8B27F, 0x9026BBD4,
0x3FFF0000, 0x00000D68, 0x3FEEB2DF, 0xEDD5EEB6, 0x3B9EE74B, 0x12149464,
0x40000000, 0x00004DFD, 0x3FEED950, 0x5E1BD9DE, 0x3B971596, 0x795B2F55
};
uint32_t TanhCoeff[] = {
0x3FF00000, 0x00000000, 0x00000000, 0x00000000,
0xBFD55555, 0x55555555, 0xBC755555, 0x55555554,
0x3FC11111, 0x11111111, 0x3C411111, 0x10FF0F6D,
0xBFABA1BA, 0x1BA1BA1C, 0x3C47917A, 0xA287E6B6,
0x3F9664F4, 0x882C10FA, 0xBC0A8F5F, 0x684BD9FF,
0xBF8226E3, 0x55E6C238, 0x3C01097B, 0x425ED098,
0x3F6D6D3D, 0x0E154787, 0xBC0BA781, 0x948B0FCF,
0xBF57DA36, 0x446C8BDA, 0xBBD2F684, 0xBDA12F6B,
0x3F435580, 0xBCDA12F7, 0xBBEED097, 0xB425ED0A,
0xBF2F545A, 0x781948B1, 0x3B7948B0, 0xFCD6E991,
0x3F17B291, 0x61F9ADD4, 0xBBAF9ADD, 0x3C0CA458
};
struct TanhTblEntry {
double One;
double Two;
double Three;
} TanhTblEntry;
struct TanhCoeffEntry {
double One;
double Two;
} TanhCoeffEntry;
extern long double _ExpInnerLD(double alpha, double beta, double gamma,
double *pextra, int normflag);
long double sinhl(long double x)
{
double xHead, xTail, xHeadx, xTailx;
double p, q, r1, r2, r3, r4, r5, t1, t2;
double residual, residual1, residual2, residual3, residual4, residual5, residual6;
double res, resnew, reshi, reslow, restiny, resbot, resmid;
double remainder, partial, extraword;
double argsq, argsq2;
double prod, prodlow, sum, suml, temp, temp2, templow;
double bottom;
uint32_t hibits;
int i;
double fpenv;
DblDbl ddx;
double coeff[16] = {
0.62132974171578525315635255289e-14, 0.577836659795680285435407874e-11,
0.469203367754092391773551193841e-8, 0.329380764163372859025032938e-5,
0.1976284584980237154150197628458498e-2, 1.0, 420.0, 143640.0, 39070080.0,
8204716800.0, 1279935820800.0, 140792940288000.0, 10137091700736000.0,
425757851430912000.0, 8515157028618240000.0, 51090942171709440000.0
};
ddx.f = x;
xHead = ddx.d[0];
xTail = ddx.d[1];
hibits = ddx.i[0] & 0x7FFFFFFFu;
if (hibits >= 0x7ff00000u) return x;
if ((hibits | ddx.i[1]) == 0) return x;
FEGETENVD(fpenv);
FESETENVD(0.0);
if (fabs(xHead) <= 0.693147 ) {
temp=2.0*xHead*xTail; argsq=__FMADD( xHead, xHead, temp ); argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); sum=coeff[0];
suml=0.0;
sum=__FMADD( argsq, sum, coeff[1] ); sum=__FMADD( argsq, sum, coeff[2] ); sum=__FMADD( argsq, sum, coeff[3] ); for ( i=4; i<15; i++) {
templow=__FMADD( suml, argsq, sum*argsq2 ); temp=__FMADD( sum, argsq, templow ); bottom=__FMSUB( sum, argsq, temp) + templow; sum=__FADD( coeff[i], temp );
residual=coeff[i]-sum+temp;
suml=bottom+residual;
}
prodlow=__FMADD( suml, argsq, sum*argsq2 ); prod=__FMADD( sum, argsq, prodlow ); prodlow=__FMSUB( sum, argsq, prod) + prodlow; temp2=__FMADD( prodlow, xHead, prod*xTail ); temp=__FMADD( prod, xHead, temp2 ); temp2=__FMSUB(prod, xHead, temp ) + temp2; sum=temp*recip; remainder=__FNMSUB( sum, coeff[15], temp ); partial=__FADD( remainder, temp2 );
residual=remainder-partial+temp2;
suml=__FMADD( partial, recip, residual*recip ); res=__FADD( xHead, sum ); reslow=(xHead-res+sum); resmid=__FADD( xTail, suml );
restiny=xTail-resmid+suml;
p=__FADD( reslow, resmid ); q=reslow-p+resmid; reshi=__FADD( res, p );
resbot=(res-reshi+p)+(q+restiny);
ddx.d[0] = __FADD( reshi, resbot ); ddx.d[1] = (reshi - ddx.d[0]) + resbot;
FESETENVD(fpenv);
return (ddx.f);
}
else {
if (xHead < 0) {
xHeadx = -xHead;
xTailx = -xTail;
}
else {
xHeadx = xHead;
xTailx = xTail;
}
ddx.f = _ExpInnerLD(xHeadx, xTailx, 0.0 , &extraword , 1);
if (fabs(xHead) > 39.0) {
if (xHead < 0) ddx.f = - ddx.f;
reshi = ddx.d[0];
ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] );
if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) {
ddx.d[1] = 0.0;
}
else {
ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1];
}
FESETENVD(fpenv);
return (ddx.f);
}
else {
t1 = ddx.d[0]; t2 = ddx.d[1]; r1=0.25/t1;
residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); r2=residual*(4.0*r1); residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); r3=__FMSUB(t1, r2, residual ); r4=__FMADD( extraword, r1, t2*r2 ); reshi=__FSUB( t1, r1 );
reslow=__FSUB( t2, r2 );
residual=(t1-reshi)-r1; if (fabs(t2) > fabs(r2))
residual2=(t2-reslow)-r2; else
residual2=t2-(reslow+r2); r5=(r3+r4-residual1)*(4.0*r1);
resnew=__FADD( reshi, reslow );
residual3=reshi-resnew+reslow; residual4=__FADD( residual, residual3 );
reshi=__FADD( resnew, residual4 );
residual5=residual-residual4+residual3;
residual6=resnew -reshi+residual4;
reslow=(residual2+(extraword+r5))+residual5+residual6;
if (xHead < 0.0) { reshi = -reshi; reslow = -reslow;
}
ddx.d[0] = __FADD( reshi, reslow ); ddx.d[1] = (reshi - ddx.d[0]) + reslow;
FESETENVD(fpenv);
return (ddx.f);
}
}
}
long double coshl(long double x)
{
double xHead, xTail, xHeadx, xTailx;
double r1, r2, r3, r4, r5, t1, t2;
double residual, residual1, residual2, residual3, residual4, residual5, residual6;
double res, resnew, reshi, reslow;
double remainder, partial, extraword;
double argsq, argsq2;
double prod, prodlow, sum, suml, temp, templow;
double bottom;
uint32_t hibits, signx;
int i;
double fpenv;
DblDbl ddx;
double coeff[9] = {
306.0, 73440.0, 13366080.0, 1764322560.0, 158789030400.0, 8892185702400.0,
266765571072000.0, 3201186852864000.0, 6402373705728000.0
};
double recip = 1.561920696858622646221636435005e-16;
ddx.f = x;
xHead = ddx.d[0];
xTail = ddx.d[1];
hibits = ddx.i[0] & 0x7FFFFFFFu; signx = (ddx.i[0] >> 31) & 1u;
if (hibits >= 0x7ff00000u){ if (xHead != xHead) { return x;
}
else { if (signx)
return (-x);
else
return x;
}
}
if ((hibits | ddx.i[1]) == 0)
return (long double) 1.0;
FEGETENVD(fpenv);
FESETENVD(0.0);
if (hibits <= 0x3fc00000u) {
temp=2.0*xHead*xTail;
argsq=__FMADD( xHead, xHead, temp ); argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); sum=1.0;
suml=0.0;
sum=__FMADD( argsq, sum, coeff[0] ); sum=__FMADD( argsq, sum, coeff[1] ); sum=__FMADD( argsq, sum, coeff[2] ); for (i=3; i< 8; i++) {
templow=__FMADD( suml, argsq, sum*argsq2 ); temp=__FMADD( sum, argsq, templow ); bottom=__FMSUB( sum, argsq, temp) + templow; sum=__FADD( coeff[i], temp );
residual=coeff[i]-sum+temp;
suml=bottom+residual;
}
prodlow=__FMADD( suml, argsq, sum*argsq2 ); prod=__FMADD( sum, argsq, prodlow ); prodlow=__FMSUB( sum, argsq, prod) + prodlow; sum=prod*recip; remainder=__FNMSUB( sum, coeff[8], prod ); partial=__FADD( remainder, prodlow );
residual=remainder-partial+prodlow;
suml=__FMADD( partial, recip, residual*recip ); res=__FADD(1.0, sum ); reslow=1.0 - res+sum+suml;
ddx.d[0] = __FADD( res, reslow ); ddx.d[1] = (res - ddx.d[0]) + reslow;
FESETENVD(fpenv);
return (ddx.f);
}
else { if (signx) {
xHeadx = -xHead;
xTailx = -xTail;
}
else{
xHeadx = xHead;
xTailx = xTail;
}
ddx.f = _ExpInnerLD (xHeadx, xTailx, 0.0 , &extraword , 1);
if (hibits > 0x40440000u) {
reshi = ddx.d[0];
ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] );
if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) { ddx.d[1] = 0.0;
}
else {
ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1];
}
FESETENVD(fpenv);
return (ddx.f);
}
else{
t1 = ddx.d[0]; t2 = ddx.d[1]; r1=0.25/t1;
residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); r2=residual*(4.0*r1); residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); r3=__FMSUB(t1, r2, residual ); r4=__FMADD( extraword, r1, t2*r2 ); reshi=__FADD( t1, r1 );
reslow=__FADD( t2, r2 );
residual=(t1-reshi)+r1; if (fabs(t2) > fabs(r2))
residual2=(t2-reslow)+r2; else
residual2=r2-reslow+t2; r5=(r3+r4-residual1)*(4.0*r1); resnew=__FADD( reshi, reslow );
residual3=reshi-resnew+reslow; residual4=__FADD( residual, residual3 );
reshi=__FADD( resnew, residual4 );
residual5=residual-residual4+residual3;
residual6=resnew-reshi+residual4;
reslow=(residual2+(extraword-r5))+residual5+residual6;
ddx.d[0] = __FADD( reshi, reslow ); ddx.d[1] = (reshi - ddx.d[0]) + reslow;
FESETENVD(fpenv);
return (ddx.f);
}
} }
long double tanhl(long double x)
{
double r, res, res2, res3, res4,res5, resfin, reslow;
double rem, rem1, rem2;
uint32_t signx, hibits, ndx ;
double q, qq, qa, qb, qc, q1, quot, quot2, quot3;
double arg, arg1, arg2, argsq, argsq2;
double sum, sum2, suml, suml2;
double frac, frac2, fac, fac2;
double ra, rb, rc, rd, re, rf;
double ans, ansx, ansl, ansmid, anstiny, anslow, almost;
double prod, prodlow, prodx, prodlowx, proderr, pl;
double top, top2, den, den1, denf, denf2;
double rg, t2, t3, tsq, tsql;
double az, bz, temp, temp1, templow, carry, carry1, extra;
double bump, residual, tiny, recip;
Double DeN;
double fpenv;
double xHead, xTail;
DblDbl ddx;
int i;
struct TanhTblEntry *TanhTblPtr = (struct TanhTblEntry *)TanhTbl;
struct TanhCoeffEntry *TanhCoeffPtr = (struct TanhCoeffEntry *) TanhCoeff;
ddx.f = x;
xHead = ddx.d[0];
xTail = ddx.d[1];
hibits = ddx.i[0] & 0x7FFFFFFFu; signx = (ddx.i[0] >> 31) & 1u;
if ((xHead != xHead)||((hibits | ddx.i[1]) == 0))
return x;
FEGETENVD(fpenv);
FESETENVD(0.0);
if (hibits > 0x40440000u) { FESETENVD(fpenv);
if (signx)
return (long double) (-1.0); else
return (long double) (1.0);
}
if (signx) {
xHead = -xHead;
xTail = -xTail;
}
if (hibits > 0x40000000u) {
xHead=2.0*xHead; xTail=2.0*xTail;
ddx.f = _ExpInnerLD (xHead, xTail, 0.0 , &extra, 1 ); az = ddx.d[0];
bz = ddx.d[1];
temp=__FADD( az, 0.5 ); carry=az-temp+0.5; temp1=__FADD( carry, bz );
carry1=carry-temp1+bz;
den=__FADD( temp, temp1 );
den1=temp-den+temp1+carry1;
r=1.0/den; rem=__FNMSUB( den, r, 1.0 ); rem1=__FNMSUB( r, den1, rem ); if (fabs(rem) > fabs(r*den1))
rem2=__FNMSUB( r, den1, rem-rem1 ); else {
rem1=__FNMSUB( r, den1, rem );
rem2 = rem - __FMADD( r, den1, rem1 ) - __FMSUB( r, den1, (r*den1) );
}
qq=__FMADD( r, rem1, r ); qa=__FMADD( r, rem1, r-qq ); qb=__FMADD( r, rem2, qa ); q=__FADD( qq, qb );
qc = __FNMSUB( extra*r, r, __FMADD( r, rem2, qa - qb ) );
q1=qq-q+qb+qc;
res=__FSUB( 1.0, q ); res2=1.0-res-q; res3=__FSUB( res2, q1 );
resfin=__FADD( res, res3 );
res4=res2-res3-q1;
res5=res-resfin+res3;
reslow=res4+res5;
if (signx) {
resfin=-resfin;
reslow=-reslow;
}
ddx.d[0] = __FADD( resfin, reslow ); ddx.d[1] = (resfin - ddx.d[0]) + reslow;
FESETENVD(fpenv);
return (ddx.f);
}
else {
DeN.f = __FMADD( xHead, 16.0, kBig ); ndx = DeN.i[1];
if (hibits < 0x3fb00000u) ndx=0; arg1=xHead-TanhTblPtr[(int32_t)ndx].One; arg=__FADD( arg1, xTail ); arg2=arg1-arg+xTail; sum=TanhCoeffPtr[10].One;
suml=TanhCoeffPtr[10].Two;
argsq=arg*arg;
argsq2=__FMSUB( arg, arg, argsq ); for (i = 9; i > 0; i--) {
pl=__FMADD( suml, argsq, sum*argsq2 ); prod=__FMADD( sum, argsq, pl ); prodlow=__FMSUB( sum, argsq, prod ) + pl; sum=__FADD( TanhCoeffPtr[i].One, prod ); sum2=TanhCoeffPtr[i].Two+prodlow;
suml=TanhCoeffPtr[i].One-sum+prod+sum2;
}
pl=__FMADD( suml, argsq, sum*argsq2 ); temp=__FMADD( sum, argsq, pl ); templow=__FMSUB( sum, argsq, temp ) + pl; pl=templow*arg; prodx=__FMADD( temp, arg, pl ); prodlowx=__FMSUB( temp, arg, prodx ) + pl; prod=__FADD( arg, prodx );
prodlow=arg-prod+prodx+prodlowx;
if (hibits < 0x3fb00000u) { proderr=(arg-prod+prodx)-prodlow+prodlowx;
bump=__FNMSUB( prod, prod*arg2, arg2 ); reslow=__FADD( prodlow, bump );
residual=prodlow-reslow+bump+proderr;
res=__FADD( reslow, prod );
reslow=prod-res+reslow+residual;
if (signx) {
res=-res;
reslow=-reslow;
}
ddx.d[0] = __FADD( res, reslow ); ddx.d[1] = (res - ddx.d[0]) + reslow;
FESETENVD(fpenv);
return (ddx.f);
}
tiny=arg2*(TanhTblPtr[(int32_t)ndx].Two+prod);
pl = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prodlow, TanhTblPtr[(int32_t)ndx].Three*prod );
den = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prod, pl );
den1 = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, prod, den ) + pl + tiny;
suml2=prodlow+arg2; t2=2.0*TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Three;
tsq = __FMADD( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, t2 );
tsql = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, tsq ) + t2;
t3 = __FMADD( TanhTblPtr[(int32_t)ndx].Three, TanhTblPtr[(int32_t)ndx].Three,
__FMSUB( 2.0*TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Three, t2 ) );
denf=__FADD( 1.0, den ); denf2=1.0-denf+den+den1;
fac=__FSUB( 1.0, tsq ); residual=1.0-fac-tsq; fac2=residual-tsql-t3;
pl=__FMADD( fac, suml2, fac2*prod ); top=__FMADD( fac, prod, pl ); top2=__FMSUB( fac, prod, top ) + pl; recip=1.0/denf;
quot=top*recip;
ra=__FNMSUB( quot, denf, top ); rb=-(quot*denf2); rc= - __FMADD(quot, denf2, rb ); rd=__FADD( top2, rb ); if (fabs(top2) > fabs(rb))
re=top2-rd+rb;
else
re=rb-rd+top2;
rf=__FADD( ra, rd );
if (fabs(ra) > fabs(rd))
rg=ra-rf+rd; else rg=rd-rf+ra;
quot2=rf*recip;
quot3=__FMSUB( rf, recip, quot2 ); frac=__FADD( quot, quot2 );
frac2 = quot-frac+quot2 + __FMADD( recip, (rc+re+rg), quot3 );
ansx=__FADD( TanhTblPtr[(int32_t)ndx].Two, frac ); ansl=__FADD( TanhTblPtr[(int32_t)ndx].Three, frac2 );
ansmid=TanhTblPtr[(int32_t)ndx].Two-ansx+frac;
anstiny=TanhTblPtr[(int32_t)ndx].Three-ansl+frac2;
almost=__FADD( ansmid, ansl );
ans=__FADD( ansx, almost );
anslow=(ansx-ans+almost)+((ansmid-almost+ansl) +anstiny);
if (signx) {
ans=-ans;
anslow=-anslow;
}
ddx.d[0] = __FADD( ans, anslow ); ddx.d[1] = (ans - ddx.d[0]) + anslow;
FESETENVD(fpenv);
return (ddx.f);
}
}
#endif