#if defined( __i386__ )
#include "xmmLibm_prefix.h"
#include <math.h>
static inline xDouble _xsin( xDouble ) ALWAYS_INLINE;
static inline double _xcos( double ) ALWAYS_INLINE;
static inline xDouble _xtan( xDouble ) ALWAYS_INLINE;
static inline xDouble _halfPiReduce( xDouble, int32_t* ) ALWAYS_INLINE;
static inline xDouble _S( register xDouble ) ALWAYS_INLINE;
static inline xDouble _C( register xDouble ) ALWAYS_INLINE;
#pragma mark -
static const double two52 = 0x1.0p52; static const double S0 = -1.66666666666666796e-01;
static const double S1 = 8.33333333333178931e-03;
static const double S2 = -1.98412698361250482e-04;
static const double S3 = 2.75573156035895542e-06;
static const double S4 = -2.50510254394993115e-08;
static const double S5 = 1.59108690260756780e-10;
static inline xDouble _S(register xDouble x)
{
register xDouble t, u, k, v, w;
t = _mm_mul_sd( x, x ); u = _mm_mul_sd( t, t );
k = _mm_add_sdm( _mm_mul_sdm( u, &S5 ), &S3 ); v = _mm_add_sdm( _mm_mul_sdm( u, &S4 ), &S2 ); k = _mm_add_sdm( _mm_mul_sd( u, k ), &S1 ); v = _mm_add_sdm( _mm_mul_sd( u, v ), &S0 );
u = _mm_mul_sd( x, t ); w = _mm_add_sd( _mm_mul_sd( t, k), v );
return _mm_add_sd( _mm_mul_sd( u, w ), x ); }
static const double C0 = 4.16666666666666019e-02;
static const double C1 = -1.38888888888744744e-03;
static const double C2 = 2.48015872896717822e-05;
static const double C3 = -2.75573144009911252e-07;
static const double C4 = 2.08757292566166689e-09;
static const double C5 = -1.13599319556004135e-11;
static const double T1 = 5.22344792962423754e-01;
static const double T2 = 2.55389245354663896e-01;
static const double half = 0.5;
static const double one = 1.0;
static const xDouble mThreeSixteenths = { -3.0/16.0, 0.0 };
static inline xDouble _C(register xDouble x)
{
register xDouble t, u, k, v;
register xDouble c1, c2;
t = _mm_mul_sd( x, x ); xDouble tLTt1 = _mm_cmpge_sdm( t, &T1 );
xDouble tLTt2 = _mm_cmpge_sdm( t, &T2 );
u = _mm_mul_sd( t, t ); tLTt1 = _mm_and_pd( tLTt1, mThreeSixteenths ); tLTt2 = _mm_and_pd( tLTt2, mThreeSixteenths ); c1 = _mm_add_sd( tLTt1, tLTt2 ); c2 = _mm_add_sdm( c1, &one );
k = _mm_add_sdm( _mm_mul_sdm( u, &C5 ), &C3 ); v = _mm_add_sdm( _mm_mul_sdm( u, &C4 ), &C2 ); k = _mm_add_sdm( _mm_mul_sd( u, k ), &C1 ); v = _mm_add_sdm( _mm_mul_sd( u, v ), &C0 ); k = _mm_add_sd( _mm_mul_sd( t, k), v );
t = _mm_add_sd( _mm_mul_sdm( t, &half), c1 ); u = _mm_sub_sd( _mm_mul_sd( u, k ), t );
return _mm_add_sd( u, c2 ); }
static const double aBigNumber = 3.373e9;
static const double piOver4 = 0x1.921FB54442D18p-1; static const double piOver2 = 0x1.921FB54442D18p+0; static const double rPiOver2 = 0x1.45F306DC9C882p-1;
static const xDouble halfPiBits[] = { { 0x1.45f3068000000p-1, 0x1.45f3068000000p-1}, { 0x1.7272208000000p-27, 0x1.7272208000000p-27}, { 0x1.4a7f090000000p-54, 0x1.4a7f090000000p-54}, { 0x1.abe8fa8000000p-79, 0x1.abe8fa8000000p-79}, { 0x1.a6ee060000000p-107, 0x1.a6ee060000000p-107}, { 0x1.b629590000000p-132, 0x1.b629590000000p-132}, { 0x1.2788720000000p-157, 0x1.2788720000000p-157}, { 0x1.07f9440000000p-186, 0x1.07f9440000000p-186}, { 0x1.8eaf7a0000000p-210, 0x1.8eaf7a0000000p-210}, { 0x1.de2b0d8000000p-235, 0x1.de2b0d8000000p-235}, { 0x1.c91b8e0000000p-262, 0x1.c91b8e0000000p-262}, { 0x1.2126e90000000p-287, 0x1.2126e90000000p-287}, { 0x1.c00c920000000p-313, 0x1.c00c920000000p-313}, { 0x1.77504e8000000p-339, 0x1.77504e8000000p-339}, { 0x1.921cfc0000000p-368, 0x1.921cfc0000000p-368}, { 0x1.0ef58e0000000p-391, 0x1.0ef58e0000000p-391}, { 0x1.62534e0000000p-417, 0x1.62534e0000000p-417}, { 0x1.f744118000000p-443, 0x1.f744118000000p-443}, { 0x1.7d4bae0000000p-470, 0x1.7d4bae0000000p-470}, { 0x1.a242748000000p-495, 0x1.a242748000000p-495}, { 0x1.38e04d0000000p-521, 0x1.38e04d0000000p-521}, { 0x1.a2fbf20000000p-547, 0x1.a2fbf20000000p-547}, { 0x1.3991d40000000p-576, 0x1.3991d40000000p-576}, { 0x1.1cc1a98000000p-599, 0x1.1cc1a98000000p-599}, { 0x1.cfa4e40000000p-627, 0x1.cfa4e40000000p-627}, { 0x1.17e2ec0000000p-654, 0x1.17e2ec0000000p-654}, { 0x1.bf25070000000p-677, 0x1.bf25070000000p-677}, { 0x1.8ffc4b8000000p-703, 0x1.8ffc4b8000000p-703}, { 0x1.ffbc0b0000000p-729, 0x1.ffbc0b0000000p-729}, { 0x1.80fef20000000p-756, 0x1.80fef20000000p-756}, { 0x1.e2316b0000000p-781, 0x1.e2316b0000000p-781}, { 0x1.05368f8000000p-807, 0x1.05368f8000000p-807}, { 0x1.b4d9fb0000000p-834, 0x1.b4d9fb0000000p-834}, { 0x1.e4f9600000000p-861, 0x1.e4f9600000000p-861}, { 0x1.36e9e88000000p-885, 0x1.36e9e88000000p-885}, { 0x1.1fb34f0000000p-911, 0x1.1fb34f0000000p-911}, { 0x1.7fa8b50000000p-938, 0x1.7fa8b50000000p-938}, { 0x1.a93dd60000000p-963, 0x1.a93dd60000000p-963}, { 0x1.faf97c0000000p-990, 0x1.faf97c0000000p-990}, { 0x1.7b3d070000000p-1016, 0x1.7b3d070000000p-1016} };
static const int halfPiBinsCount = sizeof( halfPiBits ) / sizeof( halfPiBits[0] );
static inline xDouble extended_accum_pd( xDouble *a, xDouble b ) ALWAYS_INLINE;
static inline xDouble extended_accum_pd( xDouble *a, xDouble b )
{
xDouble oldA = *a;
*a = _mm_add_pd( *a, b );
xDouble carry = _mm_sub_pd( b, _mm_sub_pd( *a, oldA ) );
return carry;
}
static const xUInt64 himask = { 0xFFFFFFFFF8000000ULL, 0 };
static const xDouble two54D = { 0x1.0p54, 0x1.0p54};
static inline xDouble _halfPiReduce( xDouble x1, int32_t *quo )
{
#warning This is Super CHEESY and WRONG!!!!!
xDouble q = _mm_mul_sdm( x1, &rPiOver2 );
xDouble fabsq = q;
xDouble step = _mm_and_pd( _mm_load_sd( &two52), _mm_cmplt_sdm( fabsq, &two52 ) );
xDouble integral = _mm_sub_sd( _mm_add_sd( fabsq, step ), step ); xDouble fractional = _mm_sub_sd( x1, _mm_mul_sdm( integral, &piOver2) );
xDouble rightSize = _mm_cmple_sdm( fractional, &piOver4 );
integral = _mm_add_sd( integral, _mm_andnot_pd( rightSize, _mm_load_sd( &one ) ) );
fractional = _mm_sub_sd( x1, _mm_mul_sdm( integral, &piOver2) );
*quo = _mm_cvtsd_si32( integral );
return fractional;
}
#pragma mark -
static const union
{
uint64_t u;
double d;
}trigNaN = { 0x7ff8000000000021ULL };
static inline xDouble _xsin( xDouble x )
{
int oldMXCSR = _mm_getcsr();
int newMXCSR = (oldMXCSR | DEFAULT_MXCSR) & DEFAULT_MASK; if( newMXCSR != oldMXCSR )
_mm_setcsr( newMXCSR );
xDouble y = _mm_andnot_pd( minusZeroD, x );
xDouble xEQZero = _mm_cmpeq_sdm( x, (double*) &minusZeroD );
if( _mm_istrue_sd( _mm_cmpgt_sdm( y, &piOver4 ) ) )
{
int quo = 0;
y = _halfPiReduce( y, &quo );
if( _mm_istrue_sd( _mm_cmpunord_sd( y, y ) ) )
{
y = _mm_load_sd( &trigNaN.d );
oldMXCSR |= INVALID_FLAG;
}
else
{
if( quo & 1 )
y = _C(y);
else
y = _S(y);
if( quo & 2 )
y = _mm_xor_pd( y, minusZeroD );
x = _mm_and_pd( x, xEQZero );
y = _mm_or_pd( y, x );
oldMXCSR |= INEXACT_FLAG;
}
}
else
{
if( _mm_istrue_sd( _mm_cmpgt_sd( y, minusZeroD ) ) )
{
y = _S(y);
oldMXCSR |= INEXACT_FLAG;
}
else
{
x = _mm_and_pd( x, xEQZero );
y = _mm_or_pd( y, x );
}
}
_mm_setcsr( oldMXCSR );
return y;
}
static inline double _xcos( double xx )
{
if( xx != xx )
return xx + xx;
xDouble x = DOUBLE_2_XDOUBLE( xx );
xDouble y = _mm_andnot_pd( minusZeroD, x );
xDouble xEQZero = _mm_cmpeq_sdm( x, (double*) &minusZeroD );
if( _mm_istrue_sd( _mm_cmpgt_sdm( y, &piOver4 ) ) )
{
int quo = 0;
y = _halfPiReduce( y, &quo );
if( _mm_istrue_sd( _mm_cmpunord_sd( y, y ) ) )
{
y = _mm_load_sd( &trigNaN.d );
}
else
{
if( quo & 1 )
y = _S(y);
else
y = _C(y);
if( (quo+1) & 2 )
y = _mm_xor_pd( y, minusZeroD );
}
}
else
{
if( _mm_istrue_sd( _mm_cmpgt_sd( y, minusZeroD ) ) )
{
static const double minY = 0x1.0000000000001p-511;
y = _mm_max_sdm( y, &minY ); y = _C(y);
}
else
{
return 1.0;
}
}
return XDOUBLE_2_DOUBLE(y);
}
static const double plusinf = 1e500; static const double p0 = 0x1.E1346CE13A963p+8; static const double p1 = -0x1.D39B1C4972CB0p+4; static const double p2 = 0x1.553AE3B07D8EAp-2; static const double p3 = -0x1.63F866C7AF365p-22; static const double q0 = 0x1.C321261326ECDp+11; static const double q1 = -0x1.A3FF6484AEC5Cp+10; static const double q2 = 0x1.6A26064907801p+6; static const double twom2 = 0x1.0p-2;
static const double three = 3.0;
static inline xDouble _xtan( xDouble x )
{
int oldMXCSR = _mm_getcsr();
int newMXCSR = (oldMXCSR | DEFAULT_MXCSR) & DEFAULT_MASK; if( newMXCSR != oldMXCSR )
_mm_setcsr( newMXCSR );
xDouble y = _mm_andnot_pd( minusZeroD, x ); xDouble sign = _mm_and_pd( minusZeroD, x );
int quo = 0;
if( _mm_isfalse_sd( _mm_cmplt_sdm( y, &piOver4 ) ) )
y = _halfPiReduce( y, &quo );
y = _mm_xor_pd( y, sign );
xDouble fabsy = _mm_andnot_pd( minusZeroD, y );
if( _mm_istrue_sd( _mm_cmplt_sdm( fabsy, &plusinf ) ) )
{
xDouble s = _mm_mul_sd( y, y );
xDouble num = _mm_add_sdm( _mm_mul_sdm( s, &p3 ), &p2 ); xDouble denom = _mm_add_sdm( _mm_mul_sd( _mm_sub_sd( _mm_load_sd( &q2 ), s ), s ), &q1 ); num = _mm_add_sdm( _mm_mul_sd( num, s ), &p1 ); denom = _mm_add_sdm( _mm_mul_sd( denom, s ), &q0 ); xDouble t = _mm_div_sd( num, denom );
t = _mm_mul_sd( t, s );
xDouble c1 = _mm_load_sd( &twom2 );
xDouble c2 = _mm_sub_sdm( c1, &one );
xDouble sLEtwom2 = _mm_cmplt_sd( s, c1 );
c1 = _mm_and_pd( c1, sLEtwom2 );
c2 = _mm_and_pd( c2, sLEtwom2 );
xDouble b = _mm_add_sd( y, c2 ); b = _mm_mul_sd( b, s ); b = _mm_div_sdm( b, &three ); xDouble c = _mm_mul_sd( _mm_mul_sd( s, y ), t ); xDouble a = _mm_mul_sd( s, c1 ); y = _mm_add_sd( a, b );
y = _mm_add_sd( y, c );
if( quo & 1 )
y = _mm_div_sd( _mm_load_sd( &one ), y );
newMXCSR = _mm_getcsr() & ~UNDERFLOW_FLAG;
oldMXCSR |= newMXCSR & ALL_FLAGS;
}
else if ( _mm_istrue_sd( _mm_cmpeq_sdm( fabsy, &plusinf ) ) )
{
oldMXCSR |= INVALID_FLAG;
y = _mm_load_sd( &trigNaN.d );
}
else
{
y = x;
}
_mm_setcsr( oldMXCSR );
return y;
}
#pragma mark -
long double __cosl( long double );
long double __sinl( long double );
long double __tanl( long double );
double cos(double x)
{
return __cosl( x );
}
float cosf(float x)
{
return __cosl( x );
}
double sin(double x)
{
return __sinl( x );
}
float sinf(float x)
{
return __sinl( x );
}
double tan(double x)
{
return __tanl( x );
}
float tanf(float x)
{
return __tanl( x );
}
#endif