xmm_arcsincostan.c [plain text]
#include "xmmLibm_prefix.h"
#include <math.h>
static inline xDouble _fatn( xDouble ) ALWAYS_INLINE;
static inline xDouble _xatan( xDouble ) ALWAYS_INLINE;
static inline xDouble _xasin( xDouble ) ALWAYS_INLINE;
static inline xDouble _xacos( xDouble ) ALWAYS_INLINE;
static const union
{
uint64_t u;
double d;
}inverseTrigNaN = { 0x7ff8000000000022ULL };
#pragma mark -
static const double q0 = 0x1.880B38AA99E19p+5; static const double q1 = 0x1.9B497D4358B3Ep+6; static const double q2 = 0x1.1C6FDB0AE1716p+6; static const double q3 = 0x1.1EF299B1679BEp+4; static const double q4 = 0x1.31A63D5C55F4Cp+0;
static const double p0 = 0.0; static const double p1 = 0x1.055CD071BBEBBp+4; static const double p2 = 0x1.8790B27BF4749p+4; static const double p3 = 0x1.4476D8263F242p+3; static const double p4 = 0x1.0000000000000p+0;
static inline xDouble _fatn( xDouble x )
{
x = _mm_mul_sd( x, x );
xDouble denom = _mm_mul_sdm( x, &q4); xDouble num = _mm_add_sdm( x, &p3 );
denom = _mm_add_sdm( denom, &q3 ); num = _mm_mul_sd( num, x );
denom = _mm_mul_sd( denom, x ); num = _mm_add_sdm( num, &p2 );
denom = _mm_add_sdm( denom, &q2 ); num = _mm_mul_sd( num, x );
denom = _mm_mul_sd( denom, x ); num = _mm_add_sdm( num, &p1 );
denom = _mm_add_sdm( denom, &q1 ); num = _mm_mul_sd( num, x );
denom = _mm_mul_sd( denom, x );
denom = _mm_add_sdm( denom, &q0 );
return _mm_div_sd( num, denom );
}
static const double FPKX2 = 0x1.279A74590331Cp-1; static const double FPKX2FX2 = 0x1.B8550D62BFB6Dp-5; static const double FPKATNCONS = 0x1.126145E9ECD56p-2; static const double FPKPI2 = 0x1.921FB54442D18p+0; static const double one = 1.0;
static inline xDouble _xatan( xDouble x )
{
if( _mm_istrue_sd( _mm_cmpunord_sd( x, x ) ) )
return _mm_add_sd( x, x );
xDouble sign = _mm_and_pd( minusZeroD, x );
xDouble y = _mm_andnot_pd( minusZeroD, x ); int isLarge = _mm_isfalse_sd( _mm_cmple_sdm( y, &one ) );
if( isLarge ) y = _mm_div_sd( _mm_load_sd( &one ), y );
if( _mm_istrue_sd( _mm_cmple_sdm( y, &FPKATNCONS ) ) ) {
static const double small = 0x1.0p-63;
static const double scale = 0x1.0p55;
static const double mscale = 0x1.0p-55;
static const double tiny = 0x1.0000000000000p-1022;
if( _mm_istrue_sd( _mm_cmplt_sdm( y, (double*) &small ) ) )
{
if( _mm_istrue_sd( _mm_cmpne_sdm( y, (double*) &minusZeroD ) ) )
y = _mm_mul_sdm( _mm_add_sdm( _mm_mul_sdm( y, &scale ), &tiny ), &mscale ); }
else
y = _mm_sub_sd( y, _mm_mul_sd( y, _fatn(y) ) );
}
else
{
xDouble t = _mm_mul_sdm( y, &FPKX2 ); xDouble a = _mm_sub_sdm( y, &FPKX2 ); xDouble rt = _mm_div_sd( _mm_load_sd( &one ), t ); xDouble b = _mm_add_sdm( t, &one ); xDouble z = _mm_div_sd( a, b ); xDouble c = _mm_add_sdm( rt, &one ); c = _mm_div_sd( a, c ); xDouble d = _fatn(z);
xDouble e = _mm_add_sdm( _mm_mul_sd( z, d ), &FPKX2FX2 );
c = _mm_add_sd( c, e );
y = _mm_sub_sd( y, c );
}
if( isLarge )
y = _mm_sub_sd( _mm_load_sd( &FPKPI2), y );
y = _mm_xor_pd( y, sign );
return y;
}
static const double half = 0.5;
static const double Emin = 0x1.0p-1022;
static const double plusinf = 1e500;
static inline xDouble _xasin( xDouble x )
{
if( _mm_istrue_sd( _mm_cmpunord_sd( x, x ) ) )
return _mm_add_sd( x, x );
xDouble y = _mm_andnot_pd( minusZeroD, x );
xDouble oneD = _mm_load_sd( &one );
if( _mm_istrue_sd( _mm_cmpge_sd( y, oneD ) ) )
{
if( _mm_istrue_sd( _mm_cmpgt_sd( y, oneD ) ) )
{
y = _mm_or_pd( y, minusZeroD );
y = _MM_SQRT_SD( y );
}
else
{
static double pi_over_2_hi = 0x1.921fb54442d18p+0; static double pi_over_2_lo = 0x8.d4p-57;
y = REQUIRED_ADD_sd( _mm_load_sd(&pi_over_2_hi), _mm_load_sd(&pi_over_2_lo) );
y = _mm_or_pd( _mm_and_pd( minusZeroD, x ), y );
}
}
else
{
if( _mm_istrue_sd( _mm_cmple_sdm( y, &half ) ) )
{
static const double small = 0x1.0p-126;
y = _mm_max_sdm( y, &small ); y = _mm_sub_sd( oneD, _mm_mul_sd( y, y ) );
}
else
{
y = _mm_sub_sd( oneD, y );
y = _mm_sub_sd( _mm_add_sd( y, y ), _mm_mul_sd( y, y ) );
}
y = _MM_SQRT_SD( y );
y = _mm_div_sd( x, y );
y = _xatan( y );
}
if( _mm_istrue_sd( _mm_and_pd( _mm_cmpord_sd( x, x ), _mm_cmpunord_sd( y, y ) ) ) )
y = _mm_load_sd( &inverseTrigNaN.d );
return y;
}
static inline xDouble _xacos( xDouble x )
{
static const double three = 3.0;
xDouble y = x;
xDouble xone = _mm_load_sd( &one );
if( _mm_istrue_sd( _mm_cmpeq_sd( xone, _mm_andnot_pd( minusZeroD, y ) ) ) )
{ static const double pi = 0x1.921fb54442d18p1;
y = _mm_and_pd( _mm_load_sd( &pi ), _mm_cmplt_sd( y, minusZeroD ) );
}
else
{
xDouble safeY = _mm_andnot_pd( _mm_cmpunord_sd( y, y ), y );
safeY = _mm_andnot_pd( minusZeroD, safeY );
y = _mm_sel_pd( y, _mm_load_sd( &three), _mm_cmpgt_sd( safeY, xone ) ); y = _mm_div_sd( _mm_sub_sd( xone, y ), _mm_add_sd( xone, y ) );
y = _MM_SQRT_SD( y );
y = _xatan( y );
y = _mm_add_sd( y, y );
}
if( _mm_istrue_sd( _mm_and_pd( _mm_cmpord_sd( x, x ), _mm_cmpunord_sd( y, y ) ) ) )
{
y = _mm_load_sd( &inverseTrigNaN.d );
}
return y;
}
#pragma mark -
double asin(double x)
{
xDouble xd = DOUBLE_2_XDOUBLE( x );
xd = _xasin( xd );
return XDOUBLE_2_DOUBLE( xd );
}
float asinf(float x)
{
xDouble xd = FLOAT_2_XDOUBLE( x );
xd = _xasin( xd );
return XDOUBLE_2_FLOAT( xd );
}
double acos(double x)
{
xDouble xd = DOUBLE_2_XDOUBLE( x );
xd = _xacos( xd );
return XDOUBLE_2_DOUBLE( xd );
}
float acosf(float x)
{
xDouble xd = FLOAT_2_XDOUBLE( x );
xd = _xacos( xd );
return XDOUBLE_2_FLOAT( xd );
}
double atan(double x)
{
xDouble xd = DOUBLE_2_XDOUBLE( x );
xd = _xatan( xd );
return XDOUBLE_2_DOUBLE( xd );
}
float atanf(float x)
{
xDouble xd = FLOAT_2_XDOUBLE( x );
xd = _xatan( xd );
return XDOUBLE_2_FLOAT( xd );
}
#warning ******* CHEESY HACK TO GET LIBM BUILDING *********
double atan2( double x, double y )
{
return atan2l( (long double) x, (long double) y );
}
float atan2f( float x, float y )
{
return atan2l( (long double) x, (long double) y );
}