#if defined( __i386__ )
#include "xmmLibm_prefix.h"
#include "math.h"
#include "fenv.h"
#pragma mark exp
static inline double _xexp( double x ) ALWAYS_INLINE;
static const hexdouble S_Lead[32] =
{ { 0x3FF0000000000000ULL },
{ 0x3FF059B0D3158540ULL },
{ 0x3FF0B5586CF98900ULL },
{ 0x3FF11301D0125B40ULL },
{ 0x3FF172B83C7D5140ULL },
{ 0x3FF1D4873168B980ULL },
{ 0x3FF2387A6E756200ULL },
{ 0x3FF29E9DF51FDEC0ULL },
{ 0x3FF306FE0A31B700ULL },
{ 0x3FF371A7373AA9C0ULL },
{ 0x3FF3DEA64C123400ULL },
{ 0x3FF44E0860618900ULL },
{ 0x3FF4BFDAD5362A00ULL },
{ 0x3FF5342B569D4F80ULL },
{ 0x3FF5AB07DD485400ULL },
{ 0x3FF6247EB03A5580ULL },
{ 0x3FF6A09E667F3BC0ULL },
{ 0x3FF71F75E8EC5F40ULL },
{ 0x3FF7A11473EB0180ULL },
{ 0x3FF82589994CCE00ULL },
{ 0x3FF8ACE5422AA0C0ULL },
{ 0x3FF93737B0CDC5C0ULL },
{ 0x3FF9C49182A3F080ULL },
{ 0x3FFA5503B23E2540ULL },
{ 0x3FFAE89F995AD380ULL },
{ 0x3FFB7F76F2FB5E40ULL },
{ 0x3FFC199BDD855280ULL },
{ 0x3FFCB720DCEF9040ULL },
{ 0x3FFD5818DCFBA480ULL },
{ 0x3FFDFC97337B9B40ULL },
{ 0x3FFEA4AFA2A490C0ULL },
{ 0x3FFF50765B6E4540ULL } };
static const hexdouble S_Trail[32] =
{ { 0x0000000000000000ULL },
{ 0x3D0A1D73E2A475B4ULL },
{ 0x3CEEC5317256E308ULL },
{ 0x3CF0A4EBBF1AED93ULL },
{ 0x3D0D6E6FBE462876ULL },
{ 0x3D053C02DC0144C8ULL },
{ 0x3D0C3360FD6D8E0BULL },
{ 0x3D009612E8AFAD12ULL },
{ 0x3CF52DE8D5A46306ULL },
{ 0x3CE54E28AA05E8A9ULL },
{ 0x3D011ADA0911F09FULL },
{ 0x3D068189B7A04EF8ULL },
{ 0x3D038EA1CBD7F621ULL },
{ 0x3CBDF0A83C49D86AULL },
{ 0x3D04AC64980A8C8FULL },
{ 0x3CD2C7C3E81BF4B7ULL },
{ 0x3CE921165F626CDDULL },
{ 0x3D09EE91B8797785ULL },
{ 0x3CDB5F54408FDB37ULL },
{ 0x3CF28ACF88AFAB35ULL },
{ 0x3CFB5BA7C55A192DULL },
{ 0x3D027A280E1F92A0ULL },
{ 0x3CF01C7C46B071F3ULL },
{ 0x3CFC8B424491CAF8ULL },
{ 0x3D06AF439A68BB99ULL },
{ 0x3CDBAA9EC206AD4FULL },
{ 0x3CFC2220CB12A092ULL },
{ 0x3D048A81E5E8F4A5ULL },
{ 0x3CDC976816BAD9B8ULL },
{ 0x3CFEB968CAC39ED3ULL },
{ 0x3CF9858F73A18F5EULL },
{ 0x3C99D3E12DD8A18BULL } };
static const double scalars_data[2] = { 0x1.0000000000000p+54, 0x1.0000000000000p-54 };
static const int32_t exponents_data[2] = { 0x7920, 0x86a0 };
static const double *scalars = &scalars_data[1];
static const xDouble xNaN = { NAN, NAN };
static const xDouble xInfinity = { INFINITY, INFINITY };
static const xDouble minusOneD = { -1.0, -1.0 };
static const DoubleConstant_sd Inv_L = GET_CONSTANT_sd(0x1.71547652B82FEp+5); static const DoubleConstant_sd A0 = GET_CONSTANT_sd(0x1.0000000000000p-1); static const DoubleConstant_sd L1 = GET_CONSTANT_sd(0x1.62E42FEF00000p-6); static const DoubleConstant_sd L2 = GET_CONSTANT_sd(-0x1.473DE6AF278EDp-39); static const DoubleConstant_sd A1 = GET_CONSTANT_sd(0x1.5555555548F7Cp-3); static const DoubleConstant_sd A2 = GET_CONSTANT_sd(0x1.5555555545D4Ep-5); static const DoubleConstant_sd A3 = GET_CONSTANT_sd(0x1.11115B7AA905Ep-7); static const DoubleConstant_sd A4 = GET_CONSTANT_sd(0x1.6C1728D739765p-10); static const DoubleConstant_sd oneD = GET_CONSTANT_sd( 1.0 );
static const double largeD = 0x1.fffffffffffffp1022;
static const double tinyD = 0x1.0p-1022;
static inline double _xexp( double _x )
{
if( _x != _x || _x == __builtin_inf() )
return _x + _x;
static const double X1 = 0x1.62e42fefa39f0p+9; static const double X2 = -745.5;
static const double c0 = 1.5e-154;
static const double infD = INFINITY;
xDouble x = DOUBLE_2_XDOUBLE( _x );
xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
xDouble xLTx1 = _mm_cmplt_sdm( x, &X1 );
xDouble xGTx2 = _mm_cmpgt_sdm( x, &X2 );
xDouble xIsSmall = _mm_cmplt_sdm( fabsx, &c0 );
xDouble branchtest = _mm_andnot_pd( xIsSmall, _mm_and_pd( xLTx1, xGTx2 ) );
xDouble result;
if( _mm_isfalse_sd( branchtest))
{
xDouble tiny = _mm_load_sd( &tinyD );
xDouble one = _mm_load_sd( &oneD );
xDouble minusInf = _mm_or_pd( minusZeroD, _mm_load_sd( &infD) );
result = _mm_andnot_pd( _mm_cmpeq_sd( x, minusInf ), _mm_add_sd( x, one ) );
xDouble xGEx1 = _mm_cmpge_sdm( x, &X1 );
xDouble multiplier = _mm_sel_pd( one, _mm_load_sd( &largeD ), xGEx1 );
xDouble xLEx2 = _mm_cmple_sdm( x, &X2 );
multiplier = _mm_andnot_pd( xLEx2, multiplier );
multiplier = _mm_or_pd( multiplier, tiny );
result = _mm_andnot_pd( minusZeroD, result ); result = _mm_mul_sd( result, multiplier );
result = _mm_mul_sd( result, multiplier );
result = _mm_mul_sd( result, multiplier );
return XDOUBLE_2_DOUBLE( result );
}
xDouble mask = _mm_cmpgt_sdm( x, (const double*) &minusZeroD ); xDouble dN =_mm_mul_sdm( x, &Inv_L );
int N = _mm_cvtsd_si32( dN );
int N2 = N & 0x1F; dN = _mm_cvtsi32_sd( minusZeroD, N ); xDouble R1 = _mm_sub_sd( x, _mm_mul_sdm( dN, &L1 ) ); xDouble R2 = _mm_mul_sdm( dN, &L2 ); xDouble R = _mm_add_sd( R1, R2 ); xDouble P, Q;
int index = _mm_cvtsi128_si32((xSInt32)mask);
xDouble scalar = _mm_load_sd( &scalars[ index ] );
Q = _mm_add_sdm( _mm_mul_sdm( R, &A4 ), &A3 );
Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A2 );
Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A1 );
Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A0 );
Q = _mm_mul_sd( _mm_mul_sd( Q, R ), R );
P = _mm_add_sd( _mm_add_sd( Q, R2 ), R1 );
R1 = _mm_load_sd( &S_Lead[N2].d ); R2 = _mm_load_sd( &S_Trail[N2].d );
N = N - N2;
N += exponents_data[ index + 1]; dN = (__m128d) _mm_cvtsi32_si128( N );
dN = (__m128d) _mm_slli_epi64( (__m128i) dN, 15 + 32 ); R = _mm_add_sd( R1, R2 );
result = _mm_mul_sd( P, R );
result = _mm_add_sd( result, R2 );
result = _mm_add_sd( result, R1 );
result = _mm_mul_sd( result, scalar );
result = _mm_mul_sd( result, dN );
return XDOUBLE_2_DOUBLE( result );
}
static inline double _xexp2( double x ) ALWAYS_INLINE;
static inline double _xexp2( double x )
{
static const double conversion = 6.9314718055994530942E-1;
if( x != x )
return x + x;
if( __builtin_fabs(x) < 0x1.0p-1020 )
x = 0x1.0p-1020;
return _xexp( x * conversion );
}
#if ! defined( BUILDING_FOR_CARBONCORE_LEGACY )
double exp ( double x )
{
return _xexp( x );
}
float expf( float x )
{ return _xexp( x );
}
#pragma mark -
#pragma mark expm1
static const hexdouble Em1_A[9] =
{ { 0x3FC5555555555549ULL },
{ 0x3FA55555555554B6ULL },
{ 0x3F8111111111A9F3ULL },
{ 0x3F56C16C16CE14C6ULL },
{ 0x3F2A01A01159DD2DULL },
{ 0x3EFA019F635825C4ULL },
{ 0x3EC71E14BFE3DB59ULL },
{ 0x3E928295484734EAULL },
{ 0x3E5A2836AA646B96ULL } };
static inline double _xexpm1( double x ) ALWAYS_INLINE;
static inline double _xexpm1( double _x )
{
if( EXPECT_FALSE( _x != _x ) || _x == __builtin_inf() )
return _x + _x;
xDouble x = DOUBLE_2_XDOUBLE( _x );
static const DoubleConstant_sd Em1_Tiny = GET_CONSTANT_sd(0x1.0000000000000p-54); static const DoubleConstant_sd Em1_Pos = GET_CONSTANT_sd(0x1.C4474E1726455p+10); static const DoubleConstant_sd Em1_Neg = GET_CONSTANT_sd(-0x1.2B708872320E1p+5); static const DoubleConstant_sd Em1_T1 = GET_CONSTANT_sd(-0x1.269621134DB93p-2); static const DoubleConstant_sd Em1_T2 = GET_CONSTANT_sd(0x1.C8FF7C79A9A22p-3); static const DoubleConstant_sd twoTOn7 = GET_CONSTANT_sd(0x1.0000000000000p-7); static const DoubleConstant_sd half = GET_CONSTANT_sd( 0.5 );
xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
xDouble xGEem1Neg = _mm_cmpge_sdm( x, &Em1_Neg );
xDouble xLEem1Pos = _mm_cmple_sdm( x, &Em1_Pos );
if( _mm_isfalse_sd( _mm_and_pd( xLEem1Pos, xGEem1Neg )))
{
static const xDouble tiny = { 0x1.0p-1022, 0 };
static const xDouble maxFinite = { 0x1.FFFFFFFFFFFFFp1023, 0 };
static const xDouble mOneD = { -1.0, 0 };
xDouble xLTzero = _mm_cmplt_sdm( x, (double*) &minusZeroD );
xDouble xEQmInf = _mm_cmpeq_sdm( fabsx, (double*) &xInfinity );
xDouble v = _mm_sel_pd( maxFinite, mOneD, xLTzero );
xDouble v2 = _mm_sel_pd( maxFinite, _mm_andnot_pd( xEQmInf, tiny), xLTzero );
x = _mm_add_sd( v, v2 );
return XDOUBLE_2_DOUBLE( x );
}
xDouble xGEem1_t2 = _mm_cmpge_sdm( x, &Em1_T2 );
xDouble xLEem1_t1 = _mm_cmple_sdm( x, &Em1_T1 );
if( _mm_istrue_sd( _mm_or_pd( xGEem1_t2, xLEem1_t1 ) ) ) {
xDouble dN = _mm_mul_sdm( x, &Inv_L ); xDouble signedHalf = _mm_sel_pd( _mm_load_sd( &half ), x, minusZeroD );
dN = _mm_add_sd( dN, signedHalf );
int N = _mm_cvttsd_si32( dN ); int N2 = N & 0x1f; int M = (N - N2) / 32;
dN = _mm_cvtsi32_sd( dN, N );
xDouble R1 = _mm_sub_sd( x, _mm_mul_sdm( dN, &L1 )); xDouble R2 = _mm_mul_sdm( dN, &L2 ); xDouble R = _mm_add_sd( R1, R2 );
xDouble Q = _mm_add_sdm( _mm_mul_sdm( R, &A4 ), &A3 );
Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A2 );
Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A1 );
Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A0 );
Q = _mm_mul_sd( Q, _mm_mul_sd( R, R ) );
xDouble P = _mm_add_sd( _mm_add_sd( R2, Q ), R1 ); xDouble S = _mm_add_sdm( _mm_load_sd( &S_Lead[N2].d), &S_Trail[N2].d ); xDouble twoM;
if( M >= 53 )
{
if( M > 1024 )
M = 1024;
twoM = twoToTheM( M );
R = _mm_sub_sdm( twoToTheM( -M ), &S_Trail[N2].d ); R = _mm_sub_sd( _mm_mul_sd( S, P ), R ); R = _mm_add_sdm( R, &S_Lead[N2].d ); x = _mm_mul_sd( R, twoM );
}
else if( M <= -8 )
{
if( M >= -1023 )
twoM = twoToTheM( M );
else
{
double zzz = scalbn( 1.0, M );
twoM = DOUBLE_2_XDOUBLE( zzz );
}
R = _mm_add_sdm( _mm_mul_sd( S, P ), &S_Trail[N2].d ); R = _mm_add_sdm( R, &S_Lead[N2].d); R = _mm_mul_sd( R, twoM );
x = _mm_sub_sdm( R, &oneD );
}
else
{
twoM = twoToTheM( M );
R = _mm_add_sdm( P, &oneD );
R = _mm_mul_sdm( R, &S_Trail[N2].d );
R = _mm_add_sd( R, _mm_mul_sdm( P, &S_Lead[N2].d ) ); R = _mm_sub_sd( R, _mm_sub_sdm( twoToTheM( -M ), &S_Lead[N2].d ) ); x = _mm_mul_sd( R, twoM );
}
return XDOUBLE_2_DOUBLE( x );
}
xDouble xIsTiny = _mm_cmplt_sdm( fabsx, &Em1_Tiny );
if( _mm_istrue_sd( xIsTiny ) )
{
static const double scale2 = 0x1.0000000000001p-1022;
static const double half = 0.5;
xDouble xEQzero = _mm_cmpeq_sd( x, minusZeroD );
xDouble oneHalf = _mm_load_sd( &half );
xDouble isDenorm = _mm_cmplt_sdm( fabsx, &tinyD );
xDouble result = (xDouble) _mm_add_epi64( (xSInt64) x, (xSInt64) oneHalf );
oneHalf = _mm_and_pd( oneHalf, isDenorm );
oneHalf = _mm_sel_pd( oneHalf, x, minusZeroD);
result = _mm_sub_sd( result, oneHalf );
result = _mm_mul_sdm( result, &scale2 );
result = _mm_sel_pd( result, x, xEQzero );
return XDOUBLE_2_DOUBLE( x );
}
else
{
xDouble U = _mm_cvtss_sd( x, _mm_cvtsd_ss( (xFloat)x, x ) ); xDouble V = _mm_sub_sd( x, U ); xDouble Y = _mm_mul_sdm( _mm_mul_sd( U, U ), &A0 ); xDouble Z = _mm_mul_sdm( _mm_mul_sd(V, _mm_add_sd( x, U)), &A0 ); xDouble Q = _mm_add_sdm( _mm_mul_sdm( x, &Em1_A[8] ), &Em1_A[7] ); Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[6] );
Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[5] );
Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[4] );
Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[3] );
Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[2] );
Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[1] );
Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[0] );
Q = _mm_mul_sd( _mm_mul_sd( _mm_mul_sd( Q, x ), x), x );
xDouble test = _mm_cmpge_sdm( Y, &twoTOn7 );
U = _mm_and_pd( U, test );
V = _mm_and_pd( V, test );
x = _mm_andnot_pd( test, x );
V = _mm_add_sd( V, Z );
U = _mm_add_sd( U, Y );
Q = _mm_add_sd( Q, V );
U = _mm_add_sd( U, Q );
x = _mm_add_sd( x, U );
return XDOUBLE_2_DOUBLE( x );
}
}
double expm1( double x )
{
return _xexpm1( x );
}
float expm1f( float x )
{ return _xexpm1( x );
}
#pragma mark -
#pragma mark exp2
float exp2f( float x )
{
return (float) _xexp2( (double) x );
}
#else
double exp2( double x )
{
return _xexp2( x );
}
#endif
#endif