#if defined( __i386__ )
#include "xmmLibm_prefix.h"
/* Per-precision constants shared by the rounding routines in this file.
   stepL/stepD/stepF are 2^63, 2^52 and 2^23; the routines add and then
   subtract the matching "step" so that the fraction bits of a smaller
   magnitude are rounded away.  oneL/oneD/oneF are the constant 1, used for
   the +/-1 correction after that rounding.  stepL and oneL are not referenced
   by the code visible in this file. */
static const long double stepL = 0x1.0p63;
static const long double oneL = 1.0L;
static const double stepD = 0x1.0p52;
static const double oneD = 1.0;
static const float stepF = 0x1.0p23f;
static const float oneF = 1.0f;
#if defined( BUILDING_FOR_CARBONCORE_LEGACY )
float modff( float f, float *i )
{
xFloat x = FLOAT_2_XFLOAT( f );
xFloat step = _mm_load_ss( &stepF );
xFloat one = _mm_load_ss( &oneF );
xFloat sign = _mm_and_ps( minusZeroF, x );
xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
xFloat isNaN = _mm_cmpunord_ss( x, x );
xFloat safeX = _mm_andnot_ps( isNaN, fabsx );
xFloat isNotInt = _mm_cmplt_ssm( safeX, &stepF );
step = _mm_and_ps( step, isNotInt );
xFloat xGTzero = _mm_cmplt_ss( minusZeroF, safeX );
xFloat r = _mm_add_ss( safeX, step );
r = _mm_sub_ss( r, step );
r = _mm_sub_ss( r, _mm_and_ps( one, _mm_cmplt_ss( safeX, r ) ) );
r = _mm_or_ps( r, sign );
r = _mm_sel_ps( x, r, xGTzero );
*i = XFLOAT_2_FLOAT( r );
r = _mm_and_ps( r, isNotInt );
x = _mm_and_ps( x, isNotInt );
x = _mm_sub_ss( x, r );
x = _mm_or_ps( x, sign );
f = XFLOAT_2_FLOAT( x );
return f;
}
#else
double floor( double f )
{
xDouble x = DOUBLE_2_XDOUBLE( f );
xDouble step = _mm_load_sd( &stepD );
xDouble one = _mm_load_sd( &oneD );
xDouble sign = _mm_and_pd( minusZeroD, x );
xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
xDouble isNaN = _mm_cmpunord_pd( x, x );
xDouble safeX = _mm_andnot_pd( isNaN, fabsx );
step = _mm_and_pd( step, _mm_cmplt_sdm( safeX, &stepD ) );
xDouble xGTzero = _mm_cmplt_sd( minusZeroD, safeX );
xDouble intX = _mm_add_sd( safeX, step );
intX = _mm_sub_sd( intX, step );
intX = _mm_or_pd( intX, sign );
safeX = _mm_or_pd( safeX, sign );
intX = _mm_sub_sd( intX, _mm_and_pd( one, _mm_cmplt_sd( safeX, intX ) ) );
intX = _mm_sel_pd( x, intX, xGTzero );
f = XDOUBLE_2_DOUBLE( intX );
return f;
}
float floorf( float f )
{
xFloat x = FLOAT_2_XFLOAT( f );
xFloat step = _mm_load_ss( &stepF );
xFloat one = _mm_load_ss( &oneF );
xFloat sign = _mm_and_ps( minusZeroF, x );
xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
xFloat isNaN = _mm_cmpunord_ps( x, x );
xFloat safeX = _mm_andnot_ps( isNaN, fabsx );
step = _mm_and_ps( step, _mm_cmplt_ssm( safeX, &stepF ) );
xFloat xGTzero = _mm_cmplt_ss( minusZeroF, safeX );
xFloat intX = _mm_add_ss( safeX, step );
intX = _mm_sub_ss( intX, step );
intX = _mm_or_ps( intX, sign );
safeX = _mm_or_ps( safeX, sign );
intX = _mm_sub_ss( intX, _mm_and_ps( one, _mm_cmplt_ss( safeX, intX ) ) );
intX = _mm_sel_ps( x, intX, xGTzero );
f = XFLOAT_2_FLOAT( intX );
return f;
}
double ceil( double f )
{
xDouble x = DOUBLE_2_XDOUBLE( f );
xDouble step = _mm_load_sd( &stepD );
xDouble one = _mm_load_sd( &oneD );
xDouble sign = _mm_and_pd( minusZeroD, x );
xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
xDouble isNaN = _mm_cmpunord_pd( x, x );
xDouble safeX = _mm_andnot_pd( isNaN, x );
xSInt64 isSmall = _mm_cmpgt_epi32( (xSInt32) step, (xSInt32) fabsx );
isSmall = _mm_shuffle_epi32( isSmall, 0xf5 );
step = _mm_or_pd( step, sign );
step = _mm_and_pd( step, (xDouble) isSmall );
xDouble intX = _mm_add_sd( x, step );
intX = _mm_sub_sd( intX, step );
intX = _mm_add_sd( intX, _mm_and_pd( one, _mm_cmplt_sd( _mm_andnot_pd( isNaN, intX), safeX ) ) );
intX = _mm_or_pd( intX, sign );
f = XDOUBLE_2_DOUBLE( intX );
return f;
}
float ceilf( float f )
{
xFloat x = FLOAT_2_XFLOAT( f );
xFloat step = _mm_load_ss( &stepF );
xFloat one = _mm_load_ss( &oneF );
xFloat sign = _mm_and_ps( minusZeroF, x );
xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
xFloat isNaN = _mm_cmpunord_ps( x, x );
xFloat safeX = _mm_andnot_ps( isNaN, x );
xFloat isSmall = (xFloat) _mm_cmpgt_epi32( (xSInt32) step, (xSInt32) fabsx );
step = _mm_or_ps( step, sign );
step = _mm_and_ps( step, isSmall );
xFloat intX = _mm_add_ss( x, step );
intX = _mm_sub_ss( intX, step );
intX = _mm_add_ss( intX, _mm_and_ps( one, _mm_cmplt_ss( _mm_andnot_ps( isNaN, intX), safeX ) ) );
intX = _mm_or_ps( intX, sign );
f = XFLOAT_2_FLOAT( intX );
return f;
}
/* Forward declarations so the ALWAYS_INLINE attribute can be attached to the
   two shared round-to-integral helpers. */
static inline double _xrint( double f ) ALWAYS_INLINE;
static inline float _xrintf( float f ) ALWAYS_INLINE;

/*
 * _xrint: round a double to an integral value using whatever rounding the
 * SSE2 add/subtract performs.
 *
 * A copy of 2^52 carrying the input's sign is added and then subtracted;
 * the bias is zeroed when |f| is not below 2^52.  The input's sign bit is
 * ORed back at the end.  minusZeroD is defined outside this file
 * (presumably the sign-bit mask).
 */
static inline double _xrint( double f )
{
    xDouble v = DOUBLE_2_XDOUBLE( f );
    xDouble bias = _mm_load_sd( &stepD );
    xDouble signBit = _mm_and_pd( minusZeroD, v );
    xDouble mag = _mm_andnot_pd( minusZeroD, v );

    /* Zero the magnitude when the input is unordered with itself (NaN). */
    xDouble nanMask = _mm_cmpunord_sd( v, v );
    xDouble cleanMag = _mm_andnot_pd( nanMask, mag );

    /* Keep the bias only for magnitudes below 2^52, then sign it. */
    xDouble belowStep = _mm_cmplt_sd( cleanMag, bias );
    bias = _mm_and_pd( bias, belowStep );
    bias = _mm_or_pd( bias, signBit );

    v = _mm_add_sd( v, bias );
    v = _mm_sub_sd( v, bias );
    v = _mm_or_pd( v, signBit );

    f = XDOUBLE_2_DOUBLE( v );
    return f;
}
static inline float _xrintf( float f )
{
xFloat x = FLOAT_2_XFLOAT( f );
xFloat step = _mm_load_ss( &stepF );
xFloat sign = _mm_and_ps( minusZeroF, x );
xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
xSInt32 isLarge = _mm_cmpgt_epi32( (xSInt32) fabsx, (xSInt32) step ); step = _mm_andnot_ps( (xFloat) isLarge, step );
step = _mm_or_ps( step, sign );
x = _mm_add_ss( x, step );
x = _mm_sub_ss( x, step );
x = _mm_or_ps( x, sign );
f = XFLOAT_2_FLOAT( x );
return f;
}
/* rint: public entry point; all of the work is done by the inlined _xrint
   helper. */
double rint( double f )
{
    return _xrint( f );
}
/* rintf: public entry point; all of the work is done by the inlined _xrintf
   helper. */
float rintf( float f )
{
    return _xrintf( f );
}
/*
 * nearbyint: same rounding as _xrint, bracketed by an MXCSR save/restore.
 *
 * The INEXACT_MASK bits (defined outside this file; presumably the
 * inexact-exception mask bit) are ORed into MXCSR before rounding if they
 * are not already set.  The saved MXCSR value is written back afterwards
 * unconditionally, which restores the whole register to its value on entry.
 */
double nearbyint( double f )
{
    const int savedCsr = _mm_getcsr();
    const int maskedCsr = savedCsr | INEXACT_MASK;

    /* Skip the write when the bits are already set. */
    if( maskedCsr != savedCsr )
    {
        _mm_setcsr( maskedCsr );
    }

    f = _xrint( f );

    _mm_setcsr( savedCsr );
    return f;
}
/*
 * nearbyintf: same rounding as _xrintf, bracketed by an MXCSR save/restore.
 *
 * The INEXACT_MASK bits (defined outside this file; presumably the
 * inexact-exception mask bit) are ORed into MXCSR before rounding if they
 * are not already set.  The saved MXCSR value is written back afterwards
 * unconditionally, which restores the whole register to its value on entry.
 */
float nearbyintf( float f )
{
    const int savedCsr = _mm_getcsr();
    const int maskedCsr = savedCsr | INEXACT_MASK;

    /* Skip the write when the bits are already set. */
    if( maskedCsr != savedCsr )
    {
        _mm_setcsr( maskedCsr );
    }

    f = _xrintf( f );

    _mm_setcsr( savedCsr );
    return f;
}
/* GET_FCW(): GNU statement expression that executes the x87 `fnstcw`
   instruction into a 16-bit temporary and evaluates to that value (the x87
   control word). */
#define GET_FCW() ({ unsigned short _result; asm volatile ("fnstcw %0" : "=m" (_result)::"memory"); _result; })
/* SET_FCW(_a): copies _a into a 16-bit temporary and loads it with the x87
   `fldcw` instruction.
   NOTE(review): this expands to a bare { } block rather than
   do { } while (0), so `SET_FCW(x);` placed directly before an `else` will
   not parse.  Neither macro is used by the code visible in this file. */
#define SET_FCW(_a) { unsigned short _aa = _a; asm volatile ("fldcw %0" : :"m" (_aa)); }
static inline long long int _llrint( long double x ) ALWAYS_INLINE;

/*
 * _llrint: convert a long double to a 64-bit integer through CVTLD_SI64
 * (defined outside this file; presumably an x87 store-as-integer).
 *
 * When x >= 2^63 every bit of the converted value is inverted.
 * NOTE(review): this presumably turns the 0x8000000000000000 pattern the
 * conversion yields on overflow into LLONG_MAX -- confirm against the
 * CVTLD_SI64 definition.
 */
static inline long long int _llrint( long double x )
{
    int64_t converted = CVTLD_SI64( x );

    /* 1 when x is at or above 2^63, else 0; negated it becomes an
       all-ones or all-zeros XOR mask. */
    int64_t tooBig = x >= 0x1.0p63L;

    return converted ^ -tooBig;
}
/* llrint: the double argument is converted to long double at the call and
   handed to the shared _llrint helper. */
long long int llrint( double x )
{
    return _llrint( x );
}
/* llrintf: the float argument is converted to long double at the call and
   handed to the shared _llrint helper. */
long long int llrintf( float x )
{
    return _llrint( x );
}
#if defined( __LP64__ )
/* lrint (LP64 variant): forwards to _llrint and returns its long long
   result as long int. */
long int lrint( double x )
{
    return _llrint( x );
}
/* lrintf (LP64 variant): forwards to _llrint and returns its long long
   result as long int. */
long int lrintf( float x )
{
    return _llrint( x );
}
long int lrintl( double x ){ return llrintl(x); }
#else
/*
 * lrint (32-bit long variant): convert with _mm_cvtsd_si32.
 *
 * When the conversion yields the 0x80000000 pattern and x is greater than
 * 2147483647.0, every bit of the result is inverted, giving 0x7fffffff.
 */
long int lrint( double x )
{
    int converted = _mm_cvtsd_si32( DOUBLE_2_XDOUBLE(x) );

    /* flip is 1 only when both conditions hold; -flip is the XOR mask. */
    int flip = converted == 0x80000000;
    int aboveMax = x > 2147483647.0;
    flip &= aboveMax;

    return converted ^ -flip;
}
/*
 * lrintf (32-bit long variant): convert with _mm_cvtss_si32.
 *
 * When x >= 2^31, every bit of the converted value is inverted.
 * NOTE(review): presumably this maps the 0x80000000 overflow pattern to
 * 0x7fffffff -- confirm against the intrinsic's documented overflow result.
 */
long int lrintf( float x )
{
    /* 1 when x is at or above 2^31, else 0. */
    int tooBig = x >= 0x1.0p31f;
    int converted = _mm_cvtss_si32( FLOAT_2_XFLOAT(x) );

    return converted ^ (-tooBig);
}
#endif
static inline long long int _xllround( double x ) ALWAYS_INLINE;

/*
 * _xllround: round-half-away-from-zero conversion of a double to a 64-bit
 * integer, carried out in long double.
 *
 * A 2-bit table index is formed: bit 0 is "x < 0", bit 1 is "|x| < 2^64".
 * Subtracting and re-adding the selected +/-2^64 limit and comparing with
 * the original tells whether the value changed; if it did, the selected
 * +/-0.5 is added.  The value is then converted with CVTTLD_SI64 (defined
 * outside this file; presumably a truncating long-double-to-int64
 * conversion).  For indices 0 and 1 both table entries are zero.
 */
static inline long long int _xllround( double x )
{
    long double wide = x;
    long long int converted;
    int slot = x < 0.0;
    long double wideMag = __builtin_fabs( x );
    const float roundLimit[4] = {0.0f, 0.0f, 0x1.0p64f, -0x1.0p64f };
    const float halfStep[4] = {0.0f, 0.0f, 0.5f, -0.5f };

    slot += 2 * ( wideMag < 0x1.0p64L );

    /* Round trip through the limit; a changed value means "not integral". */
    long double roundTrip = wide - roundLimit[ slot ];
    roundTrip += roundLimit[ slot ];

    if( roundTrip != wide )
    {
        wide += halfStep[ slot ];
    }

    converted = CVTTLD_SI64( wide );
    return converted;
}
#if defined( __LP64__ )
/* lround (LP64 variant): forwards to _xllround and returns its long long
   result as long int. */
long int lround( double x )
{
    return _xllround( x );
}
/* lroundf (LP64 variant): the float argument is converted to double at the
   call; _xllround's long long result is returned as long int. */
long int lroundf( float x )
{
    return _xllround( x );
}
#else
long int lround( double x )
{
static const double half = 0.5;
static const double large = 0x1.0p31;
static const double larger = 0x1.00000002p31;
xDouble xx = DOUBLE_2_XDOUBLE( x );
xDouble sign = _mm_and_pd( xx, minusZeroD );
xDouble fabsxx = _mm_andnot_pd( minusZeroD, xx );
xDouble isNotLarger = _mm_cmplt_sdm( fabsxx, &larger );
xDouble step = _mm_load_sd( &stepD );
step = _mm_and_pd( step, isNotLarger );
xDouble floor = _mm_sub_sd( _mm_add_sd( fabsxx, step ), step );
floor = _mm_sub_sd( floor, _mm_and_pd( _mm_load_sd( &oneD ), _mm_cmplt_sd( fabsxx, floor ) ) );
xDouble diff = _mm_sub_sd( fabsxx, floor );
xDouble addend = _mm_min_sdm( diff, &half );
fabsxx = _mm_add_sd( fabsxx, addend );
fabsxx = _mm_or_pd( fabsxx, sign );
int overflow = _mm_ucomige_sd( fabsxx, _mm_load_sd( &large ) );
int result = _mm_cvttsd_si32( fabsxx );
return result ^ (-overflow );
}
/*
 * lroundf (32-bit long variant): round half away from zero, then convert
 * with the truncating _mm_cvttss_si32.
 *
 * For |x| below 2^23 the truncated value is produced by an int round trip
 * (cvttss -> cvtsi32); larger inputs are passed through unchanged.  The
 * magnitude of (x - truncated), capped at 0.5 and given the sign of x, is
 * added to x before the final truncating conversion.  When the adjusted
 * value is >= 2^31 every bit of the converted result is inverted.
 * minusZeroF is defined outside this file (presumably the sign-bit mask).
 */
long int lroundf( float x )
{
    static const float halfF = 0.5f;
    static const float twoTo31 = 0x1.0p31f;
    static const float twoTo23 = 0x1.0p23f;

    xFloat v = FLOAT_2_XFLOAT( x );
    xFloat signBit = _mm_and_ps( v, minusZeroF );
    xFloat mag = _mm_andnot_ps( minusZeroF, v );
    xFloat belowStep = _mm_cmplt_ssm( mag, &twoTo23 );

    /* Truncate via an integer round trip for small inputs only; large
       inputs are merged back in untouched. */
    xFloat whole = _mm_and_ps( belowStep, v );
    whole = _mm_cvtsi32_ss( whole, _mm_cvttss_si32( whole ) );
    whole = _mm_or_ps( whole, _mm_andnot_ps( belowStep, v ));

    xFloat fraction = _mm_sub_ss( v, whole );

    /* |fraction| capped at one half, signed like x, zero for large inputs. */
    xFloat bump = _mm_min_ssm( _mm_andnot_ps( minusZeroF, fraction), &halfF );
    bump = _mm_or_ps( bump, signBit );
    bump = _mm_and_ps( bump, belowStep );

    v = _mm_add_ss( v, bump );

    int tooBig = _mm_ucomige_ss( v, _mm_load_ss( &twoTo31) );
    int converted = _mm_cvttss_si32( v );
    return converted ^ (-tooBig);
}
#endif
/* llround: public entry point; all of the work is done by the inlined
   _xllround helper. */
long long int llround( double x )
{
    return _xllround( x );
}
/* llroundf: the float argument is converted to double at the call and
   handed to the inlined _xllround helper. */
long long int llroundf( float x )
{
    return _xllround( x );
}
double trunc( double f )
{
xDouble x = DOUBLE_2_XDOUBLE( f );
xDouble step = _mm_load_sd( &stepD );
xDouble one = _mm_load_sd( &oneD );
xDouble sign = _mm_and_pd( minusZeroD, x );
xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
xDouble isNaN = _mm_cmpunord_sd( x, x );
xDouble safeX = _mm_andnot_pd( isNaN, fabsx );
step = _mm_and_pd( step, _mm_cmplt_sdm( safeX, &stepD ) );
xDouble xGTzero = _mm_cmplt_sd( minusZeroD, safeX );
xDouble r = _mm_add_sd( safeX, step );
r = _mm_sub_sd( r, step );
r = _mm_sub_sd( r, _mm_and_pd( one, _mm_cmplt_sd( safeX, r ) ) );
r = _mm_or_pd( r, sign );
r = _mm_sel_pd( x, r, xGTzero );
f = XDOUBLE_2_DOUBLE( r );
return f;
}
float truncf( float f )
{
xFloat x = FLOAT_2_XFLOAT( f );
xFloat step = _mm_load_ss( &stepF );
xFloat one = _mm_load_ss( &oneF );
xFloat sign = _mm_and_ps( minusZeroF, x );
xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
xFloat isNaN = _mm_cmpunord_ss( x, x );
xFloat safeX = _mm_andnot_ps( isNaN, fabsx );
step = _mm_and_ps( step, _mm_cmplt_ssm( safeX, &stepF ) );
xFloat xGTzero = _mm_cmplt_ss( minusZeroF, safeX );
xFloat r = _mm_add_ss( safeX, step );
r = _mm_sub_ss( r, step );
r = _mm_sub_ss( r, _mm_and_ps( one, _mm_cmplt_ss( safeX, r ) ) );
r = _mm_or_ps( r, sign );
r = _mm_sel_ps( x, r, xGTzero );
f = XFLOAT_2_FLOAT( r );
return f;
}
double modf( double f, double *i )
{
xDouble x = DOUBLE_2_XDOUBLE( f );
xDouble step = _mm_load_sd( &stepD );
xDouble one = _mm_load_sd( &oneD );
xDouble sign = _mm_and_pd( minusZeroD, x );
xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
xDouble isNaN = _mm_cmpunord_sd( x, x );
xDouble safeX = _mm_andnot_pd( isNaN, fabsx );
xDouble isNotInt = _mm_cmplt_sdm( safeX, &stepD );
step = _mm_and_pd( step, isNotInt );
xDouble xGTzero = _mm_cmplt_sd( minusZeroD, safeX );
xDouble r = _mm_add_sd( safeX, step );
r = _mm_sub_sd( r, step );
r = _mm_sub_sd( r, _mm_and_pd( one, _mm_cmplt_sd( safeX, r ) ) );
r = _mm_or_pd( r, sign );
r = _mm_sel_pd( x, r, xGTzero );
*i = XDOUBLE_2_DOUBLE( r );
r = _mm_and_pd( r, isNotInt );
x = _mm_and_pd( x, isNotInt );
x = _mm_sub_sd( x, r );
x = _mm_or_pd( x, sign );
f = XDOUBLE_2_DOUBLE( x );
return f;
}
#endif
#endif