xmm_floor.c [plain text]

/*
 *  xmm_floor.c
 *  xmmLibm
 *
 *  Created by Ian Ollmann on 8/15/05.
 *  Copyright 2005 Apple Computer. All rights reserved.
 *
 */

#include <math.h>

#include "xmmLibm_prefix.h"

static const long double stepL = 0x1.0p63;
static const long double oneL = 1.0L;

static const double stepD = 0x1.0p52;
static const double oneD = 1.0;

static const float stepF = 0x1.0p23f;
static const float oneF = 1.0f;

#if defined( BUILDING_FOR_CARBONCORE_LEGACY )

float modff( float f, float *i )
{
    xFloat x = FLOAT_2_XFLOAT( f );

    //load some constants
    xFloat step = _mm_load_ss( &stepF );
    xFloat one = _mm_load_ss( &oneF );

    //set aside sign and take absolute value
    xFloat sign = _mm_and_ps( minusZeroF, x );
    xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
	xFloat isNaN = _mm_cmpunord_ss( x, x );
	xFloat safeX = _mm_andnot_ps( isNaN, fabsx );
	xFloat isNotInt = _mm_cmplt_ssm( safeX, &stepF );
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
    step = _mm_and_ps( step, isNotInt );

    //test for x == 0.0 special case
    xFloat xGTzero = _mm_cmplt_ss( minusZeroF, safeX );

    //round to integer
    xFloat r = _mm_add_ss( safeX, step );
    r = _mm_sub_ss( r, step );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    r = _mm_sub_ss( r, _mm_and_ps( one, _mm_cmplt_ss( safeX, r ) ) );

    //restore the sign
    r = _mm_or_ps( r, sign );
    
    //fix up -0.0
    r = _mm_sel_ps( x, r, xGTzero );
    *i = XFLOAT_2_FLOAT( r );
	
	r = _mm_and_ps( r, isNotInt );
	x = _mm_and_ps( x, isNotInt );
    x = _mm_sub_ss( x, r );
	x = _mm_or_ps( x, sign );

    f = XFLOAT_2_FLOAT( x );

    return f;
}

#else

/*
double floor( double f )
{
    xDouble x = DOUBLE_2_XDOUBLE( f );

    //load some constants
    xDouble step = _mm_load_sd( &stepD );
    xDouble one = _mm_load_sd( &oneD );

    //set aside sign and take absolute value
    xDouble sign = _mm_and_pd( minusZeroD, x );
    xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
	xDouble isNaN = _mm_cmpunord_pd( x, x );
	xDouble safeX = _mm_andnot_pd( isNaN, fabsx );	//zero any NaNs to prevent invalid flag from being set
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
    step = _mm_and_pd( step, _mm_cmplt_sdm( safeX, &stepD ) );

    //test for x == 0.0 special case
    xDouble xGTzero = _mm_cmplt_sd( minusZeroD, safeX );

    //round to integer
    xDouble intX = _mm_add_sd( safeX, step );
    intX = _mm_sub_sd( intX, step );
    
    //restore the sign
    intX = _mm_or_pd( intX, sign );
	safeX = _mm_or_pd( safeX, sign );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    intX = _mm_sub_sd( intX, _mm_and_pd( one, _mm_cmplt_sd( safeX, intX ) ) );

    //fix up -0.0 and NaN
    intX = _mm_sel_pd( x, intX, xGTzero );

    f = XDOUBLE_2_DOUBLE( intX );

    return f;
}


float floorf( float f )
{
    xFloat x = FLOAT_2_XFLOAT( f );

    //load some constants
    xFloat step = _mm_load_ss( &stepF );
    xFloat one = _mm_load_ss( &oneF );

    //set aside sign and take absolute value
    xFloat sign = _mm_and_ps( minusZeroF, x );
    xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
	xFloat isNaN = _mm_cmpunord_ps( x, x );
	xFloat safeX = _mm_andnot_ps( isNaN, fabsx );	//zero any NaNs to prevent invalid flag from being set
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
    step = _mm_and_ps( step, _mm_cmplt_ssm( safeX, &stepF ) );

    //test for x == 0.0 special case
    xFloat xGTzero = _mm_cmplt_ss( minusZeroF, safeX );

    //round to integer
    xFloat intX = _mm_add_ss( safeX, step );
    intX = _mm_sub_ss( intX, step );
    
    //restore the sign
    intX = _mm_or_ps( intX, sign );
	safeX = _mm_or_ps( safeX, sign );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    intX = _mm_sub_ss( intX, _mm_and_ps( one, _mm_cmplt_ss( safeX, intX ) ) );

    //fix up -0.0 and NaN
    intX = _mm_sel_ps( x, intX, xGTzero );

    f = XFLOAT_2_FLOAT( intX );

    return f;
}


double ceil( double f )
{
    xDouble x = DOUBLE_2_XDOUBLE( f );

    //load some constants
    xDouble step = _mm_load_sd( &stepD );
    xDouble one = _mm_load_sd( &oneD );

    //set aside sign and take absolute value
    xDouble sign = _mm_and_pd( minusZeroD, x );
    xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
	xDouble isNaN = _mm_cmpunord_pd( x, x );
	xDouble safeX = _mm_andnot_pd( isNaN, x );
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
	xSInt64 isSmall = _mm_cmpgt_epi32( (xSInt32) step, (xSInt32) fabsx );
	isSmall = _mm_shuffle_epi32( isSmall, 0xf5 );
	step = _mm_or_pd( step, sign );
    step = _mm_and_pd( step, (xDouble) isSmall );

    //round to integer
    xDouble intX = _mm_add_sd( x, step );
    intX = _mm_sub_sd( intX, step );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    intX = _mm_add_sd( intX, _mm_and_pd( one, _mm_cmplt_sd( _mm_andnot_pd( isNaN, intX), safeX ) ) );

    //fix up the sign of the result if zero
    intX = _mm_or_pd( intX, sign );
	
    f = XDOUBLE_2_DOUBLE( intX );

    return f;
}

float ceilf( float f )
{
    xFloat x = FLOAT_2_XFLOAT( f );

    //load some constants
    xFloat step = _mm_load_ss( &stepF );
    xFloat one = _mm_load_ss( &oneF );

    //set aside sign and take absolute value
    xFloat sign = _mm_and_ps( minusZeroF, x );
    xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
	xFloat isNaN = _mm_cmpunord_ps( x, x );
	xFloat safeX = _mm_andnot_ps( isNaN, x );
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
	xFloat isSmall = (xFloat) _mm_cmpgt_epi32( (xSInt32) step, (xSInt32) fabsx );
	step = _mm_or_ps( step, sign );
    step = _mm_and_ps( step, isSmall );

    //round to integer
    xFloat intX = _mm_add_ss( x, step );
    intX = _mm_sub_ss( intX, step );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    intX = _mm_add_ss( intX, _mm_and_ps( one, _mm_cmplt_ss( _mm_andnot_ps( isNaN, intX), safeX ) ) );

    //fix up the sign of the result if zero
    intX = _mm_or_ps( intX, sign );
	
    f = XFLOAT_2_FLOAT( intX );

    return f;
}
*/

static inline double _xrint( double f ) ALWAYS_INLINE;
static inline float _xrintf( float f ) ALWAYS_INLINE;

static inline double _xrint( double f )
{
    xDouble x = DOUBLE_2_XDOUBLE( f );

    //load some constants
    xDouble step = _mm_load_sd( &stepD );

    //set aside sign and take absolute value
    xDouble sign = _mm_and_pd( minusZeroD, x );
    xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
	xDouble isNaN = _mm_cmpunord_sd( x, x );
	xDouble safeX = _mm_andnot_pd( isNaN, fabsx );	//flush NaNs to zero to avoid invalid flag
	
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p23
	xDouble isSmall = _mm_cmplt_sd( safeX, step );  
	step = _mm_and_pd( step, isSmall );

	//set the sign on step
	step = _mm_or_pd( step, sign );

    //round to integer
    x = _mm_add_sd( x, step );
    x = _mm_sub_sd( x, step );
        
    //set the sign of the result to be the same as the input
    x = _mm_or_pd( x, sign );

    f = XDOUBLE_2_DOUBLE( x );

    return f;
}


static inline float _xrintf( float f )
{
    xFloat x = FLOAT_2_XFLOAT( f );

    //load some constants
    xFloat step = _mm_load_ss( &stepF );

    //set aside sign and take absolute value
    xFloat sign = _mm_and_ps( minusZeroF, x );
    xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
	
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p23
	xSInt32 isLarge = _mm_cmpgt_epi32( (xSInt32) fabsx, (xSInt32) step );  //use integer compare to avoid invalid flag with NaN
	step = _mm_andnot_ps( (xFloat) isLarge, step );

	//set the sign on step
	step = _mm_or_ps( step, sign );

    //round to integer
    x = _mm_add_ss( x, step );
    x = _mm_sub_ss( x, step );
        
    //set the sign of the result to be the same as the input
    x = _mm_or_ps( x, sign );

    f = XFLOAT_2_FLOAT( x );

    return f;
}

#if defined( __x86_64__ )
// the i386 versios are in s_rint.s
double rint( double f ){ return _xrint( f );  }
float rintf( float f  ){ return _xrintf( f );  }
#endif 

double nearbyint( double f )
{
    int mxcsr = _mm_getcsr();
    int new_mxcsr = mxcsr | INEXACT_MASK;
    if( new_mxcsr != mxcsr )
        _mm_setcsr( new_mxcsr );

    f = _xrint( f );
    
    _mm_setcsr( mxcsr );

    return f;
}

float nearbyintf( float f )
{
    int mxcsr = _mm_getcsr();
    int new_mxcsr = mxcsr | INEXACT_MASK;
    if( new_mxcsr != mxcsr )
        _mm_setcsr( new_mxcsr );

    f = _xrintf( f );
    
    _mm_setcsr( mxcsr );

    return f;
}

#define GET_FCW()      ({ unsigned short _result; asm volatile ("fnstcw %0" : "=m" (_result)::"memory"); /*return*/ _result; })
#define SET_FCW(_a)    { unsigned short _aa = _a; asm volatile ("fldcw %0" : :"m" (_aa)); }


static inline long long int _llrint( long double x )  ALWAYS_INLINE;
static inline long long int _llrint( long double x )
{ 
	int64_t result = CVTLD_SI64( x ); 
	int64_t mask = x >= 0x1.0p63L;

	return result ^ -mask;
}

// These all have been moved off to s_rint.s
/*
#if defined( __LP64__ )
    long int  lrint( double x ){ return _llrint(x); }
    long int lrintf( float x ){ return _llrint(x); }
    long int  lrintl( long double x ){ return _llrint(x); }
    long long int  llrintl( long double x ){ return _llrint(x); }
#else

    long long int  llrint( double x ){ return _llrint(x); }
    long long int  llrintf( float x ){ return _llrint(x); }

    long int  lrint( double x )
	{ 
		int result = _mm_cvtsd_si32( DOUBLE_2_XDOUBLE(x) ); 
		int test = result == 0x80000000;		//1 for over/underflow
		int test2 = x > 2147483647.0;		//1 for overflow
		test &= test2;
		return result ^ -test;
	}
    long int  lrintf( float x )
	{ 
		int test = x >= 0x1.0p31f;
		int result = _mm_cvtss_si32( FLOAT_2_XFLOAT(x) ); 
		return result ^ (-test);
	}
#endif
*/

static inline long long int _xllround( double x ) ALWAYS_INLINE;
static inline long long int _xllround( double x )
{
    long double lx = x;
    long long int result;
    int index = x < 0.0;
    long double fabslx = __builtin_fabs( x );
    const float limit[4] = {0.0f, 0.0f, 0x1.0p64f, -0x1.0p64f };
    const float addend[4] = {0.0f, 0.0f, 0.5f, -0.5f };

    index += 2 * ( fabslx < 0x1.0p64L );    // 0: x >= 2**64, 1: x <= -2**64,  2: 0 <= x < 2**64, 3: -2**64 < x <= 0

    //check to see if lx is an int. Set inexact flag if it is not.
    long double test = lx - limit[ index ];
    test += limit[ index ];
    
    //if not an int, add signed 0.5
    if( test != lx )
        lx += addend[ index ];
    
    result = CVTTLD_SI64( lx );
    
    return result;
}


#if defined( __LP64__ )
	#warning llround and llroundf can be done faster in SSE on x86_64
    long int  lround( double x ){ return _xllround( x ); }
    long int  lroundf( float x ){ return _xllround( x ); }
#else
	//This has a bug: For -2147483649 < x < -2147483648.5, this function returns inexact in addition to invald.
    long int  lround( double x )
    {
        static const double half = 0.5;
		static const double large = 0x1.0p31;
		static const double larger = 0x1.00000002p31;
	
        xDouble xx = DOUBLE_2_XDOUBLE( x );
        xDouble sign   = _mm_and_pd( xx, minusZeroD );
        xDouble fabsxx = _mm_andnot_pd( minusZeroD, xx );
		xDouble isNotLarger = _mm_cmplt_sdm( fabsxx, &larger );  //invalid flag okay here for NaN

		//calculate floor( |x| )	
		xDouble step = _mm_load_sd( &stepD );
		step = _mm_and_pd( step, isNotLarger );		
        xDouble floor = _mm_sub_sd( _mm_add_sd( fabsxx, step ), step );
		floor = _mm_sub_sd( floor, _mm_and_pd( _mm_load_sd( &oneD ), _mm_cmplt_sd( fabsxx, floor ) ) );

		//calculate fabsxx + min( 0.5, floor( fabsxx))
		xDouble diff = _mm_sub_sd( fabsxx, floor );
		xDouble addend = _mm_min_sdm( diff, &half );
		fabsxx = _mm_add_sd( fabsxx, addend );
		
		//set the sign
		fabsxx = _mm_or_pd( fabsxx, sign );
		
		int overflow = _mm_ucomige_sd( fabsxx, _mm_load_sd( &large ) );
		int result = _mm_cvttsd_si32( fabsxx );
		
		return result ^ (-overflow );
    }

    long int  lroundf( float x )
    {
        static const float half = 0.5f;
		static const float limit = 0x1.0p31f;
		static const float small = 0x1.0p23f;
	
        xFloat xx = FLOAT_2_XFLOAT( x );
        xFloat sign   = _mm_and_ps( xx, minusZeroF );
        xFloat fabsxx = _mm_andnot_ps( minusZeroF, xx );

		//calculate trunc(xx)
		xFloat isSmall = _mm_cmplt_ssm( fabsxx, &small );
		xFloat trunc = _mm_and_ps( isSmall, xx );					//clip large integers to zero
		trunc = _mm_cvtsi32_ss( trunc, _mm_cvttss_si32( trunc ) );	//truncate
		trunc = _mm_or_ps( trunc, _mm_andnot_ps( isSmall, xx ));		//or in large xx again

		//if the difference between floor and safex >= 0.5, add 0.5. Otherwise add 0.
		xFloat diff = _mm_sub_ss( xx, trunc );
		xFloat addend = _mm_min_ssm( _mm_andnot_ps( minusZeroF, diff), &half );	//use a smaller value than 0.5 if under 0.5. Should be exact. This is cheaper than slamming zero here
		addend = _mm_or_ps( addend, sign );
		addend = _mm_and_ps( addend, isSmall );
		xx = _mm_add_ss( xx, addend );
		
		int overflow = _mm_ucomige_ss( xx, _mm_load_ss( &limit) );
		int result = _mm_cvttss_si32( xx );
		
		return result ^ (-overflow);		
    }
#endif

#if defined (__LP64__ )
	#warning llround and llroundf can be done faster in SSE on x86_64
#endif

long long int  llround( double x ){ return _xllround( x ); }
long long int  llroundf( float x ){ return _xllround( x ); }

/*
double trunc( double f )
{
    xDouble x = DOUBLE_2_XDOUBLE( f );

    //load some constants
    xDouble step = _mm_load_sd( &stepD );
    xDouble one = _mm_load_sd( &oneD );

    //set aside sign and take absolute value
    xDouble sign = _mm_and_pd( minusZeroD, x );
    xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
	xDouble isNaN = _mm_cmpunord_sd( x, x );
	xDouble safeX = _mm_andnot_pd( isNaN, fabsx );
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
    step = _mm_and_pd( step, _mm_cmplt_sdm( safeX, &stepD ) );

    //test for x == 0.0 special case
    xDouble xGTzero = _mm_cmplt_sd( minusZeroD, safeX );

    //round to integer
    xDouble r = _mm_add_sd( safeX, step );
    r = _mm_sub_sd( r, step );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    r = _mm_sub_sd( r, _mm_and_pd( one, _mm_cmplt_sd( safeX, r ) ) );

    //restore the sign
    r = _mm_or_pd( r, sign );
    
    //fix up -0.0
    r = _mm_sel_pd( x, r, xGTzero );

    f = XDOUBLE_2_DOUBLE( r );

    return f;
}


float truncf( float f )
{
    xFloat x = FLOAT_2_XFLOAT( f );

    //load some constants
    xFloat step = _mm_load_ss( &stepF );
    xFloat one = _mm_load_ss( &oneF );

    //set aside sign and take absolute value
    xFloat sign = _mm_and_ps( minusZeroF, x );
    xFloat fabsx = _mm_andnot_ps( minusZeroF, x );
	xFloat isNaN = _mm_cmpunord_ss( x, x );
	xFloat safeX = _mm_andnot_ps( isNaN, fabsx );
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
    step = _mm_and_ps( step, _mm_cmplt_ssm( safeX, &stepF ) );

    //test for x == 0.0 special case
    xFloat xGTzero = _mm_cmplt_ss( minusZeroF, safeX );

    //round to integer
    xFloat r = _mm_add_ss( safeX, step );
    r = _mm_sub_ss( r, step );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    r = _mm_sub_ss( r, _mm_and_ps( one, _mm_cmplt_ss( safeX, r ) ) );

    //restore the sign
    r = _mm_or_ps( r, sign );
    
    //fix up -0.0
    r = _mm_sel_ps( x, r, xGTzero );

    f = XFLOAT_2_FLOAT( r );

    return f;
}
*/

double modf( double f, double *i )
{
    xDouble x = DOUBLE_2_XDOUBLE( f );

    //load some constants
    xDouble step = _mm_load_sd( &stepD );
    xDouble one = _mm_load_sd( &oneD );

    //set aside sign and take absolute value
    xDouble sign = _mm_and_pd( minusZeroD, x );
    xDouble fabsx = _mm_andnot_pd( minusZeroD, x );
	xDouble isNaN = _mm_cmpunord_sd( x, x );
	xDouble safeX = _mm_andnot_pd( isNaN, fabsx );
	xDouble isNotInt = _mm_cmplt_sdm( safeX, &stepD );
    
    //set step to zero if |x| is guaranteed to be an integer (i.e. >= 0x1.0p52
    step = _mm_and_pd( step, isNotInt );

    //test for x == 0.0 special case
    xDouble xGTzero = _mm_cmplt_sd( minusZeroD, safeX );

    //round to integer
    xDouble r = _mm_add_sd( safeX, step );
    r = _mm_sub_sd( r, step );
    
    //Fix up mistaken rounding for rounding modes other than round to -Inf
    r = _mm_sub_sd( r, _mm_and_pd( one, _mm_cmplt_sd( safeX, r ) ) );

    //restore the sign
    r = _mm_or_pd( r, sign );
    
    //fix up -0.0
    r = _mm_sel_pd( x, r, xGTzero );
    *i = XDOUBLE_2_DOUBLE( r );
	
	r = _mm_and_pd( r, isNotInt );
	x = _mm_and_pd( x, isNotInt );
    x = _mm_sub_sd( x, r );
	x = _mm_or_pd( x, sign );

    f = XDOUBLE_2_DOUBLE( x );

    return f;
}

#endif /* CARBON_CORE */