/*
 *  fenv.c
 *  xmmLibm
 *
 *  Created by iano on 6/21/05.
 *  Copyright 2005 __MyCompanyName__. All rights reserved.
 *
 *  Floating-point environment entry points.  Each routine reads and/or
 *  writes both the x87 state (control/status words) and the SSE state
 *  (MXCSR), so that the two units are kept in agreement.
 */

#include "xmmLibm_prefix.h"
#include "fenv.h"
#include <xmmintrin.h>

#define DEFAULT_CONTROL     0x037f
#define DEFAULT_STATUS      0x0
#define DEFAULT_RESERVED    "\0\0\0\0\0\0\0"

/* MXCSR flush-to-zero (bit 15) and denormals-are-zero (bit 6) control bits. */
#define MXCSR_FTZ_DAZ       0x8040

/* Read the x87 status word. */
#define GET_FSW()       ({ unsigned short _result;                                          \
                           asm volatile ("fnstsw %0" : "=m" (_result) :: "memory");         \
                           /*return*/ _result; })

/* Read the x87 control word. */
#define GET_FCW()       ({ unsigned short _result;                                          \
                           asm volatile ("fnstcw %0" : "=m" (_result) :: "memory");         \
                           /*return*/ _result; })

/* Load the x87 control word.  The argument is evaluated exactly once. */
#define SET_FCW(_a)     do { unsigned short _aa = (_a);                                     \
                             asm volatile ("fldcw %0" : : "m" (_aa)); } while (0)

/* In-memory image written by fnstenv and read back by fldenv. */
typedef struct
{
    unsigned short  __control;
    unsigned short  __reserved1;
    unsigned short  __status;
    unsigned short  __reserved2;
    unsigned int    __private3;
    unsigned int    __private4;
    unsigned int    __private5;
    unsigned int    __private6;
    unsigned int    __private7;
} __fpustate_t;

/* Every bit that can be set by a rounding direction macro. */
#define FE_ALL_RND  ( FE_TONEAREST | FE_TOWARDZERO | FE_UPWARD | FE_DOWNWARD )

/*******************************************************************************
*   _fesetexceptflag copies the flags selected by excepts from *flagp into     *
*   both the MXCSR and the x87 status word.  Flags not selected by excepts     *
*   are left untouched.  Always returns 0.                                     *
*******************************************************************************/
static inline int _fesetexceptflag(const fexcept_t *flagp, int excepts ) ALWAYS_INLINE;
static inline int _fesetexceptflag(const fexcept_t *flagp, int excepts )
{
    __fpustate_t currfpu;
    unsigned int mxcsr;
    unsigned int exceptMask = excepts & FE_ALL_EXCEPT;
    unsigned int andMask = ~exceptMask;             // clear just the bits indicated
    unsigned int orMask = *flagp & exceptMask;      // latch the specified bits

    // read the state
    mxcsr = _mm_getcsr();                                   // read the MXCSR state
    asm volatile ("fnstenv %0" : "=m" (currfpu) );          // read x87 state

    // fix up the MXCSR state
    mxcsr = ( mxcsr & andMask ) | orMask;

    // fix up the x87 state
    currfpu.__status = ( currfpu.__status & andMask ) | orMask;

    // store the state; the fldenv also reinstalls the exception masks
    // that fnstenv set as a side effect of storing the environment
    asm volatile ("ldmxcsr %0 ; fldenv %1" : : "m" (mxcsr), "m" (currfpu));

    return 0;
}

/*******************************************************************************
*   _fegetexceptflag stores in *flagp the union of the x87 and SSE exception   *
*   flags, restricted to those selected by excepts.  Always returns 0.         *
*******************************************************************************/
static inline int _fegetexceptflag(fexcept_t *flagp, int excepts);
static inline int _fegetexceptflag(fexcept_t *flagp, int excepts)
{
    fexcept_t fsw = GET_FSW();              // get the x87 status word
    unsigned int mxcsr = _mm_getcsr();      // get the mxcsr
    fexcept_t result = mxcsr | fsw;

    result &= excepts & FE_ALL_EXCEPT;
    *flagp = result;

    return 0;
}

#if defined( BUILDING_FOR_CARBONCORE_LEGACY )

const fenv_t _FE_DFL_ENV = { DEFAULT_CONTROL,
                             DEFAULT_STATUS,
                             DEFAULT_MXCSR,
                             DEFAULT_RESERVED };

const fenv_t _FE_DFL_DISABLE_SSE_DENORMS_ENV = { DEFAULT_CONTROL,
                                                 DEFAULT_STATUS,
                                                 DEFAULT_MXCSR | MXCSR_FTZ_DAZ,
                                                 DEFAULT_RESERVED };

/*******************************************************************************
*     The function "feclearexcept" clears the supported floating point         *
*     exceptions represented by its argument.                                  *
*******************************************************************************/
int feclearexcept(int excepts)
{
    fexcept_t zero = 0;
    return _fesetexceptflag( &zero, excepts );
}

/*******************************************************************************
*    The function "feraiseexcept" raises the supported floating-point          *
*    exceptions represented by its argument. The order in which these          *
*    floating-point exceptions are raised is unspecified.                      *
*******************************************************************************/
int feraiseexcept(int excepts)
{
    fexcept_t t = excepts;
    int err = _fesetexceptflag ( &t, excepts );

    asm volatile ("fwait" : "=X" (t) :: "memory");      // and raise the exception(s)

    return err;
}

/*******************************************************************************
*    The function "fetestexcept" determines which of the specified subset of   *
*    the floating-point exception flags are currently set.  The excepts        *
*    argument specifies the floating-point status flags to be queried. This    *
*    function returns the value of the bitwise OR of the floating-point        *
*    exception macros corresponding to the currently set floating-point        *
*    exceptions included in excepts.                                           *
*                                                                              *
*    On MacOS X for Intel, the result is the value of union of the             *
*    corresponding result from the x87 and SSE floating point states.          *
*******************************************************************************/
int fetestexcept(int excepts )
{
    fexcept_t fsw = GET_FSW();              // get the x87 status word
    unsigned int mxcsr = _mm_getcsr();      // get the mxcsr
    unsigned int exceptMask = excepts & FE_ALL_EXCEPT;

    mxcsr |= fsw;
    mxcsr &= exceptMask;

    return mxcsr;
}

/*******************************************************************************
*     The following functions provide control of rounding direction modes.     *
*******************************************************************************/

/*******************************************************************************
*    The function "fegetround" returns the value of the rounding direction     *
*    macro which represents the current rounding direction, or a negative      *
*    if there is no such rounding direction macro or the current rounding      *
*    direction is not determinable.                                            *
*                                                                              *
*    Only the x87 control word is consulted.                                   *
*******************************************************************************/
int fegetround(void)
{
    int fcw = GET_FCW();

    return (fcw & FE_ALL_RND);
}

/*******************************************************************************
*    The function "fesetround" establishes the rounding direction represented  *
*    by its argument "round". If the argument is not equal to the value of a   *
*    rounding direction macro, the rounding direction is not changed.  It      *
*    returns zero if and only if the argument is equal to a rounding           *
*    direction macro.                                                          *
*******************************************************************************/
int fesetround(int round )
{
    if ((round & ~FE_ALL_RND))
        return 1;
    else
    {
        unsigned short fcw = GET_FCW();
        int mxcsr = _mm_getcsr();

        fcw &= ~FE_ALL_RND;
        fcw |= round;

        // the MXCSR rounding field sits 3 bits above the x87 one
        mxcsr &= ~( FE_ALL_RND << 3 );
        mxcsr |= round << 3;

        _mm_setcsr( mxcsr );
        SET_FCW( fcw );

        return 0;
    }
}

/*******************************************************************************
*    The following functions manage the floating-point environment, exception  *
*    flags and dynamic modes, as one entity.                                   *
*******************************************************************************/

/*******************************************************************************
*    The fegetenv function stores the current floating-point environment in    *
*    the object pointed to by envp.  Always returns 0.                         *
*******************************************************************************/
int fegetenv(fenv_t *envp)
{
    __fpustate_t currfpu;
    int mxcsr = _mm_getcsr();

    asm volatile ("fnstenv %0" : "=m" (currfpu) :: "memory");

    envp->__control = currfpu.__control;
    envp->__status = currfpu.__status;
    envp->__mxcsr = mxcsr;
    ((int*) envp->__reserved)[0] = 0;
    ((int*) envp->__reserved)[1] = 0;

    // fnstenv masks every x87 exception after storing the environment.
    // Reload what was just saved so that a query does not disable traps
    // the caller had unmasked.
    asm volatile ("fldenv %0" : : "m" (currfpu));

    return 0;
}

/*******************************************************************************
*    The feholdexcept function saves the current floating-point environment in *
*    the object pointed to by envp, clears the floating-point status flags,    *
*    and then installs a non-stop (continue on floating-point exceptions)      *
*    mode, if available, for all floating-point exceptions. The feholdexcept   *
*    function returns zero if and only if non-stop floating-point exceptions   *
*    handling was successfully installed.                                      *
*******************************************************************************/
int feholdexcept(fenv_t *envp)
{
    __fpustate_t currfpu;
    int mxcsr;

    mxcsr = _mm_getcsr();
    asm volatile ("fnstenv %0" : "=m" (currfpu) :: "memory");

    // save the environment as it was on entry
    envp->__control = currfpu.__control;
    envp->__status = currfpu.__status;
    envp->__mxcsr = mxcsr;
    ((int*) envp->__reserved)[0] = 0;
    ((int*) envp->__reserved)[1] = 0;

    currfpu.__control |= FE_ALL_EXCEPT;     // FPU shall handle all exceptions
    currfpu.__status &= ~FE_ALL_EXCEPT;
    mxcsr |= FE_ALL_EXCEPT << 7;            // left shifted because control mask is <<7 of the flags
    mxcsr &= ~FE_ALL_EXCEPT;

    asm volatile ("ldmxcsr %0 ; fldenv %1" : : "m" (mxcsr), "m" (currfpu));

    return 0;
}

/*******************************************************************************
*    The fesetenv function establishes the floating-point environment          *
*    represented by the object pointed to by envp. The argument envp shall     *
*    point to an object set by a call to fegetenv or feholdexcept, or equal to *
*    a floating-point environment macro -- we define only *FE_DFL_ENV and      *
*    FE_DISABLE_SSE_DENORMS_ENV -- to be C99 standard compliant and portable   *
*    to other architectures. Note that fesetenv merely installs the state of   *
*    the floating-point status flags represented through its argument, and     *
*    does not raise these floating-point exceptions.                           *
*                                                                              *
*    On MacOS X for Intel you may test and set the bits in *envp yourself,     *
*    provided that you conditionalize the code appropriately to preserve       *
*    portability and you follow the various strictures and suggestions         *
*    provided by Intel in appropriate processor documentation. Please be aware *
*    that because there are two hardware locations for setting and reading     *
*    floating point environment, this function (and others like it) are not    *
*    atomic -- that is, for a brief period of time during the function call    *
*    your new environment will have been applied to one but not both of the    *
*    floating point engines (x87 and SSE). In addition, the behavior of some   *
*    higher level interfaces (fegetround) is undefined if the x87 and SSE      *
*    floating point units rounding modes are configured differently.  Please   *
*    use common sense.                                                         *
*                                                                              *
*    The bits taken from *envp are the rounding mode, exception masks and      *
*    exception flags of both units, plus the MXCSR flush-to-zero and           *
*    denormals-are-zero controls.  All other bits keep their current value.    *
*******************************************************************************/
int fesetenv(const fenv_t *envp)
{
    __fpustate_t currfpu;
    int mxcsr = _mm_getcsr();
    // flags, masks, rounding modes and denormal controls.  The denormal
    // controls must be part of the mask, otherwise installing
    // FE_DFL_DISABLE_SSE_DENORMS_ENV would be no different from FE_DFL_ENV.
    const int mxcsr_mask = ( FE_ALL_RND << 3 ) | FE_ALL_EXCEPT | (FE_ALL_EXCEPT << 7) | MXCSR_FTZ_DAZ;

    asm volatile ("fnstenv %0" : "=m" (currfpu));

    currfpu.__control &= ~( FE_ALL_RND | FE_ALL_EXCEPT );
    currfpu.__control |= ( envp->__control & ( FE_ALL_RND | FE_ALL_EXCEPT ) );

    mxcsr &= ~mxcsr_mask;
    mxcsr |= envp->__mxcsr & mxcsr_mask;

    currfpu.__status &= ~FE_ALL_EXCEPT;
    currfpu.__status |= ( envp->__status & FE_ALL_EXCEPT );

    asm volatile ("ldmxcsr %0 ; fldenv %1" : : "m" (mxcsr), "m" (currfpu));

    return 0;
}

/*******************************************************************************
*    The feupdateenv function saves the currently raised floating-point        *
*    exceptions in its automatic storage, installs the floating-point          *
*    environment represented by the object pointed to by envp, and then raises *
*    the saved floating-point exceptions. The argument envp shall point to an  *
*    object set by a call to feholdexcept or fegetenv or equal a               *
*    floating-point environment macro.                                         *
*                                                                              *
*    Please see the description of fesetenv for additional ways to create      *
*    a fenv_t object, which are valid only for MacOS X for Intel.              *
*******************************************************************************/
int feupdateenv(const fenv_t *envp)
{
    __fpustate_t currfpu;
    int mxcsr = _mm_getcsr();
    // masks, rounding modes and denormal controls; the exception flags are
    // deliberately excluded here because the current ones must survive
    const int mxcsr_mask = ( FE_ALL_RND << 3 ) | (FE_ALL_EXCEPT << 7) | MXCSR_FTZ_DAZ;

    asm volatile ("fnstenv %0" : "=m" (currfpu));

    currfpu.__control &= ~( FE_ALL_RND | FE_ALL_EXCEPT );
    currfpu.__control |= ( envp->__control & ( FE_ALL_RND | FE_ALL_EXCEPT ) );

    mxcsr &= ~mxcsr_mask;
    mxcsr |= envp->__mxcsr & mxcsr_mask;

    currfpu.__status |= ( envp->__status & FE_ALL_EXCEPT );    // add envp's to current excepts
    mxcsr |= envp->__mxcsr & FE_ALL_EXCEPT;                    // set flags

    asm volatile ("ldmxcsr %0 ; fldenv %1; fwait " : : "m" (mxcsr), "m" (currfpu));

    return 0;
}

/* Legacy entry point */
void fegetexcept ( fexcept_t *flagp, int excepts )
{
    _fegetexceptflag (flagp, excepts );
}

/* Legacy entry point */
void fesetexcept ( fexcept_t *flagp, int excepts )
{
    _fesetexceptflag ( flagp, excepts );
}

#else

/*******************************************************************************
*    The function "fegetexceptflag" stores a implementation-defined            *
*    representation of the states of the floating-point status flags indicated *
*    by its integer argument excepts in the object pointed to by the argument, *
*    flagp.                                                                    *
*******************************************************************************/
int fegetexceptflag(fexcept_t *flagp, int excepts)
{
    return _fegetexceptflag( flagp, excepts );
}

/*******************************************************************************
*     The function "fesetexceptflag" sets or clears the floating point status  *
*     flags indicated by the argument excepts to the states stored in the      *
*     object pointed to by flagp. The value of the *flagp shall have been set  *
*     by a previous call to fegetexceptflag whose second argument represented  *
*     at least those floating-point exceptions represented by the argument     *
*     excepts. This function does not raise floating-point exceptions; it just *
*     sets the state of the flags.                                             *
*******************************************************************************/
int fesetexceptflag(const fexcept_t *flagp, int excepts )
{
    return _fesetexceptflag( flagp, excepts );
}

/*******************************************************************************
*     __fegetfltrounds translates the value returned by fegetround into a      *
*     small integer: 1 for FE_TONEAREST, 0 for FE_TOWARDZERO, 2 for FE_UPWARD, *
*     3 for FE_DOWNWARD, and -1 for anything else.                             *
*******************************************************************************/
int __fegetfltrounds( void )
{
    switch ( fegetround() )
    {
        case FE_TONEAREST:
            return 1;
        case FE_TOWARDZERO:
            return 0;
        case FE_UPWARD:
            return 2;
        case FE_DOWNWARD:
            return 3;
        default:
            return -1;
    }
}

#endif