#include "xmmLibm_test.h"
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <mach/mach_time.h>
#include <sys/sysctl.h>
static inline double checkErrorF( float v, long double c );
static inline double checkErrorD( double v, long double c );
const char *bitNames[16] = { "Invalid Operation Flag",
"Denormal Flag",
"Divide by Zero Flag",
"Overflow Flag",
"Underflow Flag",
"Inexact Flag",
"DAZ",
"Invalid operation Mask",
"Denormal operation Mask",
"Divide by Zero Mask",
"Overflow Mask",
"Underflow Mask",
"Inexact Mask",
"RC 0",
"RC 1",
"Flush to Zero"
};
extern double xexp( double );
int main( void )
{
int i,j;
int oldMXCSR = _mm_getcsr();
UnaryFunction *func;
BinaryFunction *bfunc;
long double conversion = 0;
double clockspeed = 0;
{ const char *name = "hw.cpufrequency_max";
uint64_t freq = 0;
size_t size = sizeof( freq );
int err = sysctlbyname( name, &freq, &size, NULL, 0 );
if( 0 == err )
{
clockspeed = freq;
}
printf( "Clock speed: %g Hz\n", clockspeed );
}
{ mach_timebase_info_data_t data = {0,0};
kern_return_t error = mach_timebase_info( &data );
if( 0 == error )
conversion = 1e-9 * (long double) data.numer / (long double) data.denom;
}
printf( "starting MXCSR: 0x%4.4x\n", oldMXCSR );
printf( "set bits:\n" );
for( i = 0; i < 16; i++ )
{
if( oldMXCSR & 1 )
printf( "\t%s\n", bitNames[i] );
oldMXCSR = (( oldMXCSR >> 1 ) | ( oldMXCSR << 15 )) & 0xFFFF;
}
_mm_setcsr( 0x1f80 | 0x0000 );
printf( "MXCSR set to 0x%4.4x before running\n", _mm_getcsr() );
#if 1
{
uint64_t start, stop, current, best, latency;
const int count = 10000;
const int innerCount = 1000;
volatile double d = 0x1.FFFFFFFFFFFFFp-1; volatile float f = 0x1.FFFFF8p-1; double c = 0;
float cf = 0;
double ticks2cycles = clockspeed * conversion / (double) innerCount;
best = -1ULL;
for( i = 0; i < count; i++ )
{
start = mach_absolute_time();
for( j = 0; j < innerCount; j++ )
c += d;
stop = mach_absolute_time();
current = stop - start;
if( current < best )
best = current;
}
latency = best;
printf( "Clock latency: %g cycles (%g)\n\n", (double) latency * ticks2cycles, c );
for( func = unaries; NULL != func->testF; func++ )
{
c = 0;
cf = 0;
best = -1ULL;
for( i = 0; i < count; i++ )
{
start = mach_absolute_time();
for( j = 0; j < innerCount; j++ )
cf += func->testF( f );
stop = mach_absolute_time();
current = stop - start;
if( current < best )
best = current;
}
best -= latency;
printf( "%s:\t(F)%17.21g\t", func->name, (double)best * ticks2cycles );
fflush( stdout );
best = -1ULL;
for( i = 0; i < count; i++ )
{
start = mach_absolute_time();
for( j = 0; j < innerCount; j++ )
c += func->testD( d );
stop = mach_absolute_time();
current = stop - start;
if( current < best )
best = current;
}
best -= latency;
printf( "(D)%17.21g\t", (double)best * ticks2cycles );
fflush( stdout );
best = -1ULL;
for( i = 0; i < count; i++ )
{
start = mach_absolute_time();
for( j = 0; j < innerCount; j++ )
c += func->correct( d );
stop = mach_absolute_time();
current = stop - start;
if( current < best )
best = current;
}
best -= latency;
printf( "(L)%17.21g (%g, %g) \n", (double)best * ticks2cycles, c, cf );
fflush( stdout );
}
for( bfunc = binaries; NULL != bfunc->testF; bfunc++ )
{
c = 0;
cf = 0;
best = -1ULL;
for( i = 0; i < count; i++ )
{
start = mach_absolute_time();
for( j = 0; j < innerCount; j++ )
cf += bfunc->testF( f, f );
stop = mach_absolute_time();
current = stop - start;
if( current < best )
best = current;
}
best -= latency;
printf( "%s:\t(F)%17.21g\t", bfunc->name, (double)best * ticks2cycles );
fflush( stdout );
best = -1ULL;
for( i = 0; i < count; i++ )
{
start = mach_absolute_time();
for( j = 0; j < innerCount; j++ )
c += bfunc->testD( d, d );
stop = mach_absolute_time();
current = stop - start;
if( current < best )
best = current;
}
best -= latency;
printf( "(D)%17.21g\t", (double)best * ticks2cycles );
fflush( stdout );
best = -1ULL;
for( i = 0; i < count; i++ )
{
start = mach_absolute_time();
for( j = 0; j < innerCount; j++ )
c += bfunc->correct( d, d );
stop = mach_absolute_time();
current = stop - start;
if( current < best )
best = current;
}
best -= latency;
printf( "(L)%17.21g (%g, %g) \n", (double)best * ticks2cycles, c, cf );
fflush( stdout );
}
printf( "\n\n" );
}
#endif
#if 0
for( func = unaries + 0; NULL != func->testF; func++ )
{
for( i = 0; i < specialDCount; i++ )
{
double valueD = specialD[i].d;
float valueF = specialF[i].f;
long double correctD = func->correct( (long double) valueD );
long double correctF = func->correct( (long double) valueF );
double testD = func->testD( valueD );
float testF = func->testF( valueF );
double errD = checkErrorD( testD, correctD );
double errF = checkErrorF( testF, correctF );
if( fabs( errD ) > 0.5 )
printf( "%d) %21.21g ulp error for %s( %21.21g ) = %21.21g vs. %21.21g\n", i, errD, func->name, valueD, testD, (double) correctD );
if( fabs( errF ) > 0.5 )
printf( "%d) %21.21g ulp error for %sf( %21.21g ) = %21.21g vs. %21.21g\n", i, errF, func->name, valueF, testF, (double) correctF );
}
}
for( bfunc = binaries + 0; NULL != bfunc->testF; bfunc++ )
{
for( i = 12; i < specialDCount; i++ )
{
double valueD = specialD[i].d;
float valueF = specialF[i].f;
for( j = 6; j < specialDCount; j++ )
{
double valueD2 = specialD[j].d;
float valueF2 = specialF[j].f;
long double correctD = bfunc->correct( (long double) valueD, (long double) valueD2 );
long double correctF = bfunc->correct( (long double) valueF, (long double) valueF2 );
double testD = bfunc->testD( valueD, valueD2 );
float testF = bfunc->testF( valueF, valueF2 );
double errD = checkErrorD( testD, correctD );
double errF = checkErrorF( testF, correctF );
if( fabs( errD ) > 0.5 )
printf( "%d,%d) %21.21g ulp error for %s( %21.21g, %21.21g ) = %21.21g vs. %21.21g\n", i,j, errD, bfunc->name, valueD, valueD2, testD, (double) correctD );
if( fabs( errF ) > 0.5 )
printf( "%d,%d) %21.21g ulp error for %sf( %21.21g, %21.21g ) = %21.21g vs. %21.21g\n", i,j, errF, bfunc->name, valueF, valueF2, testF, (double) correctF );
}
}
}
#endif
printf( "done.\n" );
return 0;
}
static inline double checkErrorF( float v, long double c )
{
if( c != c )
{
if( v != v )
return 0.0;
else
return INFINITY;
}
if( v != v )
return INFINITY;
long double lv = v;
long double error = lv - c;
long double cExp = logb( c );
long double scale = scalb( 1.0, -cExp );
error *= scale;
error *= 0x1.0p23;
return (double) error;
}
static inline double checkErrorD( double v, long double c )
{
if( c != c )
{
if( v != v )
return 0.0;
else
return INFINITY;
}
if( v != v )
return INFINITY;
long double lv = v;
long double error = lv - c;
long double cExp = logb( c );
long double scale = scalb( 1.0, -cExp );
error *= scale;
error *= 0x1.0p52;
return (double) error;
}