/*
 *  nextafterf.s
 *
 *      by Ian Ollmann
 *
 *  Copyright (c) 2007, Apple Inc. All Rights Reserved.
 *
 *  Implementation of C99 nextafterf for __i386__ and __x86_64__.
 */

#include <machine/asm.h>
#include "abi.h"

//----------------------------------------------------------------------------
//  float nextafterf( float x, float y )
//
//  Syntax:   AT&T, preprocessed with cpp.
//  In:       __i386__  : x at FRAME_SIZE( STACKP ), y at 4+FRAME_SIZE( STACKP )
//            otherwise : x in %xmm0, y in %xmm1
//  Out:      result in %xmm0; on __i386__ it is also spilled over the x
//            argument slot and loaded onto the x87 stack with flds.
//  Clobbers: %eax, %xmm1, %xmm2, %xmm3, flags
//
//  Method:   treat the bits of x as an integer and step them by one in the
//            direction of y, then patch up the cases where that step lands
//            on a denormal, zero, infinity or NaN encoding.
//
//  Special cases:
//            x or y NaN      -> x + y (quiet NaN)
//            x == y          -> y (C99 7.12.11.3; matters for +0 vs -0)
//            x == 0, y != x  -> smallest denormal with the sign of y,
//                               underflow and inexact raised
//            denormal/zero   -> underflow and inexact raised
//            infinite result -> overflow and inexact raised
//----------------------------------------------------------------------------
ENTRY( nextafterf )
#if defined( __i386__ )
    movss   FRAME_SIZE( STACKP ), %xmm0         // x
    movss   4+FRAME_SIZE( STACKP ), %xmm1       // y
#endif
    ucomiss %xmm0, %xmm1                // ZF set if x == y or unordered
    je      2f                          // x == y || isnan(x) || isnan(y)  -> label 2

    xorps   %xmm2, %xmm2                // 0.0f
    ucomiss %xmm0, %xmm2                // x is not NaN here, so ZF means x == 0
    je      1f                          // x == 0 -> label 1

    // x != y. x != 0.0f. x and y are numeric.
    cmpltss %xmm0, %xmm1                // y < x ? -1 : 0
    xorps   %xmm2, %xmm2                // 0.0f
    cmpltss %xmm0, %xmm2                // 0.0f < x ? -1 : 0
    xorps   %xmm2, %xmm1                // move away from zero ? -1 : 0
    paddd   %xmm1, %xmm1                // move away from zero ? -2 : 0
    pcmpeqb %xmm2, %xmm2                // -1 (kept live: label 4 derives 1U from it)
    psubd   %xmm2, %xmm1                // move away from zero ? -1 : 1
    psubd   %xmm1, %xmm0                // initial stab at result: bits of x +/- 1

    // check to see if we overflowed, underflowed or did other bad things
    movd    %xmm0, %eax                 // result
    andl    $0x7fffffff, %eax           // |result|
    subl    $0x00800000, %eax           // push denormals negative
    cmpl    $0x7f000000, %eax           // unsigned: |result| < 0x1.0p-126 || |result| == inf || isnan( result )
    jae     4f                          //      -> label 4 (flags from cmpl are reused there)
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE( STACKP )
#endif
    ret

1:  // x == 0 and y != x, so result is either 0x80000001 or 0x00000001
    pcmpeqb %xmm0, %xmm0                // -1
    movdqa  %xmm0, %xmm2                // -1
    pslld   $31, %xmm0                  // 0x80000000
    andps   %xmm1, %xmm0                // signof( y )
    psubd   %xmm2, %xmm0                // signof( y ) + 1U

    // set inexact and underflow
    movaps  %xmm0, %xmm1                // 0x80000001 or 0x00000001
    pslld   $23, %xmm1                  // 0x00800000 (sign bit is shifted out)
    orps    %xmm0, %xmm1                // 0x80800001 or 0x00800001
    mulss   %xmm1, %xmm1                // tiny * tiny: set inexact and underflow
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE( STACKP )
#endif
    ret

    // x == y || isnan(x) || isnan(y); flags still hold the first ucomiss
2:  jp      3f                          // PF set -> unordered -> NaN operand, label 3
    movaps  %xmm1, %xmm0                // x == y: return y, so nextafterf( +0, -0 ) is -0
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE( STACKP )
#endif
    ret

3:  // either x or y or both are NaN
    addss   %xmm1, %xmm0                // add the two to silence and move to xmm0
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE( STACKP )
#endif
    ret

    // |result| < 0x1.0p-126 || |result| == inf || isnan( result )
    // flags come from: cmpl $0x7f000000, ( |result| - 0x00800000 )
4:  je      5f                          // if result is infinite, goto 5
    jg      6f                          // signed greater: NaN encoding, goto 6

    // denormal (or zero): set underflow and inexact. xmm2 is still -1 here.
    psrld   $31, %xmm2                  // 1U
    movdqa  %xmm2, %xmm3                // 1U
    pslld   $23, %xmm2                  // 0x00800000U
    por     %xmm3, %xmm2                // 0x00800001U
    mulss   %xmm2, %xmm2                // tiny * tiny: set inexact and underflow
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE( STACKP )
#endif
    ret

    // |result| is infinite; xmm0 already holds the value to return
5:  pcmpeqb %xmm1, %xmm1                // -1
    paddd   %xmm0, %xmm1                // copysign( max finite, result )
    movdqa  %xmm1, %xmm2                // copysign( max finite, result )
    pslld   $7, %xmm1                   // 0xBFFFFF80, about -2.0f for either sign
    addss   %xmm2, %xmm1                // max finite + small value: set inexact
    addss   %xmm2, %xmm2                // max finite + max finite: set overflow
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE( STACKP )
#endif
    ret

    // NaN results were infinities that became NaNs, push back to infinity
6:  pcmpeqb %xmm1, %xmm1                // -1
    paddd   %xmm1, %xmm0                // step the bit pattern back by one
#if defined( __i386__ )
    movss   %xmm0, FRAME_SIZE( STACKP )
    flds    FRAME_SIZE( STACKP )
#endif
    ret