roundl.s [plain text]

/*
 *  roundl.s
 *  LibmV5
 *
 *  Created by Ian Ollmann on 8/19/05.
 *  Copyright 2005 Apple Computer. All rights reserved.
 *
 */

#include "machine/asm.h"

#define LOCAL_STACK_SIZE	12
#include "abi.h"


ENTRY(roundl)
    SUBP        $LOCAL_STACK_SIZE,     STACKP
    movl		$0x3f000000, 8(STACKP)             //0.5f
    movl		$0x5f000000, 4(STACKP)             //0x1.0p63 

//convert |f| to an int
    //check to see if f is already an int or zero:      |f| >= 2**63 is an int
    flds        8(STACKP)             //  {0.5}
    fldt        FIRST_ARG_OFFSET(STACKP)            //  {f, 0.5}
    fabs                            //  {|f|, 0.5}
    flds        4(STACKP)             //  {limit,     |f|, 0.5}
    fucomip     %ST(1), %ST         //  {|f|, 0.5}       0x1.0p63 <= |f| or NaN

    //if it is a large int, NaN, replace it with 0.5. This avoids spurious overflows, illegal, and inexact
    fld         %ST(0)              //  {|f|,   |f|,    0.5  }
    fcmovbe     %ST(2), %ST(0)      //  {.5or|f|, |f|,  0.5  }

    //since we have removed large integers, inf and NaN from being used, we can now round safely
    //add 0.5 (with same sign as f) to f, or to the 0.5 we just put there. 
    faddp       %ST(0), %ST(2)      //  {|f|, .5or|f|+.5 }
    fxch                            //  {.5or|f|+.5,    |f|} 

    //then convert to int with truncation to zero
    fisttpll    (STACKP)              //  {|f|}   ***USES SSE3***

    //generate NaN result
    fldz                            //  {0, |f|}
    faddp                           //  {|f|}   NaN silenced
    
    //load the integer result back in
    fildll      (STACKP)              //  {|intf|,  |f|}

    //if 2**63 <= |f| or NaN, use |f| instead
    fcmovbe      %ST(1), %ST(0)     //  {|intf| or |f|,   |f|}
    fstp         %ST(1)             //  {|intf| or |f|}
    
    //deal with the sign
    fldt        FIRST_ARG_OFFSET(STACKP)            //  { f, |intf| or |f|} 
    fldz                            //  { 0, f, |intf| or |f| }
    fucomip     %ST(1), %ST         //  { f, |intf| or |f| }           0 < f or f is NaN 
    fld         %ST(1)              //  { |intf| or |f|,  f, |intf| or |f| }  
    fchs                            //  { -(intf or f+0 or f), f, intf or f+0 or f }
    fcmovb      %ST(2), %ST(0)      //  { result, f, intf or f+0 or f}
    
    //return f, if f == 0
    fcmove      %ST(1), %ST(0)      //  { result, f, intf or f+0 or f}
    
    //clean up the stack
    fstp        %ST(2)              //  { intf or f+0 or f, result }
    fstp        %ST(0)              //  { result }

    ADDP        $LOCAL_STACK_SIZE,    STACKP
    ret


ENTRY(llroundl)
	SUBP		$LOCAL_STACK_SIZE, STACKP
    movl		$0x3f000000, 8(STACKP)             //0.5f

	//calculate floor(|x|)
	fldt		FIRST_ARG_OFFSET(STACKP)			//	{f}
	fld			%st(0)				//	{ f, f }
	fabs							//	{|f|, f}
	fld			%st(0)				//	{|f|, |f|, f}
	frndint							//	{rint(|f|), |f|, f}	
	fucomi		%st(1), %st(0)		//	{rint(|f|), |f|, f}
	fldz							//	{0, rint(|f|), |f|, f}			//select between 0 and 1 here to avoid inexact on large inputs
	fld1							//	{1, 0, rint(|f|), |f|, f}		
	fcmove		%st(1), %st(0)		//	{1 or 0, 0, rint(|f|), |f|, f}
	fstp		%st(1)				//	{1 or 0, rint( |f| ), |f|, f }
	fsubr		%st(1), %st(0)		//	{rint(|f|-1or0, rint(|f|), |f|, f }
	fxch							//	{rint(|f|, rint(|f|)-1or0, |f|, f }
	fcmovnbe	%st(1), %st(0)		//	{floor(|f|), rint(|f|)-1, |f|, f}
	fstp		%st(1)				//	{floor(|f|), |f|, f}
	
	//calculate the difference between floor(|f|) and |f|
	fsubr		%st(1), %st(0)		//	{ |f| - floor(|f|), |f|, f }

    //if( |f| - floor(|f|) >= 0.5 )	add 0.5, otherwise add |f| - floor(|f|) (which gives similar results to adding zero)
    flds        8(STACKP)				//  { 0.5, |f| - floor(|f|), |f|, f }
	fucomi		%st(1), %st(0)		//	{ 0.5, |f| - floor(|f|), |f|, f }
	fcmovnb		%st(1), %st(0)		//	{0.5 or |f| - floor(|f|), |f| - floor(|f|), |f|, f }
	fstp		%st(1)				//	{0.5 or |f| - floor(|f|), |f|, f }
	faddp							//	{ |f| + 0.5 or |f| - floor(|f|), f }

	//set the sign properly
	fldz							//	{ 0, a, f }
	fucomip		%st(2), %st(0)		//	{ a, f }
	fld			%st(0)				//	{ a, a, f }
	fchs							//	{ -a, a, f }
	fcmovb		%st(1), %st(0)		//	{ +-a, a, f }
	fstp		%st(2)				//	{ a, +-a }
	fstp		%st(0)				//	{ +-a }

    //convert to long long
    fisttpll    (STACKP)              //  {}

    //load into return registers
#if defined( __LP64__ ) 
    movq       (STACKP),     %rax
#else
    movl        (STACKP),     %eax    
    movl        4(STACKP),    %edx
#endif
    
    ADDP        $LOCAL_STACK_SIZE,    STACKP
    ret
    
ENTRY(lroundl)
	SUBP		$LOCAL_STACK_SIZE, STACKP
    movl		$0x3f000000, 8(STACKP)             //0.5f
    movl		$0x4f000000, 4(STACKP)             //0x1.0p31f

	//calculate floor(|x|)
	fldt		FIRST_ARG_OFFSET(STACKP)			//	{f}
	fld			%st(0)				//	{ f, f }
	fabs							//	{|f|, f}
	fld			%st(0)				//	{|f|, |f|, f}
	frndint							//	{rint(|f|), |f|, f}	
	fucomi		%st(1), %st(0)		//	{rint(|f|), |f|, f}
	fldz							//	{0, rint(|f|), |f|, f}			//select between 0 and 1 here to avoid inexact on large inputs
	fld1							//	{1, 0, rint(|f|), |f|, f}		
	fcmove		%st(1), %st(0)		//	{1 or 0, 0, rint(|f|), |f|, f}
	fstp		%st(1)				//	{1 or 0, rint( |f| ), |f|, f }
	fsubr		%st(1), %st(0)		//	{rint(|f|-1or0, rint(|f|), |f|, f }
	fxch							//	{rint(|f|, rint(|f|)-1or0, |f|, f }
	fcmovnbe	%st(1), %st(0)		//	{floor(|f|), rint(|f|)-1, |f|, f}
	fstp		%st(1)				//	{floor(|f|), |f|, f}
	
	//calculate the difference between floor(|f|) and |f|
	fsubr		%st(1), %st(0)		//	{ |f| - floor(|f|), |f|, f }

    //if( |f| - floor(|f|) >= 0.5 )	add 0.5, otherwise add |f| - floor(|f|) (which gives similar results to adding zero)
    flds        8(STACKP)				//  { 0.5, |f| - floor(|f|), |f|, f }
	fucomi		%st(1), %st(0)		//	{ 0.5, |f| - floor(|f|), |f|, f }
	fcmovnb		%st(1), %st(0)		//	{0.5 or |f| - floor(|f|), |f| - floor(|f|), |f|, f }
	fstp		%st(1)				//	{0.5 or |f| - floor(|f|), |f|, f }
	faddp							//	{ |f| + 0.5 or |f| - floor(|f|), f }

	//set the sign properly
	fldz							//	{ 0, a, f }
	fucomip		%st(2), %st(0)		//	{ a, f }
	fld			%st(0)				//	{ a, a, f }
	fchs							//	{ -a, a, f }
	fcmovb		%st(1), %st(0)		//	{ +-a, a, f }
	fstp		%st(2)				//	{ a, +-a }
	fstp		%st(0)				//	{ +-a }
	
	//check for overflow
	xorl		%edx,	%edx		// zero edz
	flds		4(STACKP)				//	{ 0x1.0p31, +-a }
	fxch							//  { +-a, 0x1.0p31 }
	fucomi		%st(1), %st(0)		//	{ +-a, 0x1.0p31 }
	setnb		%dl

#if defined( __LP64__ ) 
	negq		%rdx
    //convert to long long
    fisttpll    (STACKP)              //  {0x1.0p31}
	fstp		%st(0)

    //load into return registers
    movq       (STACKP),     %rax
	xorq		DX_P,		%rax
#else   
	negl		%edx
    //convert to long
    fisttpl     (STACKP)              //  {0x1.0p31}
	fstp		%st(0)

    //load into return registers
    movl        (STACKP),     %eax
	xorl		DX_P,		%eax
#endif

    ADDP        $LOCAL_STACK_SIZE,		STACKP
    ret
	
ENTRY(round)
//convert |f| to an int
    //check to see if f is already an int or zero:      |f| >= 2**63 is an int
#if defined( __LP64__ )
    SUBP        $24,     STACKP
	movl		$0x3f000000, 20(STACKP)
	movl		$0x5f000000, 16(STACKP)
    flds        20(STACKP)             //  {0.5}
	movsd		%xmm0, 8(STACKP)
    fldl        8(STACKP)				//  {f, 0.5}
    fabs                            //  {|f|, 0.5}
    flds        16(STACKP)             //  {limit,     |f|, 0.5}
#else
    pushl       $0x3f000000             //0.5f
    pushl       $0x5f000000             //0x1.0p63 
    SUBP        $4,     STACKP
    flds        8(STACKP)             //  {0.5}
    fldl        (FIRST_ARG_OFFSET)(STACKP)            //  {f, 0.5}
    fabs                            //  {|f|, 0.5}
    flds        4(STACKP)             //  {limit,     |f|, 0.5}
#endif

    fucomip     %ST(1), %ST         //  {|f|, 0.5}       0x1.0p63 <= |f| or NaN

    //if it is a large int, NaN, replace it with 0.5. This avoids spurious overflows, illegal, and inexact
    fld         %ST(0)              //  {|f|,   |f|,    0.5  }
    fcmovbe     %ST(2), %ST(0)      //  {.5or|f|, |f|,  0.5  }

    //since we have removed large integers, inf and NaN from being used, we can now round safely
    //add 0.5 (with same sign as f) to f, or to the 0.5 we just put there. 
    faddp       %ST(0), %ST(2)      //  {|f|, .5or|f|+.5 }
    fxch                            //  {.5or|f|+.5,    |f|} 

    //then convert to int with truncation to zero
    fisttpll    (STACKP)              //  {|f|}   ***USES SSE3***

    //generate NaN result
    fldz                            //  {0, |f|}
    faddp                           //  {|f|}   NaN silenced
    
    //load the integer result back in
    fildll      (STACKP)              //  {|intf|,  |f|}

    //if 2**63 <= |f| or NaN, use |f| instead
    fcmovbe      %ST(1), %ST(0)     //  {|intf| or |f|,   |f|}
    fstp         %ST(1)             //  {|intf| or |f|}
    
    //deal with the sign
#if defined( __LP64__ )
    fldl        8(STACKP)            //  { f, |intf| or |f|} 
#else
    fldl        FIRST_ARG_OFFSET(STACKP)            //  { f, |intf| or |f|} 
#endif
    fldz                            //  { 0, f, |intf| or |f| }
    fucomip     %ST(1), %ST         //  { f, |intf| or |f| }           0 < f or f is NaN 
    fld         %ST(1)              //  { |intf| or |f|,  f, |intf| or |f| }  
    fchs                            //  { -(intf or f+0 or f), f, intf or f+0 or f }
    fcmovb      %ST(2), %ST(0)      //  { result, f, intf or f+0 or f}
    
    //return f, if f == 0
    fcmove      %ST(1), %ST(0)      //  { result, f, intf or f+0 or f}
    
    //clean up the stack
    fstp        %ST(2)              //  { intf or f+0 or f, result }
    fstp        %ST(0)              //  { result }

#if defined( __LP64__ )
	fstpl		(STACKP)
	movsd		(STACKP), %xmm0
    ADDP        $24,    STACKP
#else
    ADDP        $12,    STACKP
#endif

    ret

	
ENTRY(roundf)
    SUBP        $LOCAL_STACK_SIZE,     STACKP
    movl		$0x3f000000, 8(STACKP)             //0.5f
    movl		$0x5f000000, 4(STACKP)             //0x1.0p63 

//convert |f| to an int
    //check to see if f is already an int or zero:      |f| >= 2**63 is an int
    flds        8(STACKP)             //  {0.5}

#if defined( __LP64__ )
	movss		%xmm0,  8(STACKP)
	flds		8(STACKP)
#else
    flds        FIRST_ARG_OFFSET(STACKP)            //  {f, 0.5}
#endif
    fabs                            //  {|f|, 0.5}
    flds        4(STACKP)             //  {limit,     |f|, 0.5}
    fucomip     %ST(1), %ST         //  {|f|, 0.5}       0x1.0p63 <= |f| or NaN

    //if it is a large int, NaN, replace it with 0.5. This avoids spurious overflows, illegal, and inexact
    fld         %ST(0)              //  {|f|,   |f|,    0.5  }
    fcmovbe     %ST(2), %ST(0)      //  {.5or|f|, |f|,  0.5  }

    //since we have removed large integers, inf and NaN from being used, we can now round safely
    //add 0.5 (with same sign as f) to f, or to the 0.5 we just put there. 
    faddp       %ST(0), %ST(2)      //  {|f|, .5or|f|+.5 }
    fxch                            //  {.5or|f|+.5,    |f|} 

    //then convert to int with truncation to zero
    fisttpll    (STACKP)              //  {|f|}   ***USES SSE3***

    //generate NaN result
    fldz                            //  {0, |f|}
    faddp                           //  {|f|}   NaN silenced
    
    //load the integer result back in
    fildll      (STACKP)              //  {|intf|,  |f|}

    //if 2**63 <= |f| or NaN, use |f| instead
    fcmovbe      %ST(1), %ST(0)     //  {|intf| or |f|,   |f|}
    fstp         %ST(1)             //  {|intf| or |f|}
    
    //deal with the sign
#if defined( __LP64__ )
	flds		8(STACKP)
#else
    flds        16(STACKP)            //  { f, |intf| or |f|} 
#endif
    fldz                            //  { 0, f, |intf| or |f| }
    fucomip     %ST(1), %ST         //  { f, |intf| or |f| }           0 < f or f is NaN 
    fld         %ST(1)              //  { |intf| or |f|,  f, |intf| or |f| }  
    fchs                            //  { -(intf or f+0 or f), f, intf or f+0 or f }
    fcmovb      %ST(2), %ST(0)      //  { result, f, intf or f+0 or f}
    
    //return f, if f == 0
    fcmove      %ST(1), %ST(0)      //  { result, f, intf or f+0 or f}
    
    //clean up the stack
    fstp        %ST(2)              //  { intf or f+0 or f, result }
    fstp        %ST(0)              //  { result }

#if defined( __LP64__ )
	fstps		(STACKP)
	movss		(STACKP), %xmm0
#endif

    ADDP        $LOCAL_STACK_SIZE,    STACKP
    ret