/*
 *	lround.s
 *
 *		by Ian Ollmann
 *
 *	Copyright (c) 2007,  Apple Inc.  All Rights Reserved.
 *
 *	Implementation of C99 lround and llround functions for i386 and x86_64.
 */
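
/*
 *	For reference, the rounding implemented below corresponds roughly to the
 *	following portable C sketch.  This is an illustration only, not part of the
 *	build; llround_ref is a hypothetical name, and the NaN / overflow results
 *	shown are the saturating values this file happens to produce:
 *
 *		#include <math.h>
 *		#include <limits.h>
 *
 *		long long llround_ref( double x )
 *		{
 *			if( isnan( x ) )					// NaN: result is unspecified by C99;
 *				return LLONG_MIN;				//   this file returns the x86 integer indefinite
 *			if( fabs( x ) >= 0x1.0p63 )				// unrepresentable: the assembly raises
 *				return x < 0.0 ? LLONG_MIN : LLONG_MAX;		//   invalid and saturates
 *			long long n = (long long) x;				// trunc(x)
 *			if( (double) n != x )					// not already an integer:
 *				n = (long long)( x + copysign( 0.5, x ) );	// round half away from zero
 *			return n;
 *		}
 *
 *	lround is analogous with LONG_MIN / LONG_MAX and the matching bound.  Note
 *	that the pure double precision add in the sketch misrounds x = nextafter( 0.5, 0 )
 *	to 1; the assembly avoids this either by adding in x87 extended precision
 *	(i386 llround) or by special casing that value (the SSE2 paths).
 */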
 
#include <machine/asm.h>
#include "abi.h"


#if defined( __i386__ )


	ENTRY( llround )
		movl	4+FRAME_SIZE(STACKP),	%eax
		fldl	FRAME_SIZE(STACKP)				//  { x }
		movsd	FRAME_SIZE(STACKP),		%xmm0
		fld		%st(0)							//	{ x, x }
		SUBP	$12,					STACKP

#if defined( __SSE3__ )
		fisttpll	(STACKP)					// { x }, trunc(x), set invalid / inexact if necessary
#else
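        // No fisttp before SSE3: save the x87 control word, set the rounding
        // control field (bits 10-11, mask 0xc00) to round-toward-zero so that
        // fistpll truncates, then restore the caller's control word before returning.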
        fnstcw      8( STACKP )
        movw        8( STACKP ),		%dx
        movw        %dx,                %cx
        orw         $0xc00,             %dx
        movw        %dx,				8( STACKP )
        fldcw       8( STACKP )
		fistpll		(STACKP)					// { x }, trunc(x), set invalid / inexact if necessary
#endif
		fildll	(STACKP)						// { trunc(x), x }
		fucomip %st(1),					%st(0)	// { x }	if( x == trunc(x) || isnan(x) )
		je		1f								//				use the result we already calculated  (avoid setting inexact)

		movl	%eax,					%edx	// x >> 32
		andl	$0x7fffffff,			%eax	// |x >> 32|
		xorl	%eax,					%edx	// signof( x )		
		cmpl	$0x43e00000,			%eax	// |x| >= 0x1.0p63
		jae		2f


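		// 0x3f000000 is the single precision encoding of 0.5f, so OR-ing it into the
		// sign word builds copysign( 0.5f, x ).  The add below happens in x87 extended
		// precision, so x = nextafter( 0.5, 0 ) does not round up to 1.0 the way a pure
		// double precision add would (compare the special case in the SSE2 path below).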
		orl		$0x3f000000,			%edx	//	copysign( 0.5f, x )
		movl	%edx,					8(STACKP)
		fadds	8(STACKP)						// { copysign( 0.5f, x ) + x }		// exact due to extra precision (we don't support the case where someone changes the precision control bits)

#if defined( __SSE3__ )
		fisttpll	(STACKP)					// trunc(x), set invalid / inexact if necessary
#else
		fistpll		(STACKP)					// trunc(x), set invalid / inexact if necessary
#endif

		//exit
#if ! defined( __SSE3__ )
		movw	%cx,					8(STACKP)
		fldcw	8(STACKP)
#endif
		movl	(STACKP),				%eax	
		movl	4(STACKP),				%edx
		ADDP	$12,					STACKP
		ret

//		x is an integer or NaN
1:		fstp	%st(0)							// {}
#if ! defined( __SSE3__ )
		movw	%cx,					8(STACKP)
		fldcw	8(STACKP)
#endif
		movl	(STACKP),				%eax	
		movl	4(STACKP),				%edx
		ADDP	$12,					STACKP
		ret

2:		// overflow
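		// The earlier fistpll / fisttpll stored 0x8000000000000000 (LONG_LONG_MIN)
		// for this out-of-range value; flip it to LONG_LONG_MAX below when x > 0.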
		fstp	%st(0)							// {}
		subl	$1,						%edx	// x < 0 ? 0x7fffffff : -1
		sarl	$31,					%edx	// x < 0 ? 0 : -1
#if ! defined( __SSE3__ )
		movw	%cx,					8(STACKP)
		fldcw	8(STACKP)
#endif
		movl	(STACKP),				%eax	// low word of the stored result (LONG_LONG_MIN here)
		xorl	%edx,					%eax	// flip LONG_LONG_MIN to LONG_LONG_MAX if x > 0
		xorl	4(STACKP),				%edx	// high word, fixed up the same way
		ADDP	$12,					STACKP
		ret

		
#define LONG_MIN_hi		0x41E00000				// high 32 bits of 0x1.0p31 ( |LONG_MIN| on i386 )

	ENTRY( lround )
		movsd	FRAME_SIZE(STACKP),		%xmm1	// x
		movapd	%xmm1,					%xmm0	// x
		psrlq	$32,					%xmm1	// x >> 32
		movd	%xmm1,					%edx	// x >> 32
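		// fall through to the shared rounding code below; the #elif block that
		// follows is compiled only for x86_64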
#elif defined( __x86_64__ )

#define LONG_MIN_hi		0x43E00000				// high 32 bits of 0x1.0p63 ( |LONG_MIN| on x86_64 )


	ENTRY( lround )
	ENTRY( llround )
		movd	%xmm0,					%rdx		// x
		shrq	$32,					%rdx		// x >> 32
#endif

		cvttsd2si	%xmm0,				AX_P		// (long) x, set invalid / inexact if necessary

		cvtsi2sd	AX_P,				%xmm1		// trunc(x)
		ucomisd		%xmm0,				%xmm1		// x == trunc(x) || isnan(x)
		je		1f									//		return (long) x

		MOVP	DX_P,					CX_P		// x >> 32
		and		$0x7fffffff,			DX_P		// |x >> 32 |
		XORP	DX_P,					CX_P		// signof( x )
		cmpl	$LONG_MIN_hi,			%edx		// high word of |x| >= high word of 0x1.0p31 (i386 lround) / 0x1.0p63 (x86_64)
		jae		3f

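		// Unlike the x87 path above, a double precision add can misround here:
		// nextafter( 0.5, 0 ) + 0.5 rounds up to 1.0, which would return ±1 instead
		// of 0.  Detect that one value and return the already truncated result.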
		orl		$0x3fe00000,			%ecx		// copysign( 0.5, x ) >> 32
		movd	%ecx,					%xmm1		// copysign( 0.5, x ) >> 32
		psllq	$32,					%xmm1		// copysign( 0.5, x )
		pcmpeqb %xmm2,					%xmm2		// -1ULL
		paddq	%xmm1,					%xmm2		// copysign( 0.5 - 1 ulp, x )
		ucomisd	%xmm0,					%xmm2		// |x| == 0.5 - 1 ulp
		je		1f									//		return (long) x

		addsd	%xmm1,					%xmm0		// x += copysign( 0.5, x )
		cvttsd2si %xmm0,				AX_P		// (int) (x + copysign( 0.5, x ) )
#if defined( __i386__ )
		cmpl	$0x80000000,			AX_P		// result is 0x80000000: LONG_MIN itself, or the integer indefinite from an overflowing conversion
		je		2f									//		fix up positive overflow to LONG_MAX below
#endif

	1:	ret
			
		// overflow
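		// Here AX_P holds the LONG_MIN bit pattern (the conversion's integer indefinite);
		// the subtract / shift / xor sequence below flips it to LONG_MAX when x is positive.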
#if defined( __i386__ )
	2:	andl	$0x80000000,			%ecx		// recover signof( x ); ecx still held copysign( 0.5, x ) >> 32
#endif
	3:	SUBP	$1,						CX_P		// x < 0 ? 0x7fffffff : -1
		sar		$31,					CX_P		// x < 0 ? 0 : -1
		XORP	CX_P,					AX_P		// flip LONG_LONG_MIN to LONG_LONG_MAX if needed
		ret