floor.s   [plain text]


/* double floor( double )
 *
 * Reimplemented by Steve Canon, based on Ian Ollmann's implementations
 * tuned for increased performance on in-order machines (but faster on
 * out-of-order machines as well).
 *
 * Copyright 2009, Apple Inc.
 */

#ifdef __i386__

#ifdef __SSE3__

.text
.align 4
.globl _floor
_floor:
	mov		  8(%esp),			%ecx
 	fldl      4(%esp)
	cmp			$0x43300000,	%ecx	// if x is negative or x > 0x1.0p53
	jae			1f						//    goto 1
	
	fisttpll  4(%esp)					// fast path if +0 <= x < 0x1.0p52
	fildll	  4(%esp)					// return trunc(x)
	ret
	
1:	and			$0x7fffffff,	%ecx
	sub			$1,				%ecx
	cmp			$0x43300000,	%ecx	// if |x| > 0x1.0p52 or isnan(x) or
	fld			%st(0)					// the high word of |x| is zero,
	jae			4f						//    goto 4
	
2:	fistpll	  4(%esp)
	fildll    4(%esp)					// rint(x)
	fucomi      %st(1),			%st(0)
	fstp        %st(1)
	jbe			3f						// if rint(x) <= x, return rint(x)
	fld1
	fsubrp      %st(0),			%st(1)	// otherwise, return rint(x) - 1.0
3:	ret

4:	jg			5f						// if |x| > 0x1.0p52 or isnan(x), return x
	testl		$0xffffffff,  4(%esp)	// if x is -denorm, go back through the
	jnz			2b						// main path, returning -1.0
5:	fstp		%st(1)					// otherwise, pop the extra copy of x, and
	ret									// return x.
	
#else // i386, no SSE3

.text
.align 4
.globl _floor
_floor:
	mov		  8(%esp),			%ecx
	and			$0x7fffffff,	%ecx	// high word of |x|
 	fldl      4(%esp)
	sub			$1,				%ecx
	cmp			$0x432fffff,	%ecx	// if |x| >= 0x1.0p52 or isnan(x) or iszero(x)
	fld         %st(0)					// or x is a denormal whose high 32 bits are
	jae			2f						// all zero, goto 2

0:	fistpll	  4(%esp)
	fildll    4(%esp)					// rint(x)
	fucomi      %st(1),			%st(0)
	fstp        %st(1)
	jbe         1f						// if rint(x) <= x, return rint(x)
	fld1
	fsubrp      %st(0),			%st(1)	// otherwise, return rint(x) - 1.0
1:	ret

2:	jg			3f						// if |x| >= 0x1.0p52 or isnan(x), goto 3
	testl		$0xffffffff,  4(%esp)	// if x is denorm (not zero)
	jnz			0b						//    jump back to the mainline
3:	fstp		%st(1)					// pop the extra copy of x off the stack
	ret									// and return x

#endif // SSE3
	
#else // x86_64

.const
.align 4
mone:	.quad	0xbff0000000000000
absmask:.quad   0x7fffffffffffffff
thresh: .quad	0x432fffffffffffff

.text
.align 4
.globl _floor
_floor:
	movd		%xmm0,			%rcx
	andq		absmask(%rip),	%rcx	// |x|
	subq		$1,				%rcx
	cmpq		thresh(%rip),	%rcx	// if |x| >= 0x1.0p52 or isnan(x) or iszero(x)
	jae			1f						//    early out, returning x.

	cvttsd2si   %xmm0,			%rax
	cvtsi2sd    %rax,			%xmm1	// trunc(x)
	cmpltsd		%xmm1,			%xmm0
	andpd		mone(%rip),		%xmm0	// x < trunc(x) ? -1.0 : 0.0
	addsd       %xmm1,			%xmm0	// floor(x)
1:  ret

#endif