/*  double ceil(double )
 *
 *  Reimplemented by Steve Canon, based on Ian Ollmann's implementations
 *  tuned for increased performance on in-order machines (but faster on
 *  out-of-order machines as well).
 *
 *  Copyright 2009, Apple Inc.
 *
 *  Syntax:  AT&T operand order (source, destination); the file relies on
 *           the C preprocessor for the #ifdef selection below.
 *
 *  i386 variants
 *      In:     x is read from the stack at 4(%esp) (high word at 8(%esp)).
 *      Out:    ceil(x) in %st(0).
 *      Clobb:  %ecx, EFLAGS, and the 8-byte argument slot at 4(%esp),
 *              which is reused as scratch for the integer conversion.
 *
 *  x86_64 variant
 *      In:     x in %xmm0.
 *      Out:    ceil(x) in %xmm0.
 *      Clobb:  %rax, %rcx, %xmm1, %xmm2, EFLAGS.
 *
 *  All variants return x unchanged when |x| >= 0x1.0p52 (every such double
 *  is already an integer) and when x is infinite or NaN, and they keep the
 *  sign of x on a zero result (for example ceil(-0.5) is -0.0).
 */

#ifdef __i386__

#ifdef __SSE3__

.text
.align 4
.globl _ceil
_ceil:
	mov		8(%esp),	%ecx		// ecx = high 32 bits of x
	fldl	4(%esp)					// st0 = x
	cmp		$0x43300000,	%ecx	// 0x43300000 is the high word of 0x1.0p52; the
	jae		2f						// unsigned compare sends negative x, x >= 0x1.0p52,
									// inf and NaN to 2

	// Here 0 <= x < 0x1.0p52.
	fld		%st(0)					// st0 = x, st1 = x
	fistpll	4(%esp)					// convert in the current rounding mode; st0 = x
	fildll	4(%esp)					// st0 = rint(x), st1 = x
	fucomi	%st(1),		%st(0)		// flags <- compare rint(x) with x
	fstp	%st(1)					// drop x; st0 = rint(x)
	jae		1f						// if rint(x) >= x, return rint(x)
	fld1							// st0 = 1.0, st1 = rint(x)
	faddp	%st(0),		%st(1)		// otherwise, return rint(x) + 1.0
1:	ret

2:	and		$0x7fffffff,	%ecx	// strip the sign bit: high word of |x|
	cmp		$0x43300000,	%ecx	// if |x| >= 0x1.0p52, or x is inf or NaN,
	jae		3f						//    return x, which is still in st0

	// Here x is negative (or -0.0) with |x| < 0x1.0p52, so ceil(x) == trunc(x).
	fisttpll 4(%esp)				// SSE3 truncating store; pops x
	fldz							// st0 = +0.0
	fildll	4(%esp)					// st0 = ceil(x) up to sign of zero, st1 = +0.0
	fucomi	%st(1),		%st(0)		// flags <- compare result with 0
	fstp	%st(1)					// drop the zero; st0 = result
	jne		3f						// non-zero result is already correct
	fchs							// result == 0: x was negative, so return -0.0
3:	ret

#else // i386, no SSE3

.text
.align 4
.globl _ceil
_ceil:
	mov		8(%esp),	%ecx		// ecx = high 32 bits of x
	fldl	4(%esp)					// st0 = x
	cmp		$0x43300000,	%ecx	// 0x43300000 is the high word of 0x1.0p52; the
	jae		2f						// unsigned compare sends negative x, x >= 0x1.0p52,
									// inf and NaN to 2

	// Here 0 <= x < 0x1.0p52.
	fld		%st(0)					// st0 = x, st1 = x
	fistpll	4(%esp)					// convert in the current rounding mode; st0 = x
	fildll	4(%esp)					// st0 = rint(x), st1 = x
	fucomi	%st(1),		%st(0)		// flags <- compare rint(x) with x
	fstp	%st(1)					// drop x; st0 = rint(x)
	jae		1f						// if rint(x) >= x, return rint(x)
	fld1							// st0 = 1.0, st1 = rint(x)
	faddp	%st(0),		%st(1)		// otherwise, return rint(x) + 1.0
1:	ret

2:	and		$0x7fffffff,	%ecx	// strip the sign bit: high word of |x|
	cmp		$0x43300000,	%ecx	// if |x| >= 0x1.0p52, or x is inf or NaN
	fld		%st(0)					// st0 = x, st1 = x (fld leaves EFLAGS untouched)
	jae		4f						//    return x

	// Here x is negative (or -0.0) with |x| < 0x1.0p52.
	fistpll	4(%esp)					// convert in the current rounding mode; st0 = x
	fldz							// st0 = +0.0, st1 = x
	fildll	4(%esp)					// st0 = rint(x), st1 = +0.0, st2 = x
	fucomi	%st(2),		%st(0)		// flags <- compare rint(x) with x
	fstp	%st(2)					// overwrite x; st0 = +0.0, st1 = rint(x)
	jae		3f						// if rint(x) < x, add one
	fld1							// st0 = 1.0, st1 = +0.0, st2 = rint(x)
	faddp	%st(0),		%st(2)		// st2 += 1.0 and pop: st0 = +0.0, st1 = ceil(x)
3:	fucomi	%st(1),		%st(0)		// flags <- compare 0 with ceil(x)
	jne		4f						// if ceil(x) == 0, patch up sign bit
	fchs							// st0 = -0.0
	fstp	%st(1)					// replace the result with -0.0 and pop
	ret
4:	fstp	%st(0)					// pop the extra entry; st0 = x (from 2) or ceil(x) (from 3)
	ret

#endif // SSE3

#else // x86_64

// The andnpd below reads 16 bytes starting at "one" (so it also reads
// "absmask") and needs that address 16-byte aligned. This assumes ".align 4"
// is a power-of-two alignment (2^4 = 16), as on Mach-O assemblers, and that
// "one" stays the first item after it.
.const
.align 4
one:	.quad	0x3ff0000000000000		// 1.0
absmask:.quad	0x7fffffffffffffff		// every bit except the sign bit
thresh:	.quad	0x4330000000000000		// 0x1.0p52

.text
.align 4
.globl _ceil
_ceil:
	movd		%xmm0,			%rcx	// rcx = bit pattern of x
	andq		absmask(%rip),	%rcx	// |x|
	cmpq		thresh(%rip),	%rcx	// if |x| >= 0x1.0p52 or isnan(x)
	movsd		absmask(%rip),	%xmm2	//    (mask load does not touch EFLAGS)
	jae			1f						//    early out, returning x.

	cvttsd2si	%xmm0,			%rax	// rax = (integer) trunc(x)
	andnpd		%xmm0,			%xmm2	// signbit(x)
	cvtsi2sd	%rax,			%xmm1	// trunc(x), except for sign of zero
	cmplesd		%xmm1,			%xmm0	// all-ones mask if x <= trunc(x), else zero
	orpd		%xmm2,			%xmm1	// trunc(x)
	andnpd		one(%rip),		%xmm0	// x <= trunc(x) ? 0.0 : 1.0
	orpd		%xmm2,			%xmm0	// x <= trunc(x) ? copysign(0.0, x) : 1.0
	addsd		%xmm1,			%xmm0	// ceil(x)
1:	ret

#endif