/*
 *  lroundl.s
 *
 *      by Ian Ollmann
 *
 *  Apple Inc. Copyright (c) 2007. All rights reserved.
 *
 */

#include "abi.h"
#include <machine/asm.h>

// Syntax: AT&T, run through the C preprocessor.
// The argument x is read as an 80-bit x87 extended value at FRAME_SIZE( STACKP );
// both macros come from abi.h.
//
// Common strategy of every routine in this file:
//   1. Classify x by its biased exponent alone (sign stripped).
//   2. In range: shift the 64-bit significand right so the half bit lands in
//      bit 0, add that bit to the integer part (round half away from zero),
//      then apply the sign with the xor / subtract idiom.
//   3. Floating-point flags are produced by throw-away x87 operations:
//      |x| + 0x1.0p63 for inexact, an out-of-range fistp store for invalid.

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
        .align  2
        .literal4
two63:      .long   0x5f000000                  // float  0x1.0p63
mtwo63:     .long   0xdf000000                  // float -0x1.0p63
one:        .long   1                           // integer 1, mask for the half bit (i386 llroundl)
inf:        .long   0x7f800000                  // float +infinity bit pattern; not referenced below

        .align  3
        .literal8
cutoff32:   .double 2147483647.5                //   2**31 - 0.5
mcutoff32:  .double -2147483648.5               // -(2**31 + 0.5)

        .align  4
        .literal16
cutoff:     .quad   0xffffffffffffffff, 0x403d  // long double 0x1.0p63 - 0.5
sign:       .quad   0x0, 0xffffffffffffffff     // sign masks: [0] = 0, [1] = all ones

        .text

#if defined( __x86_64__ )

// ---------------------------------------------------------------------------
// lroundl / llroundl  (x86_64 build, one shared body)
//
// In:    x at FRAME_SIZE( STACKP )
// Out:   rax = x rounded to nearest integer, halfway cases away from zero
//        x == 0x1.0p63 - 0.5 or larger      -> 0x7fffffffffffffff
//        x below -0x1.0p63, or NaN          -> 0x8000000000000000
// Uses:  rcx, rdx, flags, one x87 stack slot (popped before every ret).
//        The argument slot is overwritten by the scratch fistpl stores.
// ---------------------------------------------------------------------------
ENTRY( lroundl )
ENTRY( llroundl )
        fldt    FRAME_SIZE( STACKP )            // { x }
        movq    FRAME_SIZE( STACKP ), %rax      // rax = significand of x
        movswl  8+FRAME_SIZE( STACKP ), %edx    // edx = sign and exponent word
        andl    $0x7fff, %edx                   // keep the exponent only
        subl    $0x3ffe, %edx                   // rebias: 0 for [0.5,1), 63 for [2**62,2**63)
        cmpl    $(63+1), %edx                   // unsigned test: below 0 wraps to a huge value
        jae     1f                              // |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)

        // 0.5 <= |x| < 0x1.0p63
        fldt    cutoff( %rip )                  // { 0x1.0p63 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        je      3f                              // exactly 0x1.0p63 - 0.5 rounds up out of range

        // Bring the half bit down to bit 0 (units bit in bit 1).
        movl    $63, %ecx
        subl    %edx, %ecx                      // 63 - (exponent + 1)
        shrq    %cl, %rax
        movq    %rax, %rdx                      // copy that still contains the half bit
        shrq    $1, %rax                        // second shift: the total can reach 64 bits,
                                                // which a single shrq cannot express
        andq    $1, %rdx                        // rdx = half bit
        addq    %rdx, %rax                      // rax = |result|

        // Apply the sign of x.
        movswq  8+FRAME_SIZE( STACKP ), %rdx    // sign and exponent, sign extended
        sarq    $16, %rdx                       // rdx = 0 (x positive) or -1 (x negative)
        xorq    %rdx, %rax                      // conditional complement ...
        subq    %rdx, %rax                      // ... plus one gives the negation

        // Raise inexact when x carries fraction bits.
        fabs                                    // { |x| }
        fadds   two63( %rip )                   // { |x| + 0x1.0p63 }
        fstp    %st(0)                          // {} value itself is not needed
        ret

        // |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x); flags still come from the cmpl
1:      jge     2f                              // signed test separates out the large and NaN cases

        // |x| < 0.5: result is zero
        xorl    %eax, %eax                      // also clears the upper half of rax
        fistpl  FRAME_SIZE( STACKP )            // {} sets inexact as necessary
        ret

        // |x| >= 0x1.0p63 || isnan(x)
2:      movswq  8+FRAME_SIZE( STACKP ), %rdx    // rdx bit 63 = sign of x
        flds    mtwo63( %rip )                  // { -0x1.0p63, x }
        fucomip %st(1), %st(0)                  // { x }
        je      4f                              // x == -0x1.0p63, or unordered
        fistpl  FRAME_SIZE( STACKP )            // {} set invalid
        shrq    $63, %rdx                       // 1 when x is negative, else 0
        subq    $1, %rdx                        // 0 when x is negative, else -1
        movq    $0x8000000000000000, %rax
        xorq    %rdx, %rax                      // negative: 0x8000..., positive: 0x7fff...
        ret

        // x == 0x1.0p63 - 0.5: positive overflow
3:      fistpl  FRAME_SIZE( STACKP )            // {} set invalid
        movq    $0x7fffffffffffffff, %rax
        ret

        // x == -0x1.0p63, or NaN
4:      jp      5f                              // parity set means the compare was unordered
        fstp    %st(0)                          // {} exact result, no flags to raise
        movq    $0x8000000000000000, %rax
        ret

        // NaN
5:      fistpl  FRAME_SIZE( STACKP )            // {} set invalid
        movq    $0x8000000000000000, %rax
        ret

#else

// ---------------------------------------------------------------------------
// lroundl  (32-bit build)
//
// In:    x at FRAME_SIZE( STACKP )
// Out:   eax = x rounded to nearest integer, halfway cases away from zero
//        x >= 2**31 - 0.5                   -> 0x7fffffff
//        x <= -(2**31 + 0.5), or NaN        -> 0x80000000
// Uses:  ecx (PIC base obtained with call/pop, later the shift count), edx,
//        flags, one x87 stack slot. The argument slot is overwritten by the
//        scratch fistp stores.
// ---------------------------------------------------------------------------
ENTRY( lroundl )
        fldt    FRAME_SIZE( STACKP )            // { x }
        movl    4+FRAME_SIZE( STACKP ), %eax    // eax = upper 32 bits of the significand
        movswl  8+FRAME_SIZE( STACKP ), %edx    // edx = sign and exponent word
        andl    $0x7fff, %edx                   // keep the exponent only
        subl    $0x3ffe, %edx                   // rebias: 0 for [0.5,1), 31 for [2**30,2**31)
        cmpl    $(31+1), %edx                   // unsigned test: below 0 wraps to a huge value
        jae     1f                              // |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x)

        // 0.5 <= |x| < 0x1.0p31
        call    0f
0:      popl    %ecx                            // ecx = address of label 0 (PIC base)
        fldl    (cutoff32-0b)(%ecx)             // { 2**31 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        jbe     3f                              // x >= 2**31 - 0.5 rounds up out of range

        // Raise inexact when x carries fraction bits (needs ecx, so do it first).
        fabs                                    // { |x| }
        fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 }
        fstp    %st(0)                          // {}

        // Bring the half bit down to bit 0 and round.
        movl    $31, %ecx
        subl    %edx, %ecx                      // 31 - (exponent + 1)
        shrl    %cl, %eax
        movl    %eax, %edx                      // copy that still contains the half bit
        shrl    $1, %eax                        // integer part
        andl    $1, %edx                        // edx = half bit
        addl    %edx, %eax                      // eax = |result|

        // Apply the sign of x.
        movswl  8+FRAME_SIZE( STACKP ), %edx
        sarl    $16, %edx                       // edx = 0 (x positive) or -1 (x negative)
        xorl    %edx, %eax
        subl    %edx, %eax
        ret

        // |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x); flags still come from the cmpl
1:      jge     2f

        // |x| < 0.5: result is zero
        xorl    %eax, %eax
        fistpl  FRAME_SIZE( STACKP )            // {} sets inexact as necessary
        ret

        // |x| >= 0x1.0p31 || isnan(x)
2:      movswl  8+FRAME_SIZE( STACKP ), %edx    // edx bit 31 = sign of x
        call    0f
0:      popl    %ecx                            // ecx = address of label 0 (PIC base)
        fldl    (mcutoff32-0b)(%ecx)            // { -(2**31 + 0.5), x }
        fucomip %st(1), %st(0)                  // { x }
        jae     4f                              // x <= -(2**31 + 0.5): negative overflow
        fldl    (cutoff32-0b)(%ecx)             // { 2**31 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        jbe     3f                              // x >= 2**31 - 0.5, or unordered

        // Still representable: with |x| >= 2**31 only -(2**31 + 0.5) < x <= -2**31
        // reaches this point.
        shrl    $31, %edx                       // 1 when x is negative, else 0
        subl    $1, %edx                        // 0 when x is negative, else -1
        movl    $0x80000000, %eax
        xorl    %edx, %eax                      // negative: 0x80000000, positive: 0x7fffffff

        // Raise inexact when x carries fraction bits.
        fabs                                    // { |x| }
        fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 }
        fstp    %st(0)                          // {}
        ret

        // positive overflow, or NaN when the last compare was unordered
3:      jp      5f
        fistps  FRAME_SIZE( STACKP )            // {} out-of-range integer store, to set invalid
        movl    $0x7fffffff, %eax
        ret

        // negative overflow
4:      fistps  FRAME_SIZE( STACKP )            // {} out-of-range integer store, to set invalid
        movl    $0x80000000, %eax
        ret

        // NaN
5:      fistpl  FRAME_SIZE( STACKP )            // {} integer store of a NaN, to set invalid
        movl    $0x80000000, %eax
        ret

// ---------------------------------------------------------------------------
// llroundl  (32-bit build)
//
// In:    x at FRAME_SIZE( STACKP )
// Out:   edx:eax = x rounded to nearest integer, halfway cases away from zero
//        x == 0x1.0p63 - 0.5 or larger      -> 0x7fffffff:0xffffffff
//        x below -0x1.0p63, or NaN          -> 0x80000000:0x00000000
// Uses:  ecx (PIC base obtained with call/pop), xmm0-xmm2 for the 64-bit
//        integer arithmetic, flags, one x87 stack slot. The argument slot is
//        overwritten by the scratch fistpl stores.
// ---------------------------------------------------------------------------
ENTRY( llroundl )
        fldt    FRAME_SIZE( STACKP )            // { x }
        movq    FRAME_SIZE( STACKP ), %xmm0     // xmm0 = significand of x
        movswl  8+FRAME_SIZE( STACKP ), %edx    // edx = sign and exponent word
        andl    $0x7fff, %edx                   // keep the exponent only
        subl    $0x3ffe, %edx                   // rebias: 0 for [0.5,1), 63 for [2**62,2**63)
        cmpl    $(63+1), %edx                   // unsigned test: below 0 wraps to a huge value
        jae     1f                              // |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)

        // 0.5 <= |x| < 0x1.0p63
        call    0f
0:      popl    %ecx                            // ecx = address of label 0 (PIC base)
        fldt    (cutoff-0b)( %ecx )             // { 0x1.0p63 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        je      3f                              // exactly 0x1.0p63 - 0.5 rounds up out of range

        // Bring the half bit down to bit 0 (units bit in bit 1).
        movl    $63, %eax
        movd    (one-0b)(%ecx), %xmm2           // xmm2 = 1
        subl    %edx, %eax                      // 63 - (exponent + 1)
        movd    %eax, %xmm1                     // shift count
        psrlq   %xmm1, %xmm0
        movq    %xmm0, %xmm1                    // copy that still contains the half bit
        psrlq   $1, %xmm0                       // second shift: the total can reach 64 bits
        pand    %xmm2, %xmm1                    // xmm1 = half bit
        paddq   %xmm1, %xmm0                    // xmm0 = |result|

        // Raise inexact when x carries fraction bits.
        fabs                                    // { |x| }
        fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 }
        fstp    %st(0)                          // {}

        // Apply the sign of x using the mask table.
        movswl  8+FRAME_SIZE( STACKP ), %eax    // sign and exponent, sign extended
        shrl    $31, %eax                       // eax = 1 when x is negative, else 0
        movq    (sign-0b)(%ecx, %eax,8), %xmm1  // xmm1 = 0 or all ones
        pxor    %xmm1, %xmm0                    // conditional complement ...
        psubq   %xmm1, %xmm0                    // ... plus one gives the negation

        // Split the 64-bit result into edx:eax.
        movd    %xmm0, %eax                     // low half
        psrlq   $32, %xmm0
        movd    %xmm0, %edx                     // high half
        ret

        // |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x); flags still come from the cmpl
1:      jge     2f

        // |x| < 0.5: result is zero
        xorl    %eax, %eax
        xorl    %edx, %edx
        fistpl  FRAME_SIZE( STACKP )            // {} sets inexact as necessary
        ret

        // |x| >= 0x1.0p63 || isnan(x)
2:      movswl  8+FRAME_SIZE( STACKP ), %eax    // eax bit 31 = sign of x
        call    0f
0:      popl    %ecx                            // ecx = address of label 0 (PIC base)
        flds    (mtwo63-0b)( %ecx )             // { -0x1.0p63, x }
        fucomip %st(1), %st(0)                  // { x }
        je      4f                              // x == -0x1.0p63, or unordered
        fistpl  FRAME_SIZE( STACKP )            // {} set invalid
        shrl    $31, %eax                       // 1 when x is negative, else 0
        subl    $1, %eax                        // low half: 0 when negative, else 0xffffffff
        movl    $0x80000000, %edx
        xorl    %eax, %edx                      // high half: 0x80000000 or 0x7fffffff
        ret

        // x == 0x1.0p63 - 0.5: positive overflow
3:      fistpl  FRAME_SIZE( STACKP )            // {} set invalid
        movl    $-1, %eax
        movl    $0x7fffffff, %edx
        ret

        // x == -0x1.0p63, or NaN
4:      jp      5f                              // parity set means the compare was unordered
        fstp    %st(0)                          // {} exact result, no flags to raise
        movl    $0x80000000, %edx
        xorl    %eax, %eax
        ret

        // NaN
5:      fistpl  FRAME_SIZE( STACKP )            // {} set invalid
        movl    $0x80000000, %edx
        xorl    %eax, %eax
        ret

#endif