/*
 *  Written by Ian Ollmann.
 *  Copyright © 2005 Apple Computer Inc.
 */

#include <machine/asm.h>
#include "abi.h"

// rint / nearbyint / lrint / llrint entry points for i386 and x86_64.
//
// Syntax: AT&T, assembled after C preprocessing.
// ENTRY, STACKP, FIRST_ARG_OFFSET, SUBP, ADDP, XORP, NEGP, MOVP, FISTPP, AX_P and DX_P are
// not defined in this file; presumably they come from abi.h and select the pointer-sized
// (32-bit or 64-bit) form of each operation -- confirm against that header.
//
// Overflow convention used by every lrint/llrint variant below: the hardware conversion
// yields the "integer indefinite" pattern (0x8000...) when the value does not fit.
// For inputs at or above the positive limit an all-ones mask is xor-ed into that result,
// turning 0x8000... into 0x7fff... . All other inputs are returned as converted.


// long double rintl( long double x )
// Loads x onto the x87 stack and rounds it with frndint; the result stays in st(0).
ENTRY(rintl)
    fldt        FIRST_ARG_OFFSET(STACKP)            //{x}
    frndint                                         //{result}
    ret

#if defined( __i386__ )

// float rintf( float x ) -- i386 only, argument read from the stack
ENTRY(rintf)
    flds        FIRST_ARG_OFFSET(STACKP)            //{x}
    frndint                                         //{result}
    ret

// double rint( double x ) -- i386 only, argument read from the stack
ENTRY(rint)
    fldl        FIRST_ARG_OFFSET(STACKP)            //{x}
    frndint                                         //{result}
    ret

#endif


// long double nearbyintl( long double x )
// Same rounding as rintl, but the x87 environment is saved before the rounding and
// reloaded afterwards, so status flags raised by frndint are discarded.
// Uses 28 bytes of stack for the environment image. Clobbers eax, edx.
ENTRY( nearbyintl )
    SUBP        $28, STACKP
    fldt        (FIRST_ARG_OFFSET+28)( STACKP )     //{f}

    //read fpcw + fpsw
    fnstenv     (STACKP)
    movw        (STACKP), %ax                       // ax = control word from the saved image

    //or it with 0x20
    movl        %eax, %edx                          // dx keeps the original control word
    orl         $0x20, %eax                         // 0x20 is the precision (inexact) mask bit

    //stick it back into the fpcw
    movw        %ax, (STACKP)
    fldenv      (STACKP)

    //round
    frndint                                         //{ result }

    //reset fpsw and fpcw
    movw        %dx, (STACKP)                       // put the original control word back in the image
    fldenv      (STACKP)                            // reload the environment captured before rounding
    ADDP        $28, STACKP
    ret


// long      lrintl( long double x )
// long long llrintl( long double x )
//
// On LP64 both names share one body with a limit of 0x1.0p63.
// Otherwise llrintl gets its own 64-bit body (result in edx:eax) and lrintl uses the
// shared tail below with a limit of 0x1.0p31.
// Stack scratch: 12 bytes; the integer result is stored at 0(STACKP), the float limit at 8(STACKP).
#if defined( __LP64__ )
ENTRY( llrintl )
ENTRY( lrintl )
    SUBP        $12, STACKP
    movl        $0x5f000000, 8(STACKP)              //limit = 0x1.0p63f
#else
ENTRY( llrintl )
    SUBP        $12, STACKP
    movl        $0x5f000000, 8(STACKP)              //0x1.0p63f
    xor         %edx, %edx
    flds        8(STACKP)                           //{0x1.0p63 }
    fldt        (FIRST_ARG_OFFSET+12)( STACKP )     //{f, 0x1.0p63}
    fucomi      %ST(1), %ST                         //{f, 0x1.0p63}     f>=0x1.0p63
    fistpll     (STACKP)                            //{0x1.0p63}
    fstp        %ST(0)                              //{}
    setnb       %dl                                 // copy f >= 0x1.0p63 to the d register
    negl        %edx                                // edx = -edx
    movl        (STACKP), %eax                      // load in the low part of the result from the fistpll above to eax
    xorl        %edx, %eax                          // xor with edx. This flips 0x8000000000000000 to 0x7fffffffffffffff for overflow
    xorl        4(STACKP), %edx                     // load in the high part and flip it
    ADDP        $12, STACKP
    ret

ENTRY( lrintl )
    SUBP        $12, STACKP
    movl        $0x4f000000, 8(STACKP)              //limit = 0x1.0p31f
#endif
    // Shared tail: LP64 llrintl/lrintl and i386 lrintl
    XORP        DX_P, DX_P
    flds        8(STACKP)                           //{limit }
    fldt        (FIRST_ARG_OFFSET+12)( STACKP )     //{f, limit}
    fucomi      %ST(1), %ST                         //{f, limit}        f>=limit    test for overflow
    FISTPP      (STACKP)                            //{limit}
    fstp        %ST(0)                              //{}
    setnb       %dl                                 // copy f >= limit to the d register
    NEGP        DX_P                                // rdx = -rdx
    MOVP        (STACKP), AX_P                      // load in the result from the fistpll to the a register
    XORP        DX_P, AX_P                          // xor with the d register to flip 0x8000... to 0x7fff... in the case of overflow
    ADDP        $12, STACKP
    ret


//i386 versions of these functions are in xmm_floor.c
//On x86_64 we can take advantage of the REX form of cvtsd2si to produce 64-bit values
#if defined( __LP64__ )

// long      lrint( double x )
// long long llrint( double x )
// In: xmm0 = x.  Out: rax.  Clobbers rdx, xmm1.
ENTRY( lrint )
ENTRY( llrint )
    movl        $0x43e00000, %eax                   //Exponent for 0x1.0p63
    movd        %eax, %xmm1                         //copy to low 32-bits of xmm1
    psllq       $32, %xmm1                          //move it to the high 32-bits of the low double in xmm1, to make 0x1.0p63
    cmplesd     %xmm0, %xmm1                        //compare 0x1.0p63 <= x. Since there are no double precision values between LONG_MAX and 0x1.0p63 we do not need to worry about them
    cvtsd2siq   %xmm0, %rax                         //convert x to long
    movd        %xmm1, %rdx                         //copy compare result (all 64-bits) to %rdx
    xorq        %rdx, %rax                          //flip overflow values to 0x7fffffffffffffff
    ret

// long      lrintf( float x )
// long long llrintf( float x )
// In: xmm0 = x.  Out: rax.  Clobbers rdx, xmm1.
//
// cmpless writes only the low 32 bits of xmm1, and the bits above them are zero after
// the movd load of the limit. The mask must therefore be read as a 32-bit value and
// sign-extended to 64 bits. Copying 64 bits straight out of xmm1 would give
// 0x00000000ffffffff and turn an overflow result into 0x80000000ffffffff.
ENTRY( lrintf )
ENTRY( llrintf )
    movl        $0x5f000000, %eax                   //load 0x1.0p63f
    movd        %eax, %xmm1                         //copy to xmm (upper bits of xmm1 are zeroed)
    cmpless     %xmm0, %xmm1                        //compare 0x1.0p63f <= x, mask lands in the low 32 bits only
    cvtss2siq   %xmm0, %rax                         //convert x to long
    movd        %xmm1, %edx                         //copy the 32-bit comparison mask to %edx
    movslq      %edx, %rdx                          //sign-extend the mask: 0 stays 0, 0xffffffff becomes all ones
    xorq        %rdx, %rax                          //flip overflow results to 0x7fffffffffffffff
    ret

#else

// long lrintf( float x ) -- 32-bit result in eax. Clobbers edx, xmm0, xmm1.
ENTRY( lrintf )
    movl        $0x4f000000, %eax                   //load 0x1.0p31f
    movss       (FIRST_ARG_OFFSET)( STACKP ), %xmm0 //load x
    movd        %eax, %xmm1                         //copy 0x1.0p31f to xmm1
    cmpless     %xmm0, %xmm1                        //compare 0x1.0p31f <= x. There are no single precision values between INT_MAX and 0x1.0p31f, so no need to worry here.
    cvtss2si    %xmm0, %eax                         //convert to int
    movd        %xmm1, %edx                         //move the compare result to edx
    xorl        %edx, %eax                          //saturate overflow results to 0x7fffffff
    ret

// long lrint( double x ) -- 32-bit result in eax. Clobbers ecx, edx, xmm0, xmm1.
// Doubles can lie between INT_MAX and 0x1.0p31, so instead of comparing against a limit
// this variant tests the sign of x and then checks for the 0x80000000 overflow result.
ENTRY( lrint )
    movsd       (FIRST_ARG_OFFSET)( STACKP ), %xmm0 // load x
    xorpd       %xmm1, %xmm1                        // load 0.0f
    cmpltsd     %xmm0, %xmm1                        // test 0.0f < x
    cvtsd2si    %xmm0, %eax                         // convert x to int
    movd        %xmm1, %edx                         // copy the compare result to %edx
    xorl        %ecx, %ecx                          // set %ecx to 0
    cmp         $0x80000000, %eax                   // check the result to see if it is 0x80000000 -- the overflow result
    cmovne      %ecx, %edx                          // if the result is not 0x80000000, overwrite the earlier compare result with 0
    xorl        %edx, %eax                          // saturate overflow results to 0x7fffffff (was 0x80000000)
    ret

// long long llrintf( float x ) -- 64-bit result in edx:eax, converted on the x87 unit.
ENTRY( llrintf )
    SUBP        $12, STACKP
    movl        $0x5f000000, 8(STACKP)              //0x1.0p63f
    xor         %edx, %edx
    flds        8(STACKP)                           //{0x1.0p63 }
    flds        (FIRST_ARG_OFFSET+12)( STACKP )     //{f, 0x1.0p63}
    fucomi      %ST(1), %ST                         //{f, 0x1.0p63}     f>=0x1.0p63
    fistpll     (STACKP)                            //{0x1.0p63}
    fstp        %ST(0)                              //{}
    setnb       %dl                                 // copy f >= 0x1.0p63 to the d register
    negl        %edx                                // convert [0,1] to [0,-1]
    movl        (STACKP), %eax                      // load low 32-bits of the result
    xorl        %edx, %eax                          // saturate to 0xffffffff if overflow
    xorl        4(STACKP), %edx                     // load the high 32-bits of the result and saturate to 0x7fffffff if overflow
    ADDP        $12, STACKP
    ret

// long long llrint( double x ) -- 64-bit result in edx:eax, converted on the x87 unit.
ENTRY( llrint )
    SUBP        $12, STACKP
    movl        $0x5f000000, 8(STACKP)              //0x1.0p63f
    xor         %edx, %edx
    flds        8(STACKP)                           //{0x1.0p63 }
    fldl        (FIRST_ARG_OFFSET+12)( STACKP )     //{f, 0x1.0p63}
    fucomi      %ST(1), %ST                         //{f, 0x1.0p63}     f>=0x1.0p63
    fistpll     (STACKP)                            //{0x1.0p63}
    fstp        %ST(0)                              //{}
    setnb       %dl                                 // copy f >= 0x1.0p63 to the d register
    negl        %edx                                // convert [0,1] to [0,-1]
    movl        (STACKP), %eax                      // load low 32-bits of the result
    xorl        %edx, %eax                          // saturate to 0xffffffff if overflow
    xorl        4(STACKP), %edx                     // load the high 32-bits of the result and saturate to 0x7fffffff if overflow
    ADDP        $12, STACKP
    ret

#endif