// float hypotf(float x, float y)
// float cabsf(float complex z)
//
// Returns sqrt(x*x + y*y), computed without spurious overflow or underflow.
//
// -- Stephen Canon, August 2009
#if !defined __i386__ && !defined __x86_64__
#error This implementation is for i386 and x86_64 only
#endif
#define ABSMASK $0x7fffffff
#define INFINITY $0x7f800000
.text
.globl _cabsf
.globl _hypotf

// Entry points -------------------------------------------------------
//
// float hypotf(float x, float y)  -- x, y per the platform FP ABI
// float cabsf(float complex z)    -- z packed {real, imag}
// Returns sqrt(x*x + y*y) in the usual float return slot
// (x87 st(0) on i386, xmm0 on x86_64).

#if defined __i386__
	.align	4
_cabsf:							// on i386, we can use the same code for
_hypotf:						// hypotf and cabsf, because the arguments
	mov		4(%esp),	%eax	// come in at the same stack offsets:
	mov		8(%esp),	%edx	//
	movss	4(%esp),	%xmm0	// real (x) at esp + 4
	movss	8(%esp),	%xmm1	// imag (y) at esp + 8
#else
	.align	4					// however, on x86_64, the registers used
_cabsf:							// are different. cabsf's argument comes in
	pshufd	$0xfd, %xmm0, %xmm1	// packed in xmm0; move imag into xmm1 and
								// fall through into hypotf.
	.align	4
_hypotf:						// hypotf, on the other hand, gets x in
	movd	%xmm0,		%eax	// xmm0 and y in xmm1. Copy the bit
	movd	%xmm1,		%edx	// patterns to GPRs for the special-case
#endif							// tests below.
	mov		ABSMASK,	%ecx

// Detect special cases -----------------------------------------------
//
// If either input is an infinity, we must return +infinity even if the
// other input is a NaN (C99 Annex F: hypot(x, inf) == +inf). Integer
// compares on the absolute-value bit patterns handle this correctly,
// since FP compares with NaN would be unordered.
	and		%ecx,		%eax	// |x| as an integer
	and		%ecx,		%edx	// |y| as an integer
	cmp		INFINITY,	%eax	// |x| == +inf?
	jz		L_returnAbsX		// ... return +inf, not NaN.
	cmp		INFINITY,	%edx	// |y| == +inf?
	jz		L_returnAbsY		// ... return +inf, not NaN.

// Mainline -----------------------------------------------------------
	cvtss2sd %xmm0,		%xmm0	// Naive computation of sqrt(x*x + y*y)
	cvtss2sd %xmm1,		%xmm1	// in double. The extra exponent range
	mulsd	%xmm0,		%xmm0	// and precision protect against spurious
	mulsd	%xmm1,		%xmm1	// over/underflow and the double rounding
	addsd	%xmm1,		%xmm0	// is inconsequential, because hypot
	sqrtsd	%xmm0,		%xmm0	// does not need to be correctly rounded.
	cvtsd2ss %xmm0,		%xmm0	//
#ifdef __i386__
	movss	%xmm0,		4(%esp)	// i386 returns float results in st(0);
	flds	4(%esp)				// bounce through the stack to get there.
#endif
	ret

// Special case handling ----------------------------------------------
//
// eax/edx hold |x|/|y| bit patterns; the one we reached here through is
// INFINITY, so storing it and reloading yields +inf.
#ifdef __i386__
L_returnAbsY:
	mov		%edx,		4(%esp)
	flds	4(%esp)
	ret
L_returnAbsX:
	mov		%eax,		4(%esp)
	flds	4(%esp)
	ret
#else
L_returnAbsY:
	movd	%edx,		%xmm0
	ret
L_returnAbsX:
	movd	%eax,		%xmm0
	ret
#endif