// PCMBlitterLibPPC.c
#if 1 //TARGET_CPU_PPC
#include "PCMBlitterLibPPC.h"
// __lwbrx( index, base ): wraps the PowerPC "lwbrx" (load word byte-reversed indexed) instruction.
// Evaluates (GNU statement expression) to the 32-bit word at address index + base with its bytes reversed, as a long.
// index is bound with the "b" (base register) constraint so it is never allocated to r0, which the instruction
// would read as the literal value 0 in that operand slot.
#define __lwbrx( index, base ) ({ register long result; __asm__ __volatile__("lwbrx %0, %1, %2" : "=r" (result) : "b%" (index), "r" (base) : "memory" ); result; } )
// __lhbrx( index, base ): wraps the PowerPC "lhbrx" (load halfword byte-reversed indexed) instruction.
// Evaluates to the 16-bit value at address index + base with its two bytes swapped, typed as signed short,
// so it is sign-extended when the caller assigns it to a wider integer.
// Unlike __lwbrx this asm is not marked volatile; it still carries a "memory" clobber.
#define __lhbrx(index, base) \
({ register signed short lhbrxResult; \
__asm__ ("lhbrx %0, %1, %2" : "=r" (lhbrxResult) : "b%" (index), "r" (base) : "memory"); \
lhbrxResult; } )
// __rlwimi( rA, rS, cnt, mb, me ): wraps the PowerPC "rlwimi" (rotate left word immediate then mask insert) instruction.
// rS is rotated left by cnt bits and the bits selected by the mask mb..me (PowerPC numbering, bit 0 = most significant)
// are inserted into rA; the other bits of rA are kept.
// rA must be a modifiable lvalue: it is both input and output, and its new value is the value of the expression.
// cnt, mb and me use the "n" constraint, so they must be compile-time integer constants.
#define __rlwimi( rA, rS, cnt, mb, me ) \
({ __asm__ __volatile__( "rlwimi %0, %2, %3, %4, %5" : "=r" (rA) : "0" (rA), "r" (rS), "n" (cnt), "n" (mb), "n" (me) ); rA; })
// __stwbrx( value, index, base ): wraps the PowerPC "stwbrx" (store word byte-reversed indexed) instruction.
// Stores the 32-bit value at address index + base with its bytes reversed.
// Expands to a statement, not an expression. The "memory" clobber tells the compiler that memory is written.
#define __stwbrx( value, index, base ) \
__asm__( "stwbrx %0, %1, %2" : : "r" (value), "b%" (index), "r" (base) : "memory" )
// __rlwimi_volatile( rA, rS, cnt, mb, me ): rotate-left-and-mask-insert of rS into rA.
// The expansion is the same as __rlwimi as defined in this file (both use __asm__ __volatile__):
// rA must be an lvalue and is updated in place; cnt, mb and me must be compile-time constants.
#define __rlwimi_volatile( rA, rS, cnt, mb, me ) \
({ __asm__ __volatile__( "rlwimi %0, %2, %3, %4, %5" : "=r" (rA) : "0" (rA), "r" (rS), "n" (cnt), "n" (mb), "n" (me) ); rA; })
// __stfiwx( value, offset, addr ): wraps the PowerPC "stfiwx" (store floating-point as integer word indexed) instruction.
// Writes the low 32 bits of the floating-point register holding value to address offset + addr.
// In this file it is used to store the integer produced by __fctiw.
// Expands to a statement; the "memory" clobber tells the compiler that memory is written.
#define __stfiwx( value, offset, addr ) \
asm( "stfiwx %0, %1, %2" : : "f" (value), "b%" (offset), "r" (addr) : "memory" )
// Convert a double to a 32-bit integer with the PowerPC "fctiw" instruction.
// The integer is returned inside a floating-point register (typed as double);
// callers in this file store it to memory with __stfiwx to get at the integer bits.
static inline double __fctiw( register double B )
{
	register double asInteger;

	asm( "fctiw %0, %1"
		: "=f" (asInteger)
		: "f" (B) );

	return asInteger;
}
// Convert count native-endian signed 16-bit samples at src into 32-bit floats at dest.
//
// Technique: exponentMask is the bit pattern of a float with exponent field (0x97 - bitDepth) and mantissa 0x8000.
// Adding a sample to it gives the bits of a float whose mantissa is offset by the sample; those bits are stored
// into dest as an integer, reloaded as a float, and bias (the float whose bits are exponentMask) is subtracted.
// For bitDepth == 16 the result is sample / 32768.
//
// When count >= 8 the work is software pipelined: integer loads, integer stores into dest, float reloads and the
// final float stores for different samples are interleaved. dest itself is the int-to-float transfer area and is
// written as integers a few elements ahead of the final float stores.
// NOTE(review): the (long*) dest stores assume long and float are both 32 bits - confirm for the target ABI.
void NativeInt16ToFloat32( signed short *src, float *dest, unsigned int count, int bitDepth )
{
register float bias;
register long exponentMask = ((0x97UL - bitDepth) << 23) | 0x8000; register long int0, int1, int2, int3;
register float float0, float1, float2, float3;
register unsigned long loopCount;
// Reinterpret the integer mask as a float to obtain the bias to subtract later.
union
{
float f;
long i;
}exponent;
exponent.i = exponentMask;
bias = exponent.f;
// Step back one element so every read below can use pre-increment.
src--;
if( count >= 8 )
{
// Pipeline prologue: reads the first eight samples and starts their conversion.
int0 = (++src)[0];
int1 = (++src)[0];
int0 += exponentMask;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[0] = int0;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[1] = int1;
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[2] = int2;
float0 = dest[0];
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[3] = int3;
float1 = dest[1];
float0 -= bias;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[4] = int0;
float2 = dest[2];
float1 -= bias;
// From here on dest is addressed with pre-increment for the float stores; integer stores go to dest[6]
// and float reloads come from dest[4] relative to the moving pointer.
dest--;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
count -= 8;
loopCount = count / 4;
count &= 3;
// Steady state: four samples read and four floats written per iteration.
while( loopCount-- )
{
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[6] = int0;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
}
// Pipeline epilogue: finishes the samples already read; no further reads from src.
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
float3 -= bias;
(++dest)[0] = float1;
(++dest)[0] = float2;
(++dest)[0] = float3;
// dest points at the last float written; move to the next free slot for the loop below.
dest++;
}
// Whatever is left (everything when count < 8), one sample at a time.
while( count-- )
{
register long value = (++src)[0];
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
dest++;
}
}
// Convert count byte-swapped signed 16-bit samples at src into 32-bit floats at dest.
//
// Same scheme as the native 16-bit converter in this file, except that every sample is read with __lhbrx,
// which swaps the two bytes while loading. exponentMask is the bit pattern of a float with exponent field
// (0x97 - bitDepth) and mantissa 0x8000; sample + exponentMask is stored into dest as an integer, reloaded as a
// float, and bias (the float whose bits are exponentMask) is subtracted. For bitDepth == 16 the result is
// sample / 32768.
//
// When count >= 8 the work is software pipelined and dest is used as the int-to-float transfer area, written
// as integers a few elements ahead of the final float stores.
// NOTE(review): the (long*) dest stores assume long and float are both 32 bits - confirm for the target ABI.
void SwapInt16ToFloat32( signed short *src, float *dest, unsigned int count, int bitDepth )
{
register float bias;
register long exponentMask = ((0x97UL - bitDepth) << 23) | 0x8000; register long int0, int1, int2, int3;
register float float0, float1, float2, float3;
register unsigned long loopCount;
// Reinterpret the integer mask as a float to obtain the bias to subtract later.
union
{
float f;
long i;
}exponent;
exponent.i = exponentMask;
bias = exponent.f;
// Step back one element so every read below can use pre-increment.
src--;
if( count >= 8 )
{
// Pipeline prologue: reads the first eight samples and starts their conversion.
int0 = __lhbrx(0, ++src);
int1 = __lhbrx(0, ++src);
int0 += exponentMask;
int2 = __lhbrx(0, ++src);
int1 += exponentMask;
((long*) dest)[0] = int0;
int3 = __lhbrx(0, ++src);
int2 += exponentMask;
((long*) dest)[1] = int1;
int0 = __lhbrx(0, ++src);
int3 += exponentMask;
((long*) dest)[2] = int2;
float0 = dest[0];
int1 = __lhbrx(0, ++src);
int0 += exponentMask;
((long*) dest)[3] = int3;
float1 = dest[1];
float0 -= bias;
int2 = __lhbrx(0, ++src);
int1 += exponentMask;
((long*) dest)[4] = int0;
float2 = dest[2];
float1 -= bias;
// From here on dest is addressed with pre-increment for the float stores; integer stores go to dest[6]
// and float reloads come from dest[4] relative to the moving pointer.
dest--;
int3 = __lhbrx(0, ++src);
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
count -= 8;
loopCount = count / 4;
count &= 3;
// Steady state: four samples read and four floats written per iteration.
while( loopCount-- )
{
int0 = __lhbrx(0, ++src);
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
int1 = __lhbrx(0, ++src);
int0 += exponentMask;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
int2 = __lhbrx(0, ++src);
int1 += exponentMask;
((long*) dest)[6] = int0;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
int3 = __lhbrx(0, ++src);
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
}
// Pipeline epilogue: finishes the samples already read; no further reads from src.
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
float3 -= bias;
(++dest)[0] = float1;
(++dest)[0] = float2;
(++dest)[0] = float3;
// dest points at the last float written; move to the next free slot for the loop below.
dest++;
}
// Whatever is left (everything when count < 8), one sample at a time.
while( count-- )
{
register long value = __lhbrx(0, ++src);
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
dest++;
}
}
// Convert count packed 24-bit samples (3 bytes each, most significant byte first) at src into floats at dest.
//
// src is typed long* so the main path can read four samples as three 32-bit words. bitDepth is not used.
//
// Technique: each sample, with its sign bit flipped, is written into the low word of a double whose high word
// is 0x41C00000. Subtracting dBias (same high word, low word 0x00800000) leaves sample * 2^-23, which is then
// narrowed to float. transfer holds four such doubles; only the odd-indexed words are rewritten per sample.
// NOTE(review): this relies on i[0] overlaying the high word of d[0], i.e. big-endian layout with 32-bit
// unsigned int, and on unaligned word/halfword loads being allowed - confirm for the target.
//
// When count >= 8 the work is software pipelined, four samples (three input words) per step.
void NativeInt24ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
}transfer;
register double dBias;
register unsigned int loopCount, load0SignMask;
register unsigned long load0, load1, load2;
register unsigned long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// High words of all four doubles are fixed; d[0] temporarily holds the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = 0x41C00000UL;
transfer.i[1] = 0x00800000;
// int1 and int2 are built with mask-inserts that never touch their top byte, so they must start at zero.
int0 = int1 = int2 = int3 = 0;
// The first word of each group holds the leading byte of two samples (top byte and bottom byte).
load0SignMask = 0x80000080UL;
dBias = transfer.d[0];
// Step back one element so reads and writes below can use pre-increment.
src--;
dest--;
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
// Pipeline prologue: reads six words (eight samples) and starts their conversion.
load0 = (++src)[0];
load1 = (++src)[0];
load0 ^= load0SignMask;
load2 = (++src)[0];
load1 ^= 0x00008000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15);
load2 ^= 0x00800000UL;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
transfer.i[1] = int0;
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
transfer.i[3] = int1;
load1 = (++src)[0];
load0 ^= load0SignMask;
transfer.i[5] = int2;
d0 = transfer.d[0];
load2 = (++src)[0];
load1 ^= 0x00008000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
transfer.i[7] = int3;
d1 = transfer.d[1];
d0 -= dBias;
load2 ^= 0x00800000UL;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
transfer.i[1] = int0;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
// Steady state: three words read and four floats written per iteration.
while( loopCount-- )
{
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
transfer.i[3] = int1;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
load1 = (++src)[0];
load0 ^= load0SignMask;
transfer.i[5] = int2;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
load2 = (++src)[0];
load1 ^= 0x00008000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
transfer.i[7] = int3;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
load2 ^= 0x00800000UL;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
transfer.i[1] = int0;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Pipeline epilogue: finishes the eight samples still in flight; no further reads from src.
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
transfer.i[3] = int1;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
transfer.i[5] = int2;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
transfer.i[7] = int3;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// src sits 4 bytes before the next unread sample; after this +1, each +3 below lands on a sample's first byte.
src = (long*) ((char*) src + 1 );
// Whatever is left (everything when count < 8), one 3-byte sample at a time.
while( count-- )
{
// First byte (most significant) is read on its own, the following two bytes as one halfword.
int0 = ((unsigned char*)(src = (long*)( (char*) src + 3 )))[0];
int1 = ((unsigned short*)( (char*) src + 1 ))[0];
int0 ^= 0x00000080UL;
int1 = __rlwimi( int1, int0, 16, 8, 15 );
transfer.i[1] = int1;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
// Convert count packed 24-bit samples (3 bytes each, least significant byte first) at src into floats at dest.
//
// src is typed long* so the main path can read four samples as three 32-bit words. bitDepth is not used.
//
// Technique: each sample is assembled in a register with a zero top byte and its sign bit flipped, then written
// with __stwbrx (byte-reversing store) into the low word of a double whose high word is 0x41400000. After the
// byte reversal the sample occupies the upper 24 bits of that word. Subtracting dBias (same high word, low word
// 0x80000000) leaves sample * 2^-23, which is then narrowed to float.
// NOTE(review): this relies on i[0] overlaying the high word of d[0], i.e. big-endian layout with 32-bit
// unsigned int, and on unaligned word/halfword loads being allowed - confirm for the target.
//
// When count >= 8 the work is software pipelined, four samples (three input words) per step.
void SwapInt24ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
}transfer;
register double dBias;
register unsigned int loopCount, load2SignMask;
register unsigned long load0, load1, load2;
register unsigned long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// High words of all four doubles are fixed; d[0] temporarily holds the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = 0x41400000UL; transfer.i[1] = 0x80000000;
// int1 and int2 are built with mask-inserts that never touch their top byte, so they must start at zero.
int0 = int1 = int2 = int3 = 0;
// The third word of each group holds the sign-carrying byte of two samples (top byte and bottom byte).
load2SignMask = 0x80000080UL;
dBias = transfer.d[0];
// Step back one element so reads and writes below can use pre-increment.
src--;
dest--;
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
// Pipeline prologue: reads six words (eight samples) and starts their conversion.
load0 = (++src)[0];
load1 = (++src)[0];
load0 ^= 0x00008000;
load2 = (++src)[0];
load1 ^= 0x00800000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15);
load2 ^= load2SignMask;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
__stwbrx( int0, 0, &transfer.i[1]);
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
__stwbrx( int1, 0, &transfer.i[3]);
load1 = (++src)[0];
load0 ^= 0x00008000;
__stwbrx( int2, 0, &transfer.i[5]);
d0 = transfer.d[0];
load2 = (++src)[0];
load1 ^= 0x00800000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
__stwbrx( int3, 0, &transfer.i[7]);
d1 = transfer.d[1];
d0 -= dBias;
load2 ^= load2SignMask;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
__stwbrx( int0, 0, &transfer.i[1]);
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
// Steady state: three words read and four floats written per iteration.
while( loopCount-- )
{
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
__stwbrx( int1, 0, &transfer.i[3]);
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
load1 = (++src)[0];
load0 ^= 0x00008000;
__stwbrx( int2, 0, &transfer.i[5]);
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
load2 = (++src)[0];
load1 ^= 0x00800000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
__stwbrx( int3, 0, &transfer.i[7]);
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
load2 ^= load2SignMask;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
__stwbrx( int0, 0, &transfer.i[1]);
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Pipeline epilogue: finishes the eight samples still in flight; no further reads from src.
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
__stwbrx( int1, 0, &transfer.i[3]);
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
__stwbrx( int2, 0, &transfer.i[5]);
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
__stwbrx( int3, 0, &transfer.i[7]);
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// Leftover samples (everything when count < 8).
if( count > 0 )
{
// First leftover sample: src is 4 bytes before it, so byte [6] is its third (sign-carrying) byte and the
// halfword at the incremented src is its first two bytes. It is read piecewise rather than as a word
// starting one byte before the sample.
int1 = ((unsigned char*) src)[6];
int0 = ((unsigned short*)(++src))[0];
int1 ^= 0x80;
int1 = __rlwimi( int1, int0, 8, 8, 23 );
__stwbrx( int1, 0, &transfer.i[1]);
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
// Back up one byte so each +3 below points one byte before the next sample.
src = (long*) ((char*)src - 1 );
while( --count )
{
// One word load covers the preceding byte plus the three sample bytes; the extra byte is masked off.
int0 = (src = (long*)( (char*) src + 3 ))[0];
int0 ^= 0x80UL;
int0 &= 0x00FFFFFFUL;
__stwbrx( int0, 0, &transfer.i[1]);
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
}
// Convert count native-endian signed 32-bit samples at src into floats at dest.
//
// Technique: each sample, with its sign bit flipped, is written into the low word of a double whose high word
// is (0x434 - bitDepth) << 20. Subtracting dBias (same high word, low word 0x80000000) leaves
// sample * 2^(1 - bitDepth), i.e. sample / 2^31 for bitDepth == 32; the result is then narrowed to float.
// transfer holds four such doubles; only the odd-indexed words are rewritten per sample.
// NOTE(review): this relies on i[0] overlaying the high word of d[0], i.e. big-endian layout with 32-bit
// unsigned int and 32-bit long - confirm for the target.
//
// When count >= 8 the work is software pipelined, four samples per step.
void NativeInt32ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
}transfer;
register double dBias;
register unsigned int loopCount;
register long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// High words of all four doubles are fixed; d[0] temporarily holds the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = (0x434UL - bitDepth) << 20;
transfer.i[1] = 0x80000000;
dBias = transfer.d[0];
// Step back one element so reads and writes below can use pre-increment.
src--;
dest--;
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
// Pipeline prologue: reads the first eight samples and starts their conversion.
int0 = (++src)[0];
int1 = (++src)[0];
int0 ^= 0x80000000UL;
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
int0 = (++src)[0];
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
int1 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
// Steady state: four samples read and four floats written per iteration.
while( loopCount-- )
{
int0 = (++src)[0];
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
int1 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Pipeline epilogue: finishes the eight samples still in flight; no further reads from src.
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// Whatever is left (everything when count < 8), one sample at a time.
while( count-- )
{
int0 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[1] = int0;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
// Convert count byte-swapped signed 32-bit samples at src into floats at dest.
//
// Same scheme as the native 32-bit converter in this file, except that every sample is read with __lwbrx,
// which reverses the four bytes while loading. Each sample, with its sign bit flipped, is written into the low
// word of a double whose high word is (0x434 - bitDepth) << 20. Subtracting dBias (same high word, low word
// 0x80000000) leaves sample * 2^(1 - bitDepth), i.e. sample / 2^31 for bitDepth == 32; the result is then
// narrowed to float.
// NOTE(review): this relies on i[0] overlaying the high word of d[0], i.e. big-endian layout with 32-bit
// unsigned int and 32-bit long - confirm for the target.
//
// When count >= 8 the work is software pipelined, four samples per step.
void SwapInt32ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
}transfer;
register double dBias;
register unsigned int loopCount;
register long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// High words of all four doubles are fixed; d[0] temporarily holds the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = (0x434UL - bitDepth) << 20;
transfer.i[1] = 0x80000000;
dBias = transfer.d[0];
// Step back one element so reads and writes below can use pre-increment.
src--;
dest--;
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
// Pipeline prologue: reads the first eight samples and starts their conversion.
int0 = __lwbrx( 0, ++src);
int1 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
int2 = __lwbrx( 0, ++src);
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
int3 = __lwbrx( 0, ++src);
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
int0 = __lwbrx( 0, ++src);
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
int1 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
int2 = __lwbrx( 0, ++src);
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
int3 = __lwbrx( 0, ++src);
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
// Steady state: four samples read and four floats written per iteration.
while( loopCount-- )
{
int0 = __lwbrx( 0, ++src);
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
int1 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
int2 = __lwbrx( 0, ++src);
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
int3 = __lwbrx( 0, ++src);
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Pipeline epilogue: finishes the eight samples still in flight; no further reads from src.
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// Whatever is left (everything when count < 8), one sample at a time.
while( count-- )
{
int0 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
transfer.i[1] = int0;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
// Convert count floats at src into native-endian signed 16-bit samples at dst.
//
// Technique: each float is multiplied by 2^31 and 32768 is added (half of one 16-bit step at that scale); the
// result is converted to a 32-bit integer with fctiw, stored to a scratch buffer with stfiwx, and the upper
// 16 bits of that word become the output sample.
// This function does not change the FPSCR rounding mode; fctiw uses whatever mode is in effect on entry.
// NOTE(review): ((short*) buffer)[0] and [2] are taken to be the upper halves of buffer[0] and buffer[1],
// which holds for big-endian layout with 32-bit long - confirm for the target.
//
// When count >= 6 the work is software pipelined, two samples per loop iteration, alternating between the two
// scratch words so one can be read back while the other is being written.
void Float32ToNativeInt16( float *src, signed short *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 32768.0;
// This initial value is never used: loopCount is reassigned before the only loop that reads it.
unsigned long loopCount = count / 4;
long buffer[2];
register float startingFloat;
register double scaled;
register double converted;
register short copy;
if( count >= 6 )
{
// Pipeline prologue: reads the first six floats and starts their conversion.
startingFloat = (src++)[0];
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
copy = ((short*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
count -= 6;
loopCount = count / 2;
count &= 1;
// Steady state: two floats read and two samples written per iteration.
// The arithmetic is written as volatile asm, which keeps the compiler from reordering these
// instructions relative to one another.
while( loopCount-- )
{
register float startingFloat2;
register double scaled2;
register double converted2;
register short copy2;
(dst++)[0] = copy;
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
copy2 = ((short*) buffer)[2];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (sizeof(float)), "r" (buffer) : "memory" );
startingFloat2 = (src++)[0];
(dst++)[0] = copy2;
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
copy = ((short*) buffer)[0];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (0), "r" (buffer) : "memory" );
startingFloat = (src++)[0];
}
// Pipeline epilogue: writes the six samples still in flight; no further reads from src.
(dst++)[0] = copy;
copy = ((short*) buffer)[2];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
(dst++)[0] = copy;
copy = ((short*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
(dst++)[0] = copy;
copy = ((short*) buffer)[2];
__stfiwx( converted, sizeof(float), buffer );
(dst++)[0] = copy;
copy = ((short*) buffer)[0];
(dst++)[0] = copy;
copy = ((short*) buffer)[2];
(dst++)[0] = copy;
}
// Whatever is left (everything when count < 6), one sample at a time.
while( count-- )
{
double scaled = src[0] * scale + round;
double converted = __fctiw( scaled );
__stfiwx( converted, 0, buffer );
dst[0] = buffer[0] >> 16;
src++;
dst++;
}
}
// Convert count floats at src into byte-swapped signed 16-bit samples at dst.
//
// Technique: each float is multiplied by 2^31 and 32768 is added (half of one 16-bit step at that scale); the
// result is converted to a 32-bit integer with fctiw, stored to a scratch buffer with stfiwx, and the upper
// 16 bits of that word are written to dst with sthbrx, which swaps the two bytes while storing.
// This function does not change the FPSCR rounding mode; fctiw uses whatever mode is in effect on entry.
// NOTE(review): ((short*) buffer)[0] and [2] are taken to be the upper halves of buffer[0] and buffer[1],
// which holds for big-endian layout with 32-bit long - confirm for the target.
//
// When count >= 6 the work is software pipelined, two samples per loop iteration, alternating between the two
// scratch words so one can be read back while the other is being written.
//
// Every sthbrx carries a "memory" clobber because it writes through dst, which is not listed as an asm output.
void Float32ToSwapInt16( float *src, signed short *dst, unsigned int count )
{
	register double scale = 2147483648.0;
	register double round = 32768.0;
	unsigned long loopCount;
	long buffer[2];
	register float startingFloat;
	register double scaled;
	register double converted;
	register short copy;

	if( count >= 6 )
	{
		// Pipeline prologue: reads the first six floats and starts their conversion.
		startingFloat = (src++)[0];
		scaled = startingFloat * scale + round;
		startingFloat = (src++)[0];
		converted = __fctiw( scaled );
		scaled = startingFloat * scale + round;
		startingFloat = (src++)[0];
		__stfiwx( converted, 0, buffer );
		converted = __fctiw( scaled );
		scaled = startingFloat * scale + round;
		startingFloat = (src++)[0];
		__stfiwx( converted, sizeof(float), buffer );
		converted = __fctiw( scaled );
		scaled = startingFloat * scale + round;
		startingFloat = (src++)[0];
		copy = ((short*) buffer)[0];
		__stfiwx( converted, 0, buffer );
		converted = __fctiw( scaled );
		scaled = startingFloat * scale + round;
		startingFloat = (src++)[0];

		count -= 6;
		loopCount = count / 2;
		count &= 1;

		// Steady state: two floats read and two samples written per iteration.
		while( loopCount-- )
		{
			register float startingFloat2;
			register double scaled2;
			register double converted2;
			register short copy2;

			__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
			__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
			copy2 = ((short*) buffer)[2];
			__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale), "f" (round) );
			__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (sizeof(float)), "r" (buffer) : "memory" );
			startingFloat2 = (src)[0];
			src += 2;
			dst += 2;
			// The second sample of the pair goes one element behind the already-advanced dst.
			// The index must use "b": in the first address slot r0 is read as literal 0, which would
			// drop the negative offset and overwrite the wrong element.
			__asm__ __volatile__ ( "sthbrx %0, %1, %2" : : "r" (copy2), "b" (-(long) sizeof( dst[0] )), "r" (dst) : "memory" );
			__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
			copy = ((short*) buffer)[0];
			__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale), "f" (round) );
			__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (0), "r" (buffer) : "memory" );
			startingFloat = (src)[-1];
		}

		// Pipeline epilogue: writes the six samples still in flight; no further reads from src.
		__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
		dst++;
		copy = ((short*) buffer)[2];
		__stfiwx( converted, sizeof(float), buffer );
		converted = __fctiw( scaled );
		scaled = startingFloat * scale + round;
		__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
		dst++;
		copy = ((short*) buffer)[0];
		__stfiwx( converted, 0, buffer );
		converted = __fctiw( scaled );
		__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
		dst++;
		copy = ((short*) buffer)[2];
		__stfiwx( converted, sizeof(float), buffer );
		__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
		dst++;
		copy = ((short*) buffer)[0];
		__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
		dst++;
		copy = ((short*) buffer)[2];
		__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
		dst++;
	}

	// Whatever is left (everything when count < 6), one sample at a time.
	while( count-- )
	{
		double scaled = src[0] * scale + round;
		double converted = __fctiw( scaled );
		__stfiwx( converted, 0, buffer );
		copy = buffer[0] >> 16;
		__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
		src++;
		dst++;
	}
}
// Convert count floats at src into packed 24-bit samples (3 bytes each, most significant byte first) at dst.
//
// dst is typed signed long* so the main path can write four samples as three 32-bit words.
//
// Technique: each float is multiplied by 2^31 and 128 is added (half of one 24-bit step at that scale); the
// result is converted to a 32-bit integer with fctiw and stored to a scratch buffer with stfiwx. The upper
// 24 bits of each converted word are the sample; rlwimi sequences merge four of them into three output words.
// The FPSCR rounding-mode bits are set to 3 (round toward -infinity on PowerPC) for the duration of the call
// and the previous FPSCR setting is restored before returning.
// NOTE(review): setting.i[1] is taken to be the low word of the double written by mffs, and buffer elements
// to be 32 bits wide; both hold for 32-bit big-endian PowerPC - confirm for the target.
//
// When count >= 8 the work is software pipelined, four samples per loop iteration.
void Float32ToNativeInt24( float *src, signed long *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 0.5 * 256.0;
// This initial value is never used: loopCount is reassigned before the only loop that reads it.
unsigned long loopCount = count / 4;
long buffer[4];
register float startingFloat, startingFloat2;
register double scaled, scaled2;
register double converted, converted2;
register long copy1; register long copy2; register long copy3; register long copy4; register double oldSetting;
// Save the FPSCR, then set both rounding-mode bits. "mtfsf 7" writes only the three lowest 4-bit FPSCR fields.
{
union
{
double d;
int i[2];
}setting;
register double newSetting;
asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
setting.d = oldSetting;
setting.i[1] |= 3;
newSetting = setting.d;
asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
}
// Step back one element so reads and writes below can use pre-increment.
src--;
dst--;
if( count >= 8 )
{
// Pipeline prologue: reads the first eight floats and starts their conversion.
startingFloat = (++src)[0];
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = buffer[0];
__stfiwx( converted, 2 * sizeof( float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy2 = buffer[1];
__stfiwx( converted, 3 * sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = __rlwimi( copy1, copy2, 8, 24, 31 );
copy3 = buffer[2];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
count -= 8;
loopCount = count / 4;
count &= 3;
// Steady state: four floats read and three packed words written per iteration.
// Each source line below is one pipeline step; the scale-and-add is done with the single-precision fmadds instruction.
while( loopCount-- )
{
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) ); copy4 = buffer[3]; copy2 = __rlwimi_volatile( copy2, copy3, 8, 24, 7 ); __stfiwx( converted, 1 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) ); (++dst)[0] = copy1; copy3 = __rlwimi_volatile( copy3, copy4, 8, 24, 15 ); copy1 = buffer[0]; copy2 = __rlwimi_volatile( copy2, copy2, 8, 0, 31 ); __stfiwx( converted2, 2 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; converted2 = __fctiw( scaled ); (++dst)[0] = copy2; copy3 = __rlwimi_volatile( copy3, copy3, 16, 0, 31 ); copy2 = buffer[1]; __stfiwx( converted, 3 * sizeof(float), buffer );
__asm__ ( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; converted = __fctiw( scaled2 ); (++dst)[0] = copy3; copy1 = __rlwimi_volatile( copy1, copy2, 8, 24, 31 ); __stfiwx( converted2, 0 * sizeof(float), buffer ); copy3 = buffer[2]; }
// Pipeline epilogue: packs and writes the eight samples still in flight (six words); no further reads from src.
copy2 = __rlwimi( copy2, copy3, 8, 24, 7 ); copy4 = buffer[3]; __stfiwx( converted, 1 * sizeof(float), buffer ); converted2 = __fctiw( scaled ); scaled2 = startingFloat * scale + round;
(++dst)[0] = copy1; copy2 = __rlwimi( copy2, copy2, 8, 0, 31 ); copy3 = __rlwimi( copy3, copy4, 8, 24, 15 ); copy1 = buffer[0]; __stfiwx( converted2, 2 * sizeof(float), buffer ); converted = __fctiw( scaled2 );
(++dst)[0] = copy2; copy3 = __rlwimi( copy3, copy3, 16, 0, 31 ); copy2 = buffer[1]; __stfiwx( converted, 3 * sizeof(float), buffer );
(++dst)[0] = copy3; copy1 = __rlwimi( copy1, copy2, 8, 24, 31 ); copy3 = buffer[2];
copy2 = __rlwimi( copy2, copy3, 8, 24, 7 ); copy4 = buffer[3];
(++dst)[0] = copy1; copy2 = __rlwimi( copy2, copy2, 8, 0, 31 ); copy3 = __rlwimi( copy3, copy4, 8, 24, 15 );
(++dst)[0] = copy2; copy3 = __rlwimi( copy3, copy3, 16, 0, 31 );
(++dst)[0] = copy3; }
// Runs whether or not the pipelined path was taken: undoes the dst-- above (or steps past the last word written).
dst++;
// Whatever is left (everything when count < 8), one sample at a time: top byte first, then the next two bytes as a halfword.
while( count-- )
{
startingFloat = (++src)[0]; scaled = startingFloat * scale + round; converted = __fctiw( scaled ); __stfiwx( converted, 0, buffer ); copy1 = buffer[0]; ((signed char*) dst)[0] = copy1 >> 24;
dst = (signed long*) ((signed char*) dst + 1 );
((unsigned short*) dst)[0] = copy1 >> 8;
dst = (signed long*) ((unsigned short*) dst + 1 );
}
// Restore the FPSCR fields that were changed on entry.
__asm__ __volatile__ ( "mtfsf 7, %0" : : "f" (oldSetting) );
}
// Convert count floats at src into packed 24-bit samples (3 bytes each, least significant byte first) at dst.
//
// dst is typed signed long* so the main path can write four samples as three 32-bit words.
//
// Technique: each float is multiplied by 2^31 and 128 is added (half of one 24-bit step at that scale); the
// result is converted to a 32-bit integer with fctiw and stored to a scratch buffer with stfiwx. Each converted
// word is read back with __lwbrx (byte-reversed), and rlwimi sequences merge four of them into three output words.
// The FPSCR rounding-mode bits are set to 3 (round toward -infinity on PowerPC) for the duration of the call
// and the previous FPSCR setting is restored before returning.
// NOTE(review): setting.i[1] is taken to be the low word of the double written by mffs, and buffer elements
// to be 32 bits wide (the __lwbrx offsets 4, 8, 12 depend on it); both hold for 32-bit big-endian PowerPC -
// confirm for the target.
//
// When count >= 8 the work is software pipelined, four samples per loop iteration.
void Float32ToSwapInt24( float *src, signed long *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 0.5 * 256.0;
// This initial value is never used: loopCount is reassigned before the only loop that reads it.
unsigned long loopCount = count / 4;
long buffer[4];
register float startingFloat, startingFloat2;
register double scaled, scaled2;
register double converted, converted2;
register long copy1;
register long copy2;
register long copy3;
register long copy4;
register double oldSetting;
// Save the FPSCR, then set both rounding-mode bits. "mtfsf 7" writes only the three lowest 4-bit FPSCR fields.
{
union
{
double d;
int i[2];
}setting;
register double newSetting;
asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
setting.d = oldSetting;
setting.i[1] |= 3;
newSetting = setting.d;
asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
}
// Step back one element so reads and writes below can use pre-increment.
src--;
dst--;
if( count >= 8 )
{
// Pipeline prologue: reads the first eight floats and starts their conversion.
startingFloat = (++src)[0];
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = __lwbrx( 0, buffer );
__stfiwx( converted, 2 * sizeof( float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy2 = __lwbrx( 4, buffer );
__stfiwx( converted, 3 * sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = __rlwimi( copy1, copy2, 8, 0, 7 );
copy3 = __lwbrx( 8, buffer );;
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
count -= 8;
loopCount = count / 4;
count &= 3;
// Steady state: four floats read and three packed words written per iteration.
// Each source line below is one pipeline step; the scale-and-add is done with the single-precision fmadds instruction.
while( loopCount-- )
{
copy1 = __rlwimi( copy1, copy1, 8, 0, 31 ); __asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) ); copy4 = __lwbrx( 12, buffer ); copy2 = __rlwimi_volatile( copy2, copy3, 8, 0, 15 ); __stfiwx( converted, 1 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) ); (++dst)[0] = copy1; copy4 = __rlwimi_volatile( copy4, copy3, 24, 0, 7 ); copy1 = __lwbrx( 0, buffer ); copy2 = __rlwimi_volatile( copy2, copy2, 16, 0, 31 ); __stfiwx( converted2, 2 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; converted2 = __fctiw( scaled ); (++dst)[0] = copy2; copy2 = __lwbrx( 4, buffer ); __stfiwx( converted, 3 * sizeof(float), buffer );
__asm__ ( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; converted = __fctiw( scaled2 ); (++dst)[0] = copy4; copy1 = __rlwimi_volatile( copy1, copy2, 8, 0, 7 ); __stfiwx( converted2, 0 * sizeof(float), buffer ); copy3 = __lwbrx( 8, buffer ); }
// Pipeline epilogue: packs and writes the eight samples still in flight (six words); no further reads from src.
copy1 = __rlwimi( copy1, copy1, 8, 0, 31 ); __asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) ); copy4 = __lwbrx( 12, buffer ); copy2 = __rlwimi_volatile( copy2, copy3, 8, 0, 15 ); __stfiwx( converted, 1 * sizeof(float), buffer );
__asm__ __volatile__( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) ); (++dst)[0] = copy1; copy4 = __rlwimi_volatile( copy4, copy3, 24, 0, 7 ); copy1 = __lwbrx( 0, buffer ); copy2 = __rlwimi_volatile( copy2, copy2, 16, 0, 31 ); __stfiwx( converted2, 2 * sizeof(float), buffer );
(++dst)[0] = copy2; copy2 = __lwbrx( 4, buffer ); __stfiwx( converted, 3 * sizeof(float), buffer );
(++dst)[0] = copy4; copy1 = __rlwimi_volatile( copy1, copy2, 8, 0, 7 ); copy3 = __lwbrx( 8, buffer );
copy1 = __rlwimi( copy1, copy1, 8, 0, 31 ); copy4 = __lwbrx( 12, buffer ); copy2 = __rlwimi_volatile( copy2, copy3, 8, 0, 15 );
(++dst)[0] = copy1; copy4 = __rlwimi_volatile( copy4, copy3, 24, 0, 7 ); copy2 = __rlwimi_volatile( copy2, copy2, 16, 0, 31 );
(++dst)[0] = copy2;
(++dst)[0] = copy4; }
// Runs whether or not the pipelined path was taken: undoes the dst-- above (or steps past the last word written).
dst++;
// Whatever is left (everything when count < 8), one sample at a time: one byte, then the following two bytes as a halfword.
while( count-- )
{
startingFloat = (++src)[0]; scaled = startingFloat * scale + round; converted = __fctiw( scaled ); __stfiwx( converted, 0, buffer ); copy1 = __lwbrx( 0, buffer); ((signed char*) dst)[0] = copy1 >> 16;
dst = (signed long*) ((signed char*) dst + 1 );
((unsigned short*) dst)[0] = copy1;
dst = (signed long*) ((unsigned short*) dst + 1 );
}
// Restore the FPSCR fields that were changed on entry.
__asm__ __volatile__ ( "mtfsf 7, %0" : : "f" (oldSetting) );
}
/*
 * Float32ToSwapInt32
 *
 * Converts `count` single-precision samples at `src` into 32-bit integers at
 * `dst`, writing each result with stwbrx (byte-reversed store).
 *
 * Each sample is multiplied by 2147483648.0 (2^31), converted to an integer
 * word with fctiw, spilled to a small stack buffer with stfiwx, reloaded into
 * a general-purpose register and finally stored byte-reversed.
 *
 * Parameters:
 *   src   - input samples; `count` floats are read.
 *   dst   - output buffer; `count` 32-bit words are written.
 *   count - number of samples to convert.
 *
 * The FPSCR is saved on entry, modified for the duration of the conversion
 * and restored before returning.
 *
 * When count >= 6 a software-pipelined path is used that keeps six samples in
 * flight (prologue fills the pipe, the loop retires/ingests two samples per
 * iteration, the epilogue drains the pipe); any remainder is handled by a
 * simple one-sample-at-a-time loop.
 *
 * NOTE(review): buffer is `long[2]` and is addressed with byte offsets 0 and
 * sizeof(float); that only lines up with buffer[0]/buffer[1] when long is
 * 4 bytes (32-bit PowerPC) - confirm before building for a 64-bit target.
 */
void Float32ToSwapInt32( float *src, signed long *dst, unsigned int count )
{
    register double scale = 2147483648.0;
    unsigned long loopCount;            /* set once the pipelined path is entered */
    long buffer[2];                     /* FPR -> GPR staging area (stfiwx target) */
    register float startingFloat;
    register double scaled;
    register double converted;
    register long copy;
    register double oldSetting;

    /* Save the FPSCR and clear its two low-order bits for the conversion.
     * NOTE(review): per the PowerPC FPSCR layout those bits are the RN
     * (rounding mode) field, so this presumably selects round-to-nearest. */
    {
        union
        {
            double d;
            int i[2];
        }setting;
        register double newSetting;

        asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
        setting.d = oldSetting;
        setting.i[1] &= 0xFFFFFFFC;
        newSetting = setting.d;
        asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
    }

    if( count >= 6 )
    {
        /* Prologue: load six samples and start them down the pipe.
         * On exit: sample 0 is in `copy`, sample 1 in buffer[1], sample 2 in
         * buffer[0], sample 3 in `converted`, sample 4 in `scaled`, sample 5
         * in `startingFloat`. */
        startingFloat = (src++)[0];

        scaled = startingFloat * scale;
        startingFloat = (src++)[0];

        converted = __fctiw( scaled );
        scaled = startingFloat * scale;
        startingFloat = (src++)[0];

        __stfiwx( converted, 0, buffer );
        converted = __fctiw( scaled );
        scaled = startingFloat * scale;
        startingFloat = (src++)[0];

        __stfiwx( converted, sizeof(float), buffer );
        converted = __fctiw( scaled );
        scaled = startingFloat * scale;
        startingFloat = (src++)[0];

        copy = buffer[0];
        __stfiwx( converted, 0, buffer );
        converted = __fctiw( scaled );
        scaled = startingFloat * scale;
        startingFloat = (src++)[0];

        count -= 6;
        loopCount = count / 2;          /* two samples per loop iteration */
        count &= 1;                     /* at most one sample left for the tail loop */

        while( loopCount-- )
        {
            register float startingFloat2;
            register double scaled2;
            register double converted2;
            register long copy2;

            /* The stores below write through dst, so they carry a "memory"
             * clobber like the file's __stwbrx macro does. */
            __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
            __asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
            copy2 = buffer[1];
            __asm__ __volatile__ ( "fmuls %0, %1, %2" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale) );
            __asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (sizeof(*buffer)), "r" (buffer) : "memory" );
            startingFloat2 = (src)[0]; src+=2;
            dst+=2;

            /* dst has already been advanced by two, so the second word of the
             * pair lives at dst - 1.  The index operand sits in the RA slot of
             * stwbrx, where register r0 is read as the constant 0; it must
             * therefore use the "b" (base register, never r0) constraint
             * rather than "r", otherwise the store could land on dst + 0. */
            __asm__ __volatile__ ( "stwbrx %0, %1, %2" : : "r" (copy2), "b" (-sizeof(dst[0])), "r" (dst) : "memory" );
            __asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
            copy = buffer[0];
            __asm__ __volatile__ ( "fmuls %0, %1, %2" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale) );
            __asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (0), "r" (buffer) : "memory" );
            startingFloat = (src)[-1];
        }

        /* Epilogue: drain the six samples still in flight. */
        __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
        copy = buffer[1];
        __stfiwx( converted, sizeof(float), buffer );
        converted = __fctiw( scaled );
        scaled = startingFloat * scale;

        __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
        copy = buffer[0];
        __stfiwx( converted, 0, buffer );
        converted = __fctiw( scaled );

        __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
        copy = buffer[1];
        __stfiwx( converted, sizeof(float), buffer );

        __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
        copy = buffer[0];

        __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
        copy = buffer[1];

        __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
    }

    /* Tail: fewer than six samples in total, or the single odd sample left
     * over from the pipelined path. */
    while( count-- )
    {
        double scaled = src[0] * scale;
        double converted = __fctiw( scaled );
        __stfiwx( converted, 0, buffer );
        copy = buffer[0];
        __asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
        src++;
        dst++;
    }

    /* Restore the caller's FPSCR contents. */
    __asm__ __volatile__ ( "mtfsf 7, %0" : : "f" (oldSetting) );
}
/*
 * Float32ToNativeInt32
 *
 * Converts `count` single-precision samples at `src` into 32-bit integers at
 * `dst`. Each sample is multiplied by 2147483648.0 (2^31), converted to an
 * integer word with fctiw and written straight to `dst` with stfiwx (no byte
 * swap, no intermediate buffer).
 *
 * Parameters:
 *   src   - input samples; `count` floats are read.
 *   dst   - output buffer; `count` 32-bit words are written.
 *   count - number of samples to convert.
 *
 * The FPSCR is saved on entry, modified for the duration of the conversion
 * and restored before returning.
 *
 * When count >= 3 a software-pipelined path keeps three samples in flight;
 * whatever is left is handled by the simple loop at the end.
 */
void Float32ToNativeInt32( float *src, signed long *dst, unsigned int count )
{
register double scale = 2147483648.0;
/* Only assigned and used inside the count >= 3 path. */
unsigned long loopCount;
register float startingFloat;
register double scaled;
register double converted;
register double oldSetting;
/* Save the FPSCR and clear its two low-order bits for the conversion.
 * NOTE(review): per the PowerPC FPSCR layout those bits are the RN (rounding
 * mode) field, so this presumably selects round-to-nearest - confirm. */
{
union
{
double d;
int i[2];
}setting;
register double newSetting;
asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
setting.d = oldSetting;
setting.i[1] &= 0xFFFFFFFC;
newSetting = setting.d;
asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
}
if( count >= 3 )
{
/* Prologue: load three samples. Afterwards sample 0 is in `converted`,
 * sample 1 is in `scaled` and sample 2 is in `startingFloat`. */
startingFloat = (src++)[0];
scaled = startingFloat * scale;
startingFloat = (src++)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale;
startingFloat = (src++)[0];
count -= 3;
/* Two samples are retired and two are loaded per loop iteration; an odd
 * leftover sample falls through to the tail loop. */
loopCount = count / 2;
count &= 1;
while( loopCount-- )
{
register float startingFloat2;
register double scaled2;
register double converted2;
/* First half: store the oldest result to dst[0], convert the next one and
 * scale the newest loaded sample while fetching another from src. */
startingFloat2 = (src)[0];
__asm__ __volatile__ ( "fmul %0, %1, %2" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (0), "r" (dst) : "memory" );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
/* Second half: same steps for the following sample, storing to dst[1]
 * (byte offset 4). */
startingFloat = (src)[1]; src+=2;
__asm__ __volatile__ ( "fmul %0, %1, %2" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (4), "r" (dst) : "memory" );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
dst+=2;
}
/* Epilogue: drain the three samples still in flight. */
__stfiwx( converted, 0, dst++ );
converted = __fctiw( scaled );
__asm__ __volatile__ ( "fmul %0, %1, %2" : "=f" (scaled) : "f" ( startingFloat), "f" (scale) );
__stfiwx( converted, 0, dst++ );
converted = __fctiw( scaled );
__stfiwx( converted, 0, dst++ );
}
/* Tail: fewer than three samples in total, or the single odd sample left
 * over from the pipelined path. These locals shadow the outer `scaled` and
 * `converted`. */
while( count-- )
{
double scaled = src[0] * scale;
double converted = __fctiw( scaled );
__stfiwx( converted, 0, dst );
dst++;
src++;
}
/* Restore the caller's FPSCR contents. */
asm volatile( "mtfsf 7, %0" : : "f" (oldSetting) );
}
#endif // TARGET_CPU_PPC