// IOAudioBlitterLib.h
#ifndef __IOAudioBlitterLib_h__
#define __IOAudioBlitterLib_h__
#include <TargetConditionals.h>
#include <libkern/OSTypes.h>
#include <libkern/OSByteOrder.h>
// Floating-point sample types used by the converters declared in this header.
typedef float Float32;
typedef double Float64;
// Applies the compiler's hidden-visibility attribute to a declaration.
#define NO_EXPORT __attribute__((visibility("hidden")))
// Feature switches selecting the architecture-specific code paths below.
// PCMBLIT_PPC mirrors TARGET_CPU_PPC; PCMBLIT_X86 is nonzero when either
// __i386__ or __LP64__ is defined to a nonzero value.
#define PCMBLIT_PPC TARGET_CPU_PPC
#define PCMBLIT_X86 (__i386__ || __LP64__)
// Sanity checks: with the two definitions above these conditions cannot be
// true; they only fire if one of the switches is edited to disable a path.
#if TARGET_CPU_PPC && !PCMBLIT_PPC
#warning "PPC optimizations turned off"
#endif
#if __i386__ && !PCMBLIT_X86
#warning "X86 optimizations turned off"
#endif
// Enables the Interleave/Deinterleave virtuals on PCMBlitter (PPC builds only).
#define PCMBLIT_INTERLEAVE_SUPPORT TARGET_CPU_PPC // uses Altivec
// Pointer-to-const-void alias used in the Interleave/Deinterleave signatures.
typedef const void *ConstVoidPtr;
#pragma mark -
#pragma mark Utilities
/*
 * Floating-point environment helpers.
 *
 * Every branch defines the same statement-like macros so that code using them
 * compiles on all targets:
 *
 *   SET_ROUNDMODE / RESTORE_ROUNDMODE      save the FP control state, switch
 *                                          rounding, and later put it back
 *   DISABLE_DENORMALS / RESTORE_DENORMALS  same pattern for denormal handling
 *
 * The "set" macros declare a local (oldSetting or _savemxcsr) that the
 * matching "restore" macro reads, so each pair must be used in one scope.
 * On x86, DISABLE_DENORMALS and the ROUNDMODE_* macros declare the same
 * local and therefore cannot both be used in a single scope.
 */
#if TARGET_OS_MAC && TARGET_CPU_PPC
/* Reads the FP status/control register with mffs, sets the two low bits of
 * its low word (setting.i[1] |= 3) and writes the result back with mtfsf.
 * NOTE(review): per the PowerPC architecture those bits are the rounding
 * control field and 0b11 selects round toward -infinity -- confirm. */
#define SET_ROUNDMODE \
	double oldSetting; \
	{ \
		union { \
			double d; \
			int i[2]; \
		} setting; \
		register double newSetting; \
		__asm__ __volatile__ ( "mffs %0" : "=f" ( oldSetting ) ); \
		setting.d = oldSetting; \
		setting.i[1] |= 3; \
		newSetting = setting.d; \
		__asm__ __volatile__( "mtfsf 7, %0" : : "f" (newSetting ) ); \
	}
#define RESTORE_ROUNDMODE \
	__asm__ __volatile__ ( "mtfsf 7, %0" : : "f" (oldSetting) );
/* No denormal control on this path. */
#define DISABLE_DENORMALS
#define RESTORE_DENORMALS
#elif TARGET_OS_MAC && (__i386__ || __LP64__)
/* The 32-bit and 64-bit paths were identical, so they share one branch.
 * GETCSR/SETCSR read and write the SSE control/status register (MXCSR). */
#define GETCSR() ({ int _result; asm volatile ("stmxcsr %0" : "=m" (*&_result) ); _result; })
#define SETCSR( a ) { int _temp = (a); asm volatile( "ldmxcsr %0" : : "m" (*&_temp ) ); }
/* 0x8040 sets MXCSR bits 15 and 6 (flush-to-zero and denormals-are-zero). */
#define DISABLE_DENORMALS int _savemxcsr = GETCSR(); SETCSR(_savemxcsr | 0x8040);
#define RESTORE_DENORMALS SETCSR(_savemxcsr);
/* 0x6000 masks the MXCSR rounding-control field; 0x2000 selects round down. */
#define ROUNDMODE_NEG_INF int _savemxcsr = GETCSR(); SETCSR((_savemxcsr & ~0x6000) | 0x2000);
#define ROUNDMODE_NEAREST int _savemxcsr = GETCSR(); SETCSR((_savemxcsr & ~0x6000));
#define RESTORE_ROUNDMODE SETCSR(_savemxcsr);
#define SET_ROUNDMODE ROUNDMODE_NEG_INF
#else
/* Portable fallback: the FP environment is left untouched.  SET_ROUNDMODE
 * and ROUNDMODE_NEAREST must exist here too, because the converters expand
 * SET_ROUNDMODE unconditionally. */
#define DISABLE_DENORMALS
#define RESTORE_DENORMALS
#define ROUNDMODE_NEG_INF
#define ROUNDMODE_NEAREST
#define SET_ROUNDMODE
#define RESTORE_ROUNDMODE
#endif
/*
 * FloatToInt: convert an already-scaled sample to a saturated 32-bit integer.
 *
 *   inf    value to convert
 *   min32  lower saturation threshold; values at or below it (and NaN)
 *          yield 0x80000000
 *   max32  upper saturation threshold; values at or above it yield 0x7FFFFFFF
 *
 * PPC: the conversion is done by the fctiw instruction and the thresholds are
 * unused.  NOTE(review): fctiw is expected to saturate and to honour the
 * current rounding mode -- confirm against the PowerPC ISA.
 *
 * All other targets: clamp explicitly, then cast.  The x86 path used to skip
 * the lower clamp, which made the cast undefined behaviour for values below
 * the SInt32 range and for NaN.  The explicit test returns 0x80000000 for
 * those inputs.
 * NOTE(review): x86 conversion instructions are believed to produce the same
 * value, so results should be unchanged when min32 is -2147483648.0 (the
 * value the converters in this header pass) -- confirm.
 */
static inline SInt32 FloatToInt(double inf, double min32, double max32)
{
#if TARGET_CPU_PPC
#pragma unused ( min32,max32)
	SInt32 i;
	union {
		Float64	d;
		UInt32	i[2];
	} u;
	__asm__ ("fctiw %0, %1"
			 : "=f" (u.d)
			 : "f" (inf));
	/* The integer result is taken from the second 32-bit word of the double. */
	i = u.i[1];
	return i;
#else
	if (inf >= max32) return 0x7FFFFFFF;
	/* Written as !(>) so that NaN, which fails every comparison, saturates
	 * low instead of reaching the cast. */
	if (!(inf > min32)) return (SInt32)(-2147483647 - 1);
	return (SInt32)inf;
#endif
}
#ifdef __cplusplus
#pragma mark -
#pragma mark C++ templates
// Abstract base class for the sample-format converters in this header.
class PCMBlitter {
public:
// Converts nSamples samples read from vsrc and writes them to vdest.  The
// source and destination sample formats are fixed by the concrete subclass.
virtual void Convert(const void *vsrc, void *vdest, unsigned int nSamples) = 0;
// Virtual so a derived blitter can be destroyed through a PCMBlitter pointer.
virtual ~PCMBlitter() { }
#if PCMBLIT_INTERLEAVE_SUPPORT
// Optional hooks, compiled only when PCMBLIT_INTERLEAVE_SUPPORT is nonzero.
// The default implementations do nothing.
virtual void Interleave(ConstVoidPtr , void *, int , unsigned int ) { }
virtual void Deinterleave(ConstVoidPtr , void *, int , unsigned int ) { }
#endif
};
// Accessor policy for 32-bit floating-point samples stored as-is.
class PCMFloat32 {
public:
	typedef Float32 value_type;

	// Returns the sample stored at p.
	static value_type load(const value_type *p)
	{
		return p[0];
	}

	// Writes val into the sample slot at p.
	static void store(value_type *p, float val)
	{
		p[0] = val;
	}
};
// Accessor policy for 64-bit floating-point samples stored as-is.
class PCMFloat64 {
public:
	typedef Float64 value_type;

	// Returns the sample stored at p.
	static value_type load(const value_type *p)
	{
		return p[0];
	}

	// Writes val into the sample slot at p.
	static void store(value_type *p, value_type val)
	{
		p[0] = val;
	}
};
// Accessor policy for signed 8-bit samples.
class PCMSInt8 {
public:
	typedef SInt8 value_type;

	// Returns the sample stored at p.
	static value_type load(const value_type *p)
	{
		return p[0];
	}

	// Narrows val to 8 bits and writes it at p.
	static void store(value_type *p, int val)
	{
		p[0] = val;
	}
};
// Accessor policy for 8-bit samples whose top bit is inverted in memory.
// Both directions XOR with 0x80, so callers always see a signed value while
// the stored byte has its top bit flipped.
class PCMUInt8 {
public:
	typedef SInt8 value_type;

	// Reads the byte at p and returns it with the top bit flipped.
	static value_type load(const value_type *p)
	{
		const int flipped = *p ^ 0x80;
		return flipped;
	}

	// Flips the top bit of val and writes the result at p.
	static void store(value_type *p, int val)
	{
		const int flipped = val ^ 0x80;
		*p = flipped;
	}
};
// Accessor policy for signed 16-bit samples read and written without byte
// swapping.
class PCMSInt16Native {
public:
	typedef SInt16 value_type;

	// Returns the sample stored at p.
	static value_type load(const value_type *p)
	{
		return p[0];
	}

	// Narrows val to 16 bits and writes it at p.
	static void store(value_type *p, int val)
	{
		p[0] = val;
	}
};
// Accessor policy for signed 16-bit samples stored in the opposite byte order
// from the host: load() and store() byte-swap on the way in and out.
class PCMSInt16Swap {
public:
typedef SInt16 value_type;
// Returns the byte-swapped 16-bit value stored at p.
static value_type load(const value_type *p)
{
#if PCMBLIT_PPC
// PPC: byte-reversed halfword load instruction.
register SInt32 result;
__asm__ volatile("lhbrx %0, %1, %2"
: "=r" (result)
: "b%" (0), "r" (p)
: "memory");
return result;
#elif PCMBLIT_X86 && !TARGET_OS_WIN32
// x86 (non-Windows): libkern byte-order helper, offset 0 from p.
return OSReadSwapInt16(p, 0);
#else
// Everything else: generic swap macro/function supplied elsewhere.
return Endian16_Swap(*p);
#endif
}
// Writes the low 16 bits of val at p in swapped byte order.
static void store(value_type *p, int val)
{
#if PCMBLIT_PPC
// PPC: byte-reversed halfword store instruction.
__asm__ volatile("sthbrx %0, %1, %2" : : "r" (val), "b%" (0), "r" (p) : "memory");
#elif PCMBLIT_X86 && !TARGET_OS_WIN32
OSWriteSwapInt16(p, 0, val);
#else
*p = Endian16_Swap(val);
#endif
}
};
// Accessor policy for signed 32-bit samples read and written without byte
// swapping.
class PCMSInt32Native {
public:
	typedef SInt32 value_type;

	// Returns the sample stored at p.
	static value_type load(const value_type *p)
	{
		return p[0];
	}

	// Writes val into the sample slot at p.
	static void store(value_type *p, int val)
	{
		p[0] = val;
	}
};
// Accessor policy for 32-bit samples stored in the opposite byte order from
// the host.  value_type is UInt32, so load() returns the swapped bit pattern
// as an unsigned value; callers that need a signed sample must convert it.
class PCMSInt32Swap {
public:
	typedef UInt32 value_type;

	// Returns the byte-swapped 32-bit word stored at p.
	static value_type load(const value_type *p)
	{
#if PCMBLIT_PPC
		// PPC: byte-reversed word load instruction.
		register long lwbrxResult;
		__asm__ volatile("lwbrx %0, %1, %2" : "=r" (lwbrxResult) : "b%" (0), "r" (p) : "memory");
		return lwbrxResult;
#elif PCMBLIT_X86 && !TARGET_OS_WIN32
		// x86 (non-Windows): libkern byte-order helper, offset 0 from p.
		return OSReadSwapInt32(p, 0);
#else
		return Endian32_Swap(*p);
#endif
	}

	// Writes val at p in swapped byte order.
	// The unswapped "*p = val;" that used to precede the swapped store was
	// removed: every branch below overwrites *p, so it was a dead store.
	static void store(value_type *p, int val)
	{
#if PCMBLIT_PPC
		// PPC: byte-reversed word store instruction.
		__asm__ volatile("stwbrx %0, %1, %2" : : "r" (val), "b%" (0), "r" (p) : "memory");
#elif PCMBLIT_X86 && !TARGET_OS_WIN32
		OSWriteSwapInt32(p, 0, val);
#else
		*p = Endian32_Swap(val);
#endif
	}
};
class PCMFloat64Swap {
public:
typedef Float64 value_type;
static value_type load(const value_type *vp) {
union {
Float64 d;
UInt32 i[2];
} u;
UInt32 *p = (UInt32 *)vp;
u.i[0] = PCMSInt32Swap::load(p + 1);
u.i[1] = PCMSInt32Swap::load(p + 0);
return u.d;
}
static void store(value_type *vp, value_type val) {
union {
Float64 d;
UInt32 i[2];
} u;
u.d = val;
UInt32 *p = (UInt32 *)vp;
PCMSInt32Swap::store(p + 1, u.i[0]);
PCMSInt32Swap::store(p + 0, u.i[1]);
}
};
// Shared state for float-to-integer converters.
//   mShift  32 - bitDepth: how far a 32-bit result is shifted right to reach
//           the destination bit depth
//   mRound  2^(mShift - 1), i.e. half of one destination LSB expressed in
//           32-bit units, or 0 when no shift is needed
class FloatToIntBlitter : public PCMBlitter {
public:
	FloatToIntBlitter(int bitDepth)
		: mRound(0.), mShift(32 - bitDepth)
	{
		if (mShift > 0) {
			mRound = double(1L << (mShift - 1));
		}
	}

protected:
	double	mRound;
	int		mShift;
};
// Converts floating-point samples to integers.
// FloatType and IntType are accessor policies (value_type, load, store) such
// as PCMFloat32 and PCMSInt16Native.  Each sample is multiplied by 2^31,
// offset by mRound, converted with FloatToInt, shifted right by mShift and
// written through IntType::store.
template <class FloatType, class IntType>
class TFloatToIntBlitter : public FloatToIntBlitter {
public:
typedef typename FloatType::value_type float_val;
typedef typename IntType::value_type int_val;
TFloatToIntBlitter(int bitDepth) : FloatToIntBlitter(bitDepth) { }
// Converts nSamples samples from vsrc (float_val array) into vdest (int_val
// array).
virtual void Convert(const void *vsrc, void *vdest, unsigned int nSamples)
{
const float_val *src = (const float_val *)vsrc;
int_val *dest = (int_val *)vdest;
// maxInt32 is the scale factor (2^31); max32/min32 are the saturation
// thresholds handed to FloatToInt.  max32 is lowered by the rounding offset
// because that offset has already been added to the value being tested.
double maxInt32 = 2147483648.0; double round = mRound;
double max32 = maxInt32 - 1.0 - round;
double min32 = -2147483648.0;
int shift = mShift, count;
double f1, f2, f3, f4;
int i1, i2, i3, i4;
// May declare a local holding the previous FP control state; the matching
// RESTORE_ROUNDMODE at the end of the function reads it.
SET_ROUNDMODE
// Fast path: loads, scaling, conversion and stores belonging to different
// samples are interleaved (presumably to overlap their latencies).
if (nSamples >= 8) {
// Prologue: start the first three samples (loads src[0..2]).
f1 = FloatType::load(src + 0);
f2 = FloatType::load(src + 1);
f1 = f1 * maxInt32 + round;
f3 = FloatType::load(src + 2);
f2 = f2 * maxInt32 + round;
i1 = FloatToInt(f1, min32, max32);
src += 3;
// Four samples are handled by the prologue plus epilogue; the loop runs once
// per remaining group of four, and the final 0-3 go to the scalar tail loop.
nSamples -= 4;
count = nSamples >> 2;
nSamples &= 3;
while (count--) {
// Each iteration loads four new samples and stores four finished ones.
f4 = FloatType::load(src + 0);
f3 = f3 * maxInt32 + round;
i2 = FloatToInt(f2, min32, max32);
IntType::store(dest + 0, i1 >> shift);
f1 = FloatType::load(src + 1);
f4 = f4 * maxInt32 + round;
i3 = FloatToInt(f3, min32, max32);
IntType::store(dest + 1, i2 >> shift);
f2 = FloatType::load(src + 2);
f1 = f1 * maxInt32 + round;
i4 = FloatToInt(f4, min32, max32);
IntType::store(dest + 2, i3 >> shift);
f3 = FloatType::load(src + 3);
f2 = f2 * maxInt32 + round;
i1 = FloatToInt(f1, min32, max32);
IntType::store(dest + 3, i4 >> shift);
src += 4;
dest += 4;
}
// Epilogue: load one more sample and drain the four still in flight.
f4 = FloatType::load(src);
f3 = f3 * maxInt32 + round;
i2 = FloatToInt(f2, min32, max32);
IntType::store(dest + 0, i1 >> shift);
f4 = f4 * maxInt32 + round;
i3 = FloatToInt(f3, min32, max32);
IntType::store(dest + 1, i2 >> shift);
i4 = FloatToInt(f4, min32, max32);
IntType::store(dest + 2, i3 >> shift);
IntType::store(dest + 3, i4 >> shift);
src += 1;
dest += 4;
}
// Scalar tail: whatever is left (all samples when nSamples < 8).
count = nSamples;
while (count--) {
f1 = FloatType::load(src) * maxInt32 + round;
i1 = FloatToInt(f1, min32, max32) >> shift;
IntType::store(dest, i1);
src += 1;
dest += 1;
}
RESTORE_ROUNDMODE
}
// Disabled: two-channel interleaving variant, kept for reference only.
#if 0//PCMBLIT_INTERLEAVE_SUPPORT
virtual void Interleave(ConstVoidPtr vsrc[], void *vdest, int nChannels, unsigned int nFrames)
{
verify_action(nChannels == 2, return);
const float_val *srcA = (const float_val *)vsrc[0];
const float_val *srcB = (const float_val *)vsrc[1];
int_val *dest = (int_val *)vdest;
double maxInt32 = 2147483648.0; double round = mRound;
double max32 = maxInt32 - 1.0 - round;
double min32 = -2147483648.0;
int shift = mShift, count;
double f1, f2, f3, f4;
int i1, i2, i3, i4;
SET_ROUNDMODE
count = nFrames >> 1;
while (count--) {
f1 = FloatType::load(srcA);
f2 = FloatType::load(srcB);
f3 = FloatType::load(srcA+1);
f4 = FloatType::load(srcB+1);
f1 = f1 * maxInt32 + round;
f2 = f2 * maxInt32 + round;
f3 = f3 * maxInt32 + round;
f4 = f4 * maxInt32 + round;
i1 = FloatToInt(f1, min32, max32) >> shift;
i2 = FloatToInt(f2, min32, max32) >> shift;
i3 = FloatToInt(f3, min32, max32) >> shift;
i4 = FloatToInt(f4, min32, max32) >> shift;
IntType::store(dest, i1);
IntType::store(dest+1, i2);
IntType::store(dest+2, i3);
IntType::store(dest+3, i4);
srcA += 2;
srcB += 2;
dest += 4;
}
count = nFrames & 1;
while (count--) {
f1 = FloatType::load(srcA);
f2 = FloatType::load(srcB);
f1 = f1 * maxInt32 + round;
f2 = f2 * maxInt32 + round;
i1 = FloatToInt(f1, min32, max32) >> shift;
i2 = FloatToInt(f2, min32, max32) >> shift;
IntType::store(dest, i1);
IntType::store(dest+1, i2);
srcA += 1;
srcB += 1;
dest += 2;
}
RESTORE_ROUNDMODE
}
#endif // PCMBLIT_INTERLEAVE_SUPPORT
};
// Shared state for integer-to-float converters.
//   mScale     1 / 2^(bitDepth - 1): factor applied to each integer sample
//   mBitDepth  the bit depth the blitter was constructed with
class IntToFloatBlitter : public PCMBlitter {
public:
	IntToFloatBlitter(int bitDepth)
		: mBitDepth(bitDepth)
	{
		const float fullScale = float(1UL << (bitDepth - 1));
		mScale = static_cast<Float32>(1.0 / fullScale);
	}

	Float32	mScale;
	UInt32	mBitDepth;
};
// Converts integer samples to floating point.
// IntType and FloatType are accessor policies (value_type, load, store) such
// as PCMSInt16Native and PCMFloat32.  Each output sample is the loaded
// integer converted to float_val and multiplied by mScale.
template <class IntType, class FloatType>
class TIntToFloatBlitter : public IntToFloatBlitter {
public:
	typedef typename FloatType::value_type float_val;
	typedef typename IntType::value_type int_val;

	TIntToFloatBlitter(int bitDepth) : IntToFloatBlitter(bitDepth) { }

	// Converts nSamples samples from vsrc (int_val array) into vdest
	// (float_val array).
	virtual void Convert(const void *vsrc, void *vdest, unsigned int nSamples)
	{
		const int_val *src = (const int_val *)vsrc;
		float_val *dest = (float_val *)vdest;
		// Unsigned like nSamples, so very large counts cannot go negative.
		unsigned int count = nSamples;
		Float32 scale = mScale;
		// The temporaries are plain signed int rather than int_val:
		// PCMSInt32Swap::value_type is UInt32, and converting that directly
		// to floating point would turn negative samples into large positive
		// values.  For the signed accessor types this is value-preserving.
		int i0, i1, i2, i3;
		float_val f0, f1, f2, f3;

		// Fast path: loads, int-to-float conversion, scaling and stores for
		// different samples are interleaved.
		if (count >= 4) {
			// Prologue: load four samples and finish the first one.
			i0 = IntType::load(src); ++src;
			i1 = IntType::load(src); ++src;
			f0 = i0;
			i2 = IntType::load(src); ++src;
			f1 = i1;
			f0 *= scale;
			i3 = IntType::load(src); ++src;
			f2 = i2;
			f1 *= scale;
			FloatType::store(dest, f0); ++dest;

			// The prologue/epilogue pair accounts for four samples; the loop
			// handles the remaining complete groups of four.
			count -= 4;
			unsigned int loopCount = count / 4;
			count -= 4 * loopCount;
			while (loopCount--) {
				i0 = IntType::load(src); ++src;
				f3 = i3;
				f2 *= scale;
				FloatType::store(dest, f1); ++dest;
				i1 = IntType::load(src); ++src;
				f0 = i0;
				f3 *= scale;
				FloatType::store(dest, f2); ++dest;
				i2 = IntType::load(src); ++src;
				f1 = i1;
				f0 *= scale;
				FloatType::store(dest, f3); ++dest;
				i3 = IntType::load(src); ++src;
				f2 = i2;
				f1 *= scale;
				FloatType::store(dest, f0); ++dest;
			}

			// Epilogue: drain the three samples still in flight.
			f3 = i3;
			f2 *= scale;
			FloatType::store(dest, f1); ++dest;
			f3 *= scale;
			FloatType::store(dest, f2); ++dest;
			FloatType::store(dest, f3); ++dest;
		}

		// Scalar tail: the last 0-3 samples (or everything when fewer than
		// four were requested).
		while (count--) {
			i0 = IntType::load(src); ++src;
			f0 = i0;
			f0 *= scale;
			FloatType::store(dest, f0); ++dest;
		}
	}

	// Disabled: two-channel deinterleaving variant, kept for reference only.
#if 0//PCMBLIT_INTERLEAVE_SUPPORT
	virtual void Deinterleave(ConstVoidPtr vsrc, void *vdest[], int nChannels, unsigned int nFrames)
	{
		verify_action(nChannels == 2, return);
		const int_val *src = (const int_val *)vsrc;
		float_val *destA = (float_val *)vdest[0];
		float_val *destB = (float_val *)vdest[1];
		int count;
		Float32 scale = mScale;
		count = nFrames;
		while (count--) {
			int i1 = IntType::load(src);
			int i2 = IntType::load(src+1);
			float_val f1 = i1 * scale;
			float_val f2 = i2 * scale;
			FloatType::store(destA, f1);
			FloatType::store(destB, f2);
			src += 2;
			destA += 1;
			destB += 1;
		}
	}
#endif // PCMBLIT_INTERLEAVE_SUPPORT
};
#endif // __cplusplus
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Architecture-specific converters (declarations only).
 * Each takes a source buffer, a destination buffer and a sample count.
 * The former trailing "#elif __LP64__" branch was removed: it could never be
 * selected, because the preceding branch already matches whenever __LP64__
 * is defined, and it repeated the same declarations.
 */
#if TARGET_CPU_PPC
#pragma mark -
#pragma mark PPC scalar
NO_EXPORT void NativeInt16ToFloat32_Scalar( const SInt16 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt16ToFloat32_Scalar( const SInt16 *src, Float32 *dest, unsigned int count );
NO_EXPORT void NativeInt24ToFloat32_Scalar( const SInt32 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt24ToFloat32_Scalar( const SInt32 *src, Float32 *dest, unsigned int count );
NO_EXPORT void NativeInt32ToFloat32_Scalar( const SInt32 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt32ToFloat32_Scalar( const SInt32 *src, Float32 *dest, unsigned int count );
NO_EXPORT void Float32ToNativeInt16_Scalar( const Float32 *src, SInt16 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt16_Scalar( const Float32 *src, SInt16 *dest, unsigned int count );
NO_EXPORT void Float32ToNativeInt24_Scalar( const Float32 *src, SInt32 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt24_Scalar( const Float32 *src, SInt32 *dest, unsigned int count );
NO_EXPORT void Float32ToNativeInt32_Scalar( const Float32 *src, SInt32 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt32_Scalar( const Float32 *src, SInt32 *dest, unsigned int count );
#pragma mark -
#pragma mark PPC Altivec
/* The Altivec entry points take the byte order as a run-time flag. */
NO_EXPORT void Float32ToInt16_Altivec(const Float32 *fsrc, SInt16 *idst, unsigned int numToConvert, int bigEndian);
NO_EXPORT void Float32ToInt24_Altivec(const Float32 *fsrc, UInt8 *dst, unsigned int numToConvert, int bigEndian);
NO_EXPORT void Float32ToInt32_Altivec(const Float32 *fsrc, SInt32 *dst, unsigned int numToConvert, int bigEndian);
NO_EXPORT void Int16ToFloat32_Altivec(const SInt16 *isrc, Float32 *fdst, unsigned int numToConvert, int bigEndian);
NO_EXPORT void Int24ToFloat32_Altivec(const UInt8 *isrc, Float32 *fdst, unsigned int numToConvert, int bigEndian);
NO_EXPORT void Int32ToFloat32_Altivec(const SInt32 *isrc, Float32 *fdst, unsigned int numToConvert, int bigEndian);
NO_EXPORT void DeinterleaveInt16ToFloat32_Altivec(const SInt16 *isrc, Float32 *dstA, Float32 *dstB, unsigned int numFrames, int bigEndian);
NO_EXPORT void Interleave32_Altivec(const void *vsrcA, const void *vsrcB, void *vdst, unsigned int numFrames);
#elif defined( __i386__ ) || defined( __LP64__ )
#pragma mark -
#pragma mark X86 SSE2
NO_EXPORT void NativeInt16ToFloat32_X86( const SInt16 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt16ToFloat32_X86( const SInt16 *src, Float32 *dest, unsigned int count );
NO_EXPORT void Float32ToNativeInt16_X86( const Float32 *src, SInt16 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt16_X86( const Float32 *src, SInt16 *dest, unsigned int count );
NO_EXPORT void Float32ToNativeInt24_X86( const Float32 *src, UInt8 *dst, unsigned int numToConvert );
NO_EXPORT void Float32ToSwapInt24_X86( const Float32 *src, UInt8 *dst, unsigned int numToConvert );
NO_EXPORT void NativeInt32ToFloat32_X86( const SInt32 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt32ToFloat32_X86( const SInt32 *src, Float32 *dest, unsigned int count );
NO_EXPORT void Float32ToNativeInt32_X86( const Float32 *src, SInt32 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt32_X86( const Float32 *src, SInt32 *dest, unsigned int count );
#endif
#pragma mark -
#pragma mark Portable
/*
 * Converters declared for every target (declarations only; the definitions
 * are not in this header).  Each takes a source buffer, a destination buffer
 * and a count.
 * NOTE(review): "Native"/"Swap" presumably mean host byte order versus
 * byte-swapped integer data -- confirm against the implementation.
 */
/* 16-bit integer <-> Float32 */
NO_EXPORT void Float32ToNativeInt16_Portable( const Float32 *src, SInt16 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt16_Portable( const Float32 *src, SInt16 *dest, unsigned int count );
NO_EXPORT void NativeInt16ToFloat32_Portable( const SInt16 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt16ToFloat32_Portable( const SInt16 *src, Float32 *dest, unsigned int count );
/* 24-bit integer <-> Float32; the integer side is addressed as UInt8
 * (presumably packed 3-byte samples -- confirm). */
NO_EXPORT void Float32ToNativeInt24_Portable( const Float32 *src, UInt8 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt24_Portable( const Float32 *src, UInt8 *dest, unsigned int count );
NO_EXPORT void NativeInt24ToFloat32_Portable( const UInt8 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt24ToFloat32_Portable( const UInt8 *src, Float32 *dest, unsigned int count );
/* 32-bit integer <-> Float32 */
NO_EXPORT void Float32ToNativeInt32_Portable( const Float32 *src, SInt32 *dest, unsigned int count );
NO_EXPORT void Float32ToSwapInt32_Portable( const Float32 *src, SInt32 *dest, unsigned int count );
NO_EXPORT void NativeInt32ToFloat32_Portable( const SInt32 *src, Float32 *dest, unsigned int count );
NO_EXPORT void SwapInt32ToFloat32_Portable( const SInt32 *src, Float32 *dest, unsigned int count );
#ifdef __cplusplus
};
#endif
#endif // __IOAudioBlitterLib_h__