#include <libkern/OSTypes.h>
#include <IOKit/IOReturn.h>
#include <IOKit/audio/IOAudioTypes.h>
#include <IOKit/audio/IOAudioDebug.h>
#include "AppleDBDMAClipLib.h"
#include "fp_internal.h"
#pragma mark ------------------------
#pragma mark ••• Constants and Tables
#pragma mark ------------------------
// Local aliases for the single- and double-precision floating-point types.
typedef float Float32;
typedef double Float64;
// File-scope sample value, initialised to 0.0f. Its readers and writers are not
// in the part of the file covered here.
float gOldSample = 0.0f;
#ifdef USE_DYNAMIC_DOWNSAMPLING
// Only defined when USE_DYNAMIC_DOWNSAMPLING is set; starts at 4.
// NOTE(review): presumably the downsampling ratio used by the DRC code - confirm against its users.
UInt32 gDRCDownsampleFactor = 4;
#endif
// dB-to-linear gain lookup: 49 entries whose values correspond to 10^(dB/20)
// in 1 dB steps from -24 dB (0.0631) to +24 dB (15.8489); entry 24 is unity gain.
static float zeroGaindBConvTable[] = {
0.0631f, 0.0708f, 0.0794f, 0.0891f, 0.1000f, 0.1122f, 0.1259f,
0.1413f, 0.1585f, 0.1778f, 0.1995f, 0.2239f, 0.2512f, 0.2818f,
0.3162f, 0.3548f, 0.3981f, 0.4467f, 0.5012f, 0.5623f, 0.6310f,
0.7079f, 0.7943f, 0.8913f, 1.0000f, 1.1220f, 1.2589f, 1.4125f,
1.5849f, 1.7783f, 1.9953f, 2.2387f, 2.5119f, 2.8184f, 3.1623f,
3.5481f, 3.9811f, 4.4668f, 5.0119f, 5.6234f, 6.3096f, 7.0795f,
7.9433f, 8.9125f, 10.0000f, 11.2202f, 12.5893f, 14.1254f, 15.8489f
};
// Index of the unity-gain (1.000000) entry in inputGaindBConvTable.
const UInt16 kInputGaindBConvTableOffset = 24;
// dB-to-linear gain lookup: 49 entries whose values correspond to 10^(dB/20)
// in 0.5 dB steps from -12 dB (0.251189) to +12 dB (3.981072).
static float inputGaindBConvTable[] = {
0.251189f, 0.266073f, 0.281838f, 0.298538f,
0.316228f, 0.334965f, 0.354813f, 0.375837f,
0.398107f, 0.421697f, 0.446684f, 0.473151f,
0.501187f, 0.530884f, 0.562341f, 0.595662f,
0.630957f, 0.668344f, 0.707946f, 0.749894f,
0.794328f, 0.841395f, 0.891251f, 0.944061f,
1.000000f, 1.059254f, 1.122018f, 1.188502f,
1.258925f, 1.333521f, 1.412538f, 1.496236f,
1.584893f, 1.678804f, 1.778279f, 1.883649f,
1.995262f, 2.113489f, 2.238721f, 2.371374f,
2.511886f, 2.660725f, 2.818383f, 2.985383f,
3.162278f, 3.349654f, 3.548134f, 3.758374f,
3.981072f
};
// Base-2 logarithm lookup, 128 entries.
// NOTE(review): the values match log2(k/32 + 0.001) for index k = 0..127 (the small
// offset keeps entry 0 finite) - confirm the index scaling against the code that reads it.
static float aoa_log2table[] = {
-9.96578428f, -4.95455703f, -3.97709960f, -3.39973025f,
-2.98850436f, -2.66886808f, -2.40736357f, -2.18606493f,
-1.99424073f, -1.82495451f, -1.67346265f, -1.53637754f,
-1.41119543f, -1.29601340f, -1.18935125f, -1.09003493f,
-0.99711749f, -0.90982405f, -0.82751248f, -0.74964473f,
-0.67576544f, -0.60548586f, -0.53847144f, -0.47443221f,
-0.41311519f, -0.35429834f, -0.29778575f, -0.24340365f,
-0.19099723f, -0.14042794f, -0.09157135f, -0.04431522f,
0.00144197f, 0.04579242f, 0.08882003f, 0.13060145f,
0.17120683f, 0.21070056f, 0.24914190f, 0.28658548f,
0.32308179f, 0.35867757f, 0.39341620f, 0.42733799f,
0.46048047f, 0.49287865f, 0.52456522f, 0.55557078f,
0.58592398f, 0.61565170f, 0.64477922f, 0.67333028f,
0.70132726f, 0.72879125f, 0.75574218f, 0.78219886f,
0.80817908f, 0.83369972f, 0.85877675f, 0.88342532f,
0.90765983f, 0.93149396f, 0.95494074f, 0.97801253f,
1.00072117f, 1.02307789f, 1.04509344f, 1.06677807f,
1.08814160f, 1.10919338f, 1.12994238f, 1.15039720f,
1.17056606f, 1.19045683f, 1.21007710f, 1.22943411f,
1.24853484f, 1.26738598f, 1.28599397f, 1.30436501f,
1.32250506f, 1.34041984f, 1.35811490f, 1.37559554f,
1.39286692f, 1.40993397f, 1.42680147f, 1.44347404f,
1.45995614f, 1.47625206f, 1.49236596f, 1.50830187f,
1.52406368f, 1.53965514f, 1.55507990f, 1.57034149f,
1.58544332f, 1.60038870f, 1.61518085f, 1.62982287f,
1.64431778f, 1.65866850f, 1.67287788f, 1.68694867f,
1.70088356f, 1.71468513f, 1.72835592f, 1.74189838f,
1.75531490f, 1.76860781f, 1.78177935f, 1.79483172f,
1.80776706f, 1.82058746f, 1.83329492f, 1.84589144f,
1.85837893f, 1.87075925f, 1.88303424f, 1.89520566f,
1.90727526f, 1.91924473f, 1.93111570f, 1.94288979f,
1.95456857f, 1.96615357f, 1.97764628f, 1.98904815f
};
// Inverse base-2 logarithm lookup for attenuation, 128 entries.
// The values halve every 8 entries, starting just below 1.0 (0.994599423).
// NOTE(review): this matches 2^(-(k/8) - 1/128) for index k - confirm against the lookup code.
static float aoa_antilog2table[] = {
0.994599423f, 0.912051693f, 0.836355090f, 0.766940999f,
0.703287997f, 0.644917937f, 0.591392355f, 0.542309181f,
0.497299712f, 0.456025846f, 0.418177545f, 0.383470499f,
0.351643998f, 0.322458968f, 0.295696178f, 0.271154591f,
0.248649856f, 0.228012923f, 0.209088772f, 0.191735250f,
0.175821999f, 0.161229484f, 0.147848089f, 0.135577295f,
0.124324928f, 0.114006462f, 0.104544386f, 0.095867625f,
0.087911000f, 0.080614742f, 0.073924044f, 0.067788648f,
0.062162464f, 0.057003231f, 0.052272193f, 0.047933812f,
0.043955500f, 0.040307371f, 0.036962022f, 0.033894324f,
0.031081232f, 0.028501615f, 0.026136097f, 0.023966906f,
0.021977750f, 0.020153686f, 0.018481011f, 0.016947162f,
0.015540616f, 0.014250808f, 0.013068048f, 0.011983453f,
0.010988875f, 0.010076843f, 0.009240506f, 0.008473581f,
0.007770308f, 0.007125404f, 0.006534024f, 0.005991727f,
0.005494437f, 0.005038421f, 0.004620253f, 0.004236790f,
0.003885154f, 0.003562702f, 0.003267012f, 0.002995863f,
0.002747219f, 0.002519211f, 0.002310126f, 0.002118395f,
0.001942577f, 0.001781351f, 0.001633506f, 0.001497932f,
0.001373609f, 0.001259605f, 0.001155063f, 0.001059198f,
0.000971288f, 0.000890675f, 0.000816753f, 0.000748966f,
0.000686805f, 0.000629803f, 0.000577532f, 0.000529599f,
0.000485644f, 0.000445338f, 0.000408377f, 0.000374483f,
0.000343402f, 0.000314901f, 0.000288766f, 0.000264799f,
0.000242822f, 0.000222669f, 0.000204188f, 0.000187241f,
0.000171701f, 0.000157451f, 0.000144383f, 0.000132400f,
0.000121411f, 0.000111334f, 0.000102094f, 0.000093621f,
0.000085851f, 0.000078725f, 0.000072191f, 0.000066200f,
0.000060706f, 0.000055667f, 0.000051047f, 0.000046810f,
0.000042925f, 0.000039363f, 0.000036096f, 0.000033100f,
0.000030353f, 0.000027834f, 0.000025524f, 0.000023405f,
0.000021463f, 0.000019681f, 0.000018048f, 0.000016550f
};
// Inverse base-2 logarithm lookup for gains >= 1.0, 128 entries.
// The values match 2^(k/12.8) for index k (entry 0 = 1.0, entry 64 = 32.0).
// The literals carry no 'f' suffix, so each is a double constant converted to float.
static float aoa_antilog2table_expand[] = { 1.000000, 1.055645, 1.114387, 1.176397,
1.241858, 1.310961, 1.383910, 1.460918,
1.542211, 1.628027, 1.718619, 1.814252,
1.915207, 2.021779, 2.134281, 2.253043,
2.378414, 2.510762, 2.650473, 2.797959,
2.953652, 3.118009, 3.291511, 3.474668,
3.668016, 3.872124, 4.087589, 4.315043,
4.555155, 4.808627, 5.076204, 5.358670,
5.656854, 5.971631, 6.303923, 6.654706,
7.025009, 7.415917, 7.828576, 8.264199,
8.724062, 9.209514, 9.721979, 10.262960,
10.834044, 11.436907, 12.073315, 12.745137,
13.454343, 14.203012, 14.993341, 15.827648,
16.708381, 17.638121, 18.619598, 19.655689,
20.749433, 21.904039, 23.122893, 24.409570,
25.767845, 27.201702, 28.715345, 30.313216,
32.000000, 33.780646, 35.660376, 37.644704,
39.739450, 41.950759, 44.285116, 46.749369,
49.350746, 52.096877, 54.995818, 58.056070,
61.286610, 64.696914, 68.296986, 72.097384,
76.109255, 80.344368, 84.815145, 89.534699,
94.516873, 99.776282, 105.328351, 111.189365,
117.376518, 123.907955, 130.802835, 138.081382,
145.764945, 153.876062, 162.438523, 171.477443,
181.019336, 191.092189, 201.725548, 212.950602,
224.800277, 237.309328, 250.514448, 264.454369,
279.169980, 294.704443, 311.103324, 328.414724,
346.689420, 365.981015, 386.346093, 407.844391,
430.538965, 454.496382, 479.786914, 506.484743,
534.668177, 564.419883, 595.827128, 628.982034,
663.981852, 700.929241, 739.932573, 781.106253,
824.571050, 870.454453, 918.891046, 970.022903
};
// Scale factor associated with aoa_antilog2table_expand (whose entries step by 1/12.8 in the exponent).
// NOTE(review): the object is a UInt16 but the initialiser is the float literal 12.800256081f,
// so the fractional part is discarded and the stored value is 12. If the fractional ratio is
// intended, the declared type should be float - confirm against the code that uses this constant.
const static UInt16 kAOAAntiLog2TableRatioExpand = 12.800256081f;
#pragma mark ------------------------
#pragma mark ••• Processing Routines
#pragma mark ------------------------
// Applies a fractional delay to one channel of an interleaved stereo stream.
//
// inDelay->channel selects the delayed channel (0 = left, 1 = right); the other
// channel is copied through unchanged. Any other channel value processes no
// samples. The delayed channel is written into the ring buffer inDelay->buffer
// and read back delayTimeInt slots earlier; the output is linearly interpolated
// between that tap and the slot before it, weighted by inDelay->delayFrac.
// numSamples counts individual samples; numSamples / 2 frames are processed.
// On return inDelay->writeIndex and inDelay->delayIndexInt hold the updated positions.
void delayLR(float* inFloatBufferPtr, float* outFloatBufferPtr, UInt32 numSamples, DelayStructPtr inDelay)
{
    float *ring = inDelay->buffer;
    const float frac = inDelay->delayFrac;
    const UInt32 delayLen = inDelay->delayTimeInt;
    const UInt32 lastSlot = kNumDelayBufferSamples - 1;
    const UInt32 frameCount = numSamples >> 1;
    UInt32 writePos = inDelay->writeIndex;
    SInt32 readPos = (SInt32)writePos - (SInt32)delayLen;
    UInt32 frame;
    float tap, older, passThrough;

    // The read position trails the write position; wrap it into the ring.
    if (readPos < 0) {
        readPos += kNumDelayBufferSamples;
    }

    if (0 == inDelay->channel) {
        // Left channel is delayed, right channel passes straight through.
        for (frame = 0; frame < frameCount; frame++) {
            ring[writePos] = inFloatBufferPtr[2*frame];
            passThrough = inFloatBufferPtr[2*frame + 1];

            tap = ring[readPos];
            if (readPos == 0) {
                older = ring[lastSlot];
            } else {
                older = ring[readPos - 1];
            }
            // tap + frac * (older - tap)
            older -= tap;
            older *= frac;
            tap += older;

            outFloatBufferPtr[2*frame] = tap;
            outFloatBufferPtr[2*frame + 1] = passThrough;

            writePos++;
            if (writePos > lastSlot) {
                writePos = 0;
            }
            readPos++;
            if (readPos > lastSlot) {
                readPos = 0;
            }
        }
    } else if (1 == inDelay->channel) {
        // Right channel is delayed, left channel passes straight through.
        for (frame = 0; frame < frameCount; frame++) {
            passThrough = inFloatBufferPtr[2*frame];
            ring[writePos] = inFloatBufferPtr[2*frame + 1];

            tap = ring[readPos];
            if (readPos == 0) {
                older = ring[lastSlot];
            } else {
                older = ring[readPos - 1];
            }
            // tap + frac * (older - tap)
            older -= tap;
            older *= frac;
            tap += older;

            outFloatBufferPtr[2*frame] = passThrough;
            outFloatBufferPtr[2*frame + 1] = tap;

            writePos++;
            if (writePos > lastSlot) {
                writePos = 0;
            }
            readPos++;
            if (readPos > lastSlot) {
                readPos = 0;
            }
        }
    }

    inDelay->writeIndex = writePos;
    inDelay->delayIndexInt = readPos;
}
// Applies smoothed left/right gains, in place, to an interleaved stereo buffer.
//
// For every frame the applied gain moves from its previous value toward the
// target (*inLeftVolume / *inRightVolume) with a one-pole filter:
//     gain = target * k + (1 - k) * previousGain,   k = 0.000453411
// numSamples counts individual samples; numSamples / 2 frames are processed
// and a trailing odd sample is left untouched. The final gains are stored
// back through inPreviousLeftVolume / inPreviousRightVolume so the ramp
// continues on the next call.
void volume (float* inFloatBufferPtr, UInt32 numSamples, float* inLeftVolume, float* inRightVolume, float* inPreviousLeftVolume, float* inPreviousRightVolume)
{
    const float smoothing = 0.000453411f;
    const float retain = 1.0f - smoothing;
    const float targetLeft = *inLeftVolume;
    const float targetRight = *inRightVolume;
    const UInt32 frameCount = numSamples >> 1;
    float currentLeft = *inPreviousLeftVolume;
    float currentRight = *inPreviousRightVolume;
    float *frame = inFloatBufferPtr;
    float nextLeft, nextRight;
    UInt32 n;

    for (n = 0; n < frameCount; n++, frame += 2) {
        // Advance the smoothed gains by one step.
        nextLeft = targetLeft*smoothing;
        nextRight = targetRight*smoothing;
        nextLeft += retain*currentLeft;
        nextRight += retain*currentRight;
        currentLeft = nextLeft;
        currentRight = nextRight;

        frame[0] *= currentLeft;
        frame[1] *= currentRight;
    }

    *inPreviousLeftVolume = currentLeft;
    *inPreviousRightVolume = currentRight;
}
// Scales the left and right channels of an interleaved stereo buffer by the
// fixed gains inGain->leftSoftVolume and inGain->rightSoftVolume.
//
// numSamples counts individual samples; numSamples / 2 frames are processed
// (a trailing odd sample is ignored). Frames are handled four at a time with
// the loads running ahead of the stores, followed by a frame-by-frame tail.
void balanceAdjust(float* inFloatBufferPtr, float* outFloatBufferPtr, UInt32 numSamples, GainStructPtr inGain)
{
    const float gainLeft = inGain->leftSoftVolume;
    const float gainRight = inGain->rightSoftVolume;
    const UInt32 frameCount = numSamples >> 1;
    UInt32 quads = frameCount >> 2;
    UInt32 tail = frameCount % 4;
    float *src = inFloatBufferPtr;
    float *dst = outFloatBufferPtr;
    float l0, r0, l1, r1, l2, r2, l3, r3;

    // Four frames per pass; the interleaving of loads and stores is kept as-is.
    while (quads > 0) {
        l0 = src[0];
        r0 = src[1];
        l1 = src[2];
        r1 = src[3];
        l2 = src[4];
        dst[0] = l0 * gainLeft;
        r2 = src[5];
        dst[1] = r0 * gainRight;
        l3 = src[6];
        dst[2] = l1 * gainLeft;
        r3 = src[7];
        dst[3] = r1 * gainRight;
        dst[4] = l2 * gainLeft;
        dst[5] = r2 * gainRight;
        dst[6] = l3 * gainLeft;
        dst[7] = r3 * gainRight;
        src += 8;
        dst += 8;
        quads--;
    }

    // Remaining one to three frames.
    while (tail > 0) {
        l0 = src[0];
        r0 = src[1];
        dst[0] = l0 * gainLeft;
        dst[1] = r0 * gainRight;
        src += 2;
        dst += 2;
        tail--;
    }
}
// Mixes an interleaved stereo buffer down to mono on the left channel and
// silences the right channel.
//
// For every frame: out.left = (in.left + in.right) * kMixingToMonoScale,
// out.right = 0.0f.
//
// numSamples counts individual samples. Only whole frames (numSamples / 2)
// are processed. A trailing odd sample is ignored, matching volume(),
// balanceAdjust() and delayLR(). The previous leftover loop stepped by two
// samples while comparing against (numSamples % 8), so an odd count made it
// read and write one sample past the end of both buffers.
void mixAndMuteRightChannel(float* inFloatBufferPtr, float* outFloatBufferPtr, UInt32 numSamples)
{
    const UInt32 frameCount = numSamples >> 1;
    UInt32 frame;
    float mono;

    for (frame = 0; frame < frameCount; frame++) {
        mono = inFloatBufferPtr[2*frame];
        mono += inFloatBufferPtr[2*frame + 1];
        mono *= kMixingToMonoScale;
        outFloatBufferPtr[2*frame] = mono;
        outFloatBufferPtr[2*frame + 1] = 0.0f;
    }
}
#pragma mark ------------------------
#pragma mark ••• iSub Processing Routines
#pragma mark ------------------------
// Resamples an interleaved stereo float stream to mono with linear
// interpolation and appends the result, as byte-swapped 16-bit integers,
// to a circular output buffer.
//
//   inData               interleaved stereo input, indexed by sample
//   srcPhase             in/out: fractional position between the previous and current frame
//   srcState             in/out: mono value of the frame preceding sampleIndex
//   adaptiveSampleRate / outputSampleRate
//                        their ratio is the phase step added per output sample
//   sampleIndex          first input sample to consume (advances two per frame)
//   maxSampleIndex       one past the last input sample to consume
//   iSubBufferMemory     circular output buffer holding iSubBufferLen entries
//   iSubBufferOffset     in/out: next write position; reset to 0 on wrap
//   loopCount            in/out: incremented each time the write position wraps
//
// Each frame is averaged to mono (0.5 * (L + R)), the interpolated value is
// clamped to [-1, 1], scaled by 32767 and stored with its two bytes exchanged.
void iSubDownSampleLinearAndConvert( float* inData, float* srcPhase, float* srcState, UInt32 adaptiveSampleRate, UInt32 outputSampleRate, UInt32 sampleIndex, UInt32 maxSampleIndex, SInt16 *iSubBufferMemory, SInt32 *iSubBufferOffset, UInt32 iSubBufferLen, UInt32 *loopCount )
{
    const UInt32 firstIndex = sampleIndex;
    const float step = ((float)adaptiveSampleRate)/((float)outputSampleRate);
    float frac = *srcPhase;
    float prevMono, currMono;
    Float32 mixed;
    SInt16 quantized;
    UInt16 raw;

    while (sampleIndex < maxSampleIndex) {
        if (frac >= 1.0f) {
            // Phase ran past the current frame: move on to the next input frame.
            frac -= 1.0f;
            sampleIndex += 2;
            continue;
        }

        // Mono value of the previous frame; for the very first frame it is
        // the state carried over from the previous call.
        if (sampleIndex == firstIndex) {
            prevMono = *srcState;
        } else {
            prevMono = 0.5f*(inData[sampleIndex-2] + inData[sampleIndex-1]);
        }
        currMono = 0.5f*(inData[sampleIndex] + inData[sampleIndex+1]);

        mixed = prevMono + frac*(currMono - prevMono);
        if (mixed > 1.0f) {
            mixed = 1.0f;
        } else if (mixed < -1.0f) {
            mixed = -1.0f;
        }
        quantized = (SInt16) (mixed * 32767.0f);

        if (*iSubBufferOffset >= (SInt32)iSubBufferLen) {
            *iSubBufferOffset = 0;
            (*loopCount)++;
        }

        // Store with the high and low bytes exchanged.
        raw = (UInt16)quantized;
        iSubBufferMemory[(*iSubBufferOffset)++] = ((raw << 8) & 0xFF00) | ((raw >> 8) & 0x00FF);

        frac += step;
    }

    // Remember the last frame's mono value for the next call.
    if (frac < 1.0f) {
        *srcState = 0.5f*(inData[maxSampleIndex-2] + inData[maxSampleIndex-1]);
    } else {
        *srcState = 0.0f;
    }
    *srcPhase = frac;
    return;
}
// Looks up the second-order section coefficients (b0, b1, b2, a1, a2) used by
// the 4th-order low-pass / crossover filters for the given sampling rate.
//
// Supported rates: 8000, 11025, 22050, 32000, 44100, 48000 and 96000 Hz.
// Returns TRUE and fills all five outputs on a match; returns FALSE and
// leaves the outputs untouched for any other rate.
Boolean Set4thOrderCoefficients (Float32 *b0, Float32 *b1, Float32 *b2, Float32 *a1, Float32 *a2, UInt32 samplingRate)
{
    static const struct {
        UInt32 rate;
        Float32 b0, b1, b2, a1, a2;
    } kLowPassSections[] = {
        {  8000, 0.00782020803350,       0.01564041606699,      0.00782020803350,       -1.73472576880928,  0.76600660094326   },
        { 11025, 0.00425905333005,       0.00851810666010,      0.00425905333005,       -1.80709136077571,  0.82412757409590   },
        { 22050, 0.00111491512001,       0.00222983024003,      0.00111491512001,       -1.90335434048751,  0.90781400096756   },
        { 32000, 0.00053716977481,       0.00107433954962,      0.00053716977481,       -1.93338022587993,  0.93552890497918   },
        { 44100, 0.00028538351548666,    0.00057076703097332,   0.00028538351548666,    -1.95165117996464,  0.95279271402659   },
        { 48000, 0.00024135904904198,    0.00048271809808396,   0.00024135904904198,    -1.95557824031504,  0.95654367651120   },
        { 96000, 0.00006100617875806425, 0.0001220123575161285, 0.00006100617875806425, -1.977786483776763, 0.9780305084917958 }
    };
    UInt32 entry;

    for (entry = 0; entry < sizeof(kLowPassSections)/sizeof(kLowPassSections[0]); entry++) {
        if (kLowPassSections[entry].rate != samplingRate) {
            continue;
        }
        *b0 = kLowPassSections[entry].b0;
        *b1 = kLowPassSections[entry].b1;
        *b2 = kLowPassSections[entry].b2;
        *a1 = kLowPassSections[entry].a1;
        *a2 = kLowPassSections[entry].a2;
        return(TRUE);
    }
    return(FALSE);
}
// Looks up the coefficients of the phase-compensation section for the given
// sampling rate. The numerator mirrors the denominator: *b0 receives the
// value stored in *a2 and *b1 the value stored in *a1.
//
// Supported rates: 8000, 11025, 22050, 32000, 44100, 48000 and 96000 Hz.
// Returns TRUE and fills all four outputs on a match; returns FALSE and
// leaves the outputs untouched for any other rate.
Boolean Set4thOrderPhaseCompCoefficients (Float32 *b0, Float32 *b1, Float32 *a1, Float32 *a2, UInt32 samplingRate)
{
    static const struct {
        UInt32 rate;
        Float32 a1, a2;
    } kPhaseCompSections[] = {
        {  8000, -1.734725768809275, 0.7660066009432638 },
        { 11025, -1.807091360775707, 0.8241275740958973 },
        { 22050, -1.903354340487510, 0.9078140009675627 },
        { 32000, -1.93338022587993,  0.93552890497918   },
        { 44100, -1.951651179964643, 0.9527927140265903 },
        { 48000, -1.955578240315035, 0.9565436765112033 },
        { 96000, -1.977786483776763, 0.9780305084917958 }
    };
    UInt32 entry;

    for (entry = 0; entry < sizeof(kPhaseCompSections)/sizeof(kPhaseCompSections[0]); entry++) {
        if (kPhaseCompSections[entry].rate != samplingRate) {
            continue;
        }
        *a1 = kPhaseCompSections[entry].a1;
        *a2 = kPhaseCompSections[entry].a2;
        *b0 = *a2;
        *b1 = *a1;
        return(TRUE);
    }
    return(FALSE);
}
// Splits an interleaved stereo stream into a low band and a high band.
//
// Low band:  the input run through two cascaded second-order sections that
//            share the coefficients from Set4thOrderCoefficients().
// High band: the input run through the phase-compensation section
//            (Set4thOrderPhaseCompCoefficients()) minus the low band.
//
//   in / low / high   interleaved stereo buffers holding `frames` frames
//   SamplingRate      selects the coefficient set; if it is not supported the
//                     function returns without writing any output or state
//   section1State, section2State, phaseCompState
//                     filter histories, read on entry and updated on exit so
//                     consecutive calls continue seamlessly
void StereoCrossover4thOrderPhaseComp (Float32 *in, Float32 *low, Float32 *high, UInt32 frames, UInt32 SamplingRate, PreviousValues *section1State, PreviousValues *section2State, PreviousValues *phaseCompState)
{
    enum { kLeftCh = 0, kRightCh = 1, kChannelCount = 2 };
    struct BiquadTaps { Float32 x1, x2, y1, y2; };

    struct BiquadTaps lp1[kChannelCount], lp2[kChannelCount], comp[kChannelCount];
    Float32 b0, b1, b2, a1, a2;
    Float32 bp0, bp1, ap1, ap2;
    Float32 dry[kChannelCount], lowOut[kChannelCount], aligned[kChannelCount];
    Float32 stage1;
    UInt32 frame, ch;

    // Pull the three filter histories into per-channel working copies.
    lp1[kLeftCh].x1 = section1State->xl_1;
    lp1[kLeftCh].x2 = section1State->xl_2;
    lp1[kRightCh].x1 = section1State->xr_1;
    lp1[kRightCh].x2 = section1State->xr_2;
    lp1[kLeftCh].y1 = section1State->yl_1;
    lp1[kLeftCh].y2 = section1State->yl_2;
    lp1[kRightCh].y1 = section1State->yr_1;
    lp1[kRightCh].y2 = section1State->yr_2;

    lp2[kLeftCh].x1 = section2State->xl_1;
    lp2[kLeftCh].x2 = section2State->xl_2;
    lp2[kRightCh].x1 = section2State->xr_1;
    lp2[kRightCh].x2 = section2State->xr_2;
    lp2[kLeftCh].y1 = section2State->yl_1;
    lp2[kLeftCh].y2 = section2State->yl_2;
    lp2[kRightCh].y1 = section2State->yr_1;
    lp2[kRightCh].y2 = section2State->yr_2;

    comp[kLeftCh].x1 = phaseCompState->xl_1;
    comp[kLeftCh].x2 = phaseCompState->xl_2;
    comp[kRightCh].x1 = phaseCompState->xr_1;
    comp[kRightCh].x2 = phaseCompState->xr_2;
    comp[kLeftCh].y1 = phaseCompState->yl_1;
    comp[kLeftCh].y2 = phaseCompState->yl_2;
    comp[kRightCh].y1 = phaseCompState->yr_1;
    comp[kRightCh].y2 = phaseCompState->yr_2;

    if (Set4thOrderCoefficients (&b0, &b1, &b2, &a1, &a2, SamplingRate) == FALSE)
        return;
    if (Set4thOrderPhaseCompCoefficients (&bp0, &bp1, &ap1, &ap2, SamplingRate) == FALSE)
        return;

    for (frame = 0; frame < frames; frame++) {
        dry[kLeftCh] = in[2*frame];
        dry[kRightCh] = in[2*frame+1];

        for (ch = 0; ch < kChannelCount; ch++) {
            // First low-pass section.
            stage1 = b0*dry[ch] + b1*lp1[ch].x1 + b2*lp1[ch].x2 - a1*lp1[ch].y1 - a2*lp1[ch].y2;
            lp1[ch].x2 = lp1[ch].x1;
            lp1[ch].x1 = dry[ch];
            lp1[ch].y2 = lp1[ch].y1;
            lp1[ch].y1 = stage1;

            // Second low-pass section, fed by the first.
            lowOut[ch] = b0*stage1 + b1*lp2[ch].x1 + b2*lp2[ch].x2 - a1*lp2[ch].y1 - a2*lp2[ch].y2;
            lp2[ch].x2 = lp2[ch].x1;
            lp2[ch].x1 = stage1;
            lp2[ch].y2 = lp2[ch].y1;
            lp2[ch].y1 = lowOut[ch];

            // Phase-compensation section on the dry input (x[n-2] has unit weight).
            aligned[ch] = bp0*dry[ch] + bp1*comp[ch].x1 + comp[ch].x2 - ap1*comp[ch].y1 - ap2*comp[ch].y2;
            comp[ch].x2 = comp[ch].x1;
            comp[ch].x1 = dry[ch];
            comp[ch].y2 = comp[ch].y1;
            comp[ch].y1 = aligned[ch];
        }

        low[2*frame] = lowOut[kLeftCh];
        low[2*frame+1] = lowOut[kRightCh];
        high[2*frame] = aligned[kLeftCh]-lowOut[kLeftCh];
        high[2*frame+1] = aligned[kRightCh]-lowOut[kRightCh];
    }

    // Persist the histories for the next call.
    section1State->xl_1 = lp1[kLeftCh].x1;
    section1State->xl_2 = lp1[kLeftCh].x2;
    section1State->xr_1 = lp1[kRightCh].x1;
    section1State->xr_2 = lp1[kRightCh].x2;
    section1State->yl_1 = lp1[kLeftCh].y1;
    section1State->yl_2 = lp1[kLeftCh].y2;
    section1State->yr_1 = lp1[kRightCh].y1;
    section1State->yr_2 = lp1[kRightCh].y2;

    section2State->xl_1 = lp2[kLeftCh].x1;
    section2State->xl_2 = lp2[kLeftCh].x2;
    section2State->xr_1 = lp2[kRightCh].x1;
    section2State->xr_2 = lp2[kRightCh].x2;
    section2State->yl_1 = lp2[kLeftCh].y1;
    section2State->yl_2 = lp2[kLeftCh].y2;
    section2State->yr_1 = lp2[kRightCh].y1;
    section2State->yr_2 = lp2[kRightCh].y2;

    phaseCompState->xl_1 = comp[kLeftCh].x1;
    phaseCompState->xl_2 = comp[kLeftCh].x2;
    phaseCompState->xr_1 = comp[kRightCh].x1;
    phaseCompState->xr_2 = comp[kRightCh].x2;
    phaseCompState->yl_1 = comp[kLeftCh].y1;
    phaseCompState->yl_2 = comp[kLeftCh].y2;
    phaseCompState->yr_1 = comp[kRightCh].y1;
    phaseCompState->yr_2 = comp[kRightCh].y2;
    return;
}
// Low-pass filters an interleaved stereo stream with two cascaded
// second-order sections that share the coefficients returned by
// Set4thOrderCoefficients().
//
//   in / low        interleaved stereo buffers holding `frames` frames
//   SamplingRate    selects the coefficient set; if it is not supported the
//                   function returns without writing any output or state
//   section1State, section2State
//                   filter histories, read on entry and updated on exit so
//                   consecutive calls continue seamlessly
void StereoLowPass4thOrder (Float32 *in, Float32 *low, UInt32 frames, UInt32 SamplingRate, PreviousValues *section1State, PreviousValues *section2State)
{
    enum { kLeftCh = 0, kRightCh = 1, kChannelCount = 2 };
    struct BiquadTaps { Float32 x1, x2, y1, y2; };

    struct BiquadTaps lp1[kChannelCount], lp2[kChannelCount];
    Float32 b0, b1, b2, a1, a2;
    Float32 dry[kChannelCount], lowOut[kChannelCount];
    Float32 stage1;
    UInt32 frame, ch;

    // Pull both filter histories into per-channel working copies.
    lp1[kLeftCh].x1 = section1State->xl_1;
    lp1[kLeftCh].x2 = section1State->xl_2;
    lp1[kRightCh].x1 = section1State->xr_1;
    lp1[kRightCh].x2 = section1State->xr_2;
    lp1[kLeftCh].y1 = section1State->yl_1;
    lp1[kLeftCh].y2 = section1State->yl_2;
    lp1[kRightCh].y1 = section1State->yr_1;
    lp1[kRightCh].y2 = section1State->yr_2;

    lp2[kLeftCh].x1 = section2State->xl_1;
    lp2[kLeftCh].x2 = section2State->xl_2;
    lp2[kRightCh].x1 = section2State->xr_1;
    lp2[kRightCh].x2 = section2State->xr_2;
    lp2[kLeftCh].y1 = section2State->yl_1;
    lp2[kLeftCh].y2 = section2State->yl_2;
    lp2[kRightCh].y1 = section2State->yr_1;
    lp2[kRightCh].y2 = section2State->yr_2;

    if (Set4thOrderCoefficients (&b0, &b1, &b2, &a1, &a2, SamplingRate) == FALSE)
        return;

    for (frame = 0; frame < frames; frame++) {
        dry[kLeftCh] = in[2*frame];
        dry[kRightCh] = in[2*frame+1];

        for (ch = 0; ch < kChannelCount; ch++) {
            // First section.
            stage1 = b0*dry[ch] + b1*lp1[ch].x1 + b2*lp1[ch].x2 - a1*lp1[ch].y1 - a2*lp1[ch].y2;
            lp1[ch].x2 = lp1[ch].x1;
            lp1[ch].x1 = dry[ch];
            lp1[ch].y2 = lp1[ch].y1;
            lp1[ch].y1 = stage1;

            // Second section, fed by the first.
            lowOut[ch] = b0*stage1 + b1*lp2[ch].x1 + b2*lp2[ch].x2 - a1*lp2[ch].y1 - a2*lp2[ch].y2;
            lp2[ch].x2 = lp2[ch].x1;
            lp2[ch].x1 = stage1;
            lp2[ch].y2 = lp2[ch].y1;
            lp2[ch].y1 = lowOut[ch];
        }

        low[2*frame] = lowOut[kLeftCh];
        low[2*frame+1] = lowOut[kRightCh];
    }

    // Persist the histories for the next call.
    section1State->xl_1 = lp1[kLeftCh].x1;
    section1State->xl_2 = lp1[kLeftCh].x2;
    section1State->xr_1 = lp1[kRightCh].x1;
    section1State->xr_2 = lp1[kRightCh].x2;
    section1State->yl_1 = lp1[kLeftCh].y1;
    section1State->yl_2 = lp1[kLeftCh].y2;
    section1State->yr_1 = lp1[kRightCh].y1;
    section1State->yr_2 = lp1[kRightCh].y2;

    section2State->xl_1 = lp2[kLeftCh].x1;
    section2State->xl_2 = lp2[kLeftCh].x2;
    section2State->xr_1 = lp2[kRightCh].x1;
    section2State->xr_2 = lp2[kRightCh].x2;
    section2State->yl_1 = lp2[kLeftCh].y1;
    section2State->yl_2 = lp2[kLeftCh].y2;
    section2State->yr_1 = lp2[kRightCh].y1;
    section2State->yr_2 = lp2[kRightCh].y2;
    return;
}
#pragma mark ------------------------
#pragma mark ••• Conversion Routines
#pragma mark ------------------------
#if defined(__ppc__)
// Wrappers around individual PowerPC instructions (GCC inline asm, statement-expression form).

// lwbrx: load a 32-bit word from (base + index) with its bytes reversed; evaluates to the loaded value.
#define __lwbrx( index, base ) ({ register long result; __asm__ __volatile__("lwbrx %0, %1, %2" : "=r" (result) : "b%" (index), "r" (base) : "memory" ); result; } )
// lhbrx: load a 16-bit halfword from (base + index) with its bytes reversed; evaluates to a signed short.
#define __lhbrx(index, base) \
({ register signed short lhbrxResult; \
__asm__ ("lhbrx %0, %1, %2" : "=r" (lhbrxResult) : "b%" (index), "r" (base) : "memory"); \
lhbrxResult; } )
// rlwimi: rotate rS left by cnt bits and insert the bits selected by the mask mb..me
// (PowerPC bit numbering, bit 0 = most significant) into rA; the other bits of rA are kept.
// rA is both read and written; cnt, mb and me must be compile-time constants ("n" constraint).
#define __rlwimi( rA, rS, cnt, mb, me ) \
({ __asm__ __volatile__( "rlwimi %0, %2, %3, %4, %5" : "=r" (rA) : "0" (rA), "r" (rS), "n" (cnt), "n" (mb), "n" (me) ); rA; })
// stwbrx: store the 32-bit value at (base + index) with its bytes reversed.
#define __stwbrx( value, index, base ) \
__asm__( "stwbrx %0, %1, %2" : : "r" (value), "b%" (index), "r" (base) : "memory" )
// Same expansion as __rlwimi (which is already __volatile__).
#define __rlwimi_volatile( rA, rS, cnt, mb, me ) \
({ __asm__ __volatile__( "rlwimi %0, %2, %3, %4, %5" : "=r" (rA) : "0" (rA), "r" (rS), "n" (cnt), "n" (mb), "n" (me) ); rA; })
// stfiwx: store the low 32 bits of a floating-point register, as an integer word, at (addr + offset).
#define __stfiwx( value, offset, addr ) \
asm( "stfiwx %0, %1, %2" : : "f" (value), "b%" (offset), "r" (addr) : "memory" )
// fctiw: convert the double B to a 32-bit integer inside a floating-point register.
// The returned double carries that integer in its bit pattern, not a numeric value;
// it is meant to be written out with __stfiwx.
static inline double __fctiw( register double B )
{
register double result;
asm( "fctiw %0, %1" : "=f" (result) : "f" (B) );
return result;
}
// Converts `count` signed 8-bit samples at src into floats at dest (value / 128).
//
// Instead of an integer-to-float instruction the conversion uses a bit trick:
// the sample is added to exponentMask, giving the bit pattern of a float whose
// mantissa holds (0x8000 + sample). Storing that pattern to memory, reloading
// it as a float and subtracting `bias` (exponentMask itself viewed as a float)
// leaves sample * 2^-7. The destination buffer doubles as the transfer area:
// raw patterns are stored a few slots ahead of the output cursor, reloaded as
// floats, and later overwritten by the finished values.
//
// The block for count >= 8 is software-pipelined: a prologue starts 8 samples,
// each loop pass retires 4, and the epilogue drains the pipeline. The final
// while loop converts whatever is left (or everything when count < 8).
// Statement order matters throughout because loads, integer->float transfers
// and stores of different samples overlap.
//
// NOTE(review): the ((long*) dest) stores assume long and float are both
// 32 bits, as on the 32-bit PowerPC target this section is compiled for.
void Int8ToFloat32( SInt8 *src, float *dest, unsigned int count )
{
register float bias;
register long exponentMask = ((0x97UL - 8) << 23) | 0x8000; register long int0, int1, int2, int3;
register float float0, float1, float2, float3;
register unsigned long loopCount;
union
{
float f;
long i;
}exponent;
// bias is exponentMask reinterpreted as a float.
exponent.i = exponentMask;
bias = exponent.f;
// Pre-decrement so every access can use the (++src)[0] form.
src--;
if( count >= 8 )
{
// Prologue: load 8 samples and push the first ones through the transfer stages.
int0 = (++src)[0];
int1 = (++src)[0];
int0 += exponentMask;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[0] = int0;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[1] = int1;
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[2] = int2;
float0 = dest[0];
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[3] = int3;
float1 = dest[1];
float0 -= bias;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[4] = int0;
float2 = dest[2];
float1 -= bias;
// From here dest trails by one so finished values are written with (++dest)[0];
// raw patterns go to slot [6] and are reloaded from slot [4] relative to it.
dest--;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
// 8 samples are handled by the prologue/epilogue; the loop takes 4 per pass
// and the remainder (count & 3) is left for the scalar loop at the end.
count -= 8;
loopCount = count / 4;
count &= 3;
while( loopCount-- )
{
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[6] = int0;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
}
// Epilogue: no more loads; drain the samples still in flight.
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
float3 -= bias;
(++dest)[0] = float1;
(++dest)[0] = float2;
(++dest)[0] = float3;
// Step past the last written slot so dest points at the next free one.
dest++;
}
// Scalar tail: one sample at a time, same add / store / reload / subtract sequence.
while( count-- )
{
register long value = (++src)[0];
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
dest++;
}
}
// Converts `count` native-endian signed 16-bit samples at src into floats at dest.
//
// bitDepth sets the scale: the result is sample * 2^(1 - bitDepth), i.e.
// sample / 32768 for bitDepth == 16.
//
// The conversion uses a bit trick instead of an integer-to-float instruction:
// the sample is added to exponentMask, giving the bit pattern of a float whose
// mantissa holds (0x8000 + sample). Storing that pattern to memory, reloading
// it as a float and subtracting `bias` (exponentMask itself viewed as a float)
// leaves the scaled sample. The destination buffer doubles as the transfer
// area: raw patterns are stored a few slots ahead of the output cursor,
// reloaded as floats, and later overwritten by the finished values.
//
// The block for count >= 8 is software-pipelined: a prologue starts 8 samples,
// each loop pass retires 4, and the epilogue drains the pipeline. The final
// while loop converts whatever is left (or everything when count < 8).
// Statement order matters throughout because loads, integer->float transfers
// and stores of different samples overlap.
//
// NOTE(review): the ((long*) dest) stores assume long and float are both
// 32 bits, as on the 32-bit PowerPC target this section is compiled for.
void NativeInt16ToFloat32( signed short *src, float *dest, unsigned int count, int bitDepth )
{
register float bias;
register long exponentMask = ((0x97UL - bitDepth) << 23) | 0x8000; register long int0, int1, int2, int3;
register float float0, float1, float2, float3;
register unsigned long loopCount;
union
{
float f;
long i;
} exponent;
// bias is exponentMask reinterpreted as a float.
exponent.i = exponentMask;
bias = exponent.f;
// Pre-decrement so every access can use the (++src)[0] form.
src--;
if( count >= 8 )
{
// Prologue: load 8 samples and push the first ones through the transfer stages.
int0 = (++src)[0];
int1 = (++src)[0];
int0 += exponentMask;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[0] = int0;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[1] = int1;
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[2] = int2;
float0 = dest[0];
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[3] = int3;
float1 = dest[1];
float0 -= bias;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[4] = int0;
float2 = dest[2];
float1 -= bias;
// From here dest trails by one so finished values are written with (++dest)[0];
// raw patterns go to slot [6] and are reloaded from slot [4] relative to it.
dest--;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
// 8 samples are handled by the prologue/epilogue; the loop takes 4 per pass
// and the remainder (count & 3) is left for the scalar loop at the end.
count -= 8;
loopCount = count / 4;
count &= 3;
while( loopCount-- )
{
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[6] = int0;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
}
// Epilogue: no more loads; drain the samples still in flight.
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
float3 -= bias;
(++dest)[0] = float1;
(++dest)[0] = float2;
(++dest)[0] = float3;
// Step past the last written slot so dest points at the next free one.
dest++;
}
// Scalar tail: one sample at a time, same add / store / reload / subtract sequence.
while( count-- )
{
register long value = (++src)[0];
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
dest++;
}
}
// Converts `count` packed, native-endian signed 24-bit samples at src into
// floats at dest.
//
// bitDepth sets the scale: the result is sample * 2^(1 - bitDepth), i.e.
// sample / 8388608 for bitDepth == 24.
//
// Conversion trick: each sample, with its sign bit flipped, is written into
// the low word of a double whose high word is the constant
// (0x434 - bitDepth) << 20. Subtracting dBias (same high word, low word
// 0x00800000) removes both the exponent offset and the sign-bit offset,
// leaving the signed, scaled sample, which is then narrowed to float.
//
// Four 24-bit samples occupy three 32-bit words. The main path loads three
// words at a time and splits them into int0..int3 with shifts and rlwimi
// inserts; the XOR masks flip the sign bit of each sample where it sits in
// the loaded words. The block for count >= 8 is software-pipelined (prologue
// starts 8 samples, each loop pass retires 4, epilogue drains), so statement
// order matters. Remaining samples are converted one at a time by byte access.
//
// NOTE(review): relies on big-endian layout (transfer.i[even] is the high word
// of transfer.d[]) and on 32-bit long / unsigned int, as on 32-bit PowerPC.
void NativeInt24ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
} transfer;
register double dBias;
register unsigned int loopCount, load0SignMask;
register unsigned long load0, load1, load2;
register unsigned long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// High words of all four transfer doubles get the exponent constant; the low word of d[0] forms the bias.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = (0x434UL - bitDepth) << 20; transfer.i[1] = 0x00800000;
// Cleared so the top byte stays zero: rlwimi only ever inserts into the low 24 bits.
int0 = int1 = int2 = int3 = 0;
// Flips the sign bit of sample 0 (bit 31) and of sample 1's high byte (bit 7) in the first word.
load0SignMask = 0x80000080UL;
dBias = transfer.d[0];
// Pre-decrement so every access can use the (++ptr)[0] form.
src--;
dest--;
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
// Prologue: unpack the first four samples and start converting them.
load0 = (++src)[0];
load1 = (++src)[0];
load0 ^= load0SignMask;
load2 = (++src)[0];
load1 ^= 0x00008000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15);
load2 ^= 0x00800000UL;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
transfer.i[1] = int0;
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
transfer.i[3] = int1;
load1 = (++src)[0];
load0 ^= load0SignMask;
transfer.i[5] = int2;
d0 = transfer.d[0];
load2 = (++src)[0];
load1 ^= 0x00008000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
transfer.i[7] = int3;
d1 = transfer.d[1];
d0 -= dBias;
load2 ^= 0x00800000UL;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
transfer.i[1] = int0;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
// Steady state: three words in, four floats out per pass.
while( loopCount-- )
{
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
transfer.i[3] = int1;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
load1 = (++src)[0];
load0 ^= load0SignMask;
transfer.i[5] = int2;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
load2 = (++src)[0];
load1 ^= 0x00008000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
transfer.i[7] = int3;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
load2 ^= 0x00800000UL;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
transfer.i[1] = int0;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Epilogue: no more loads; finish the samples still in flight.
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
transfer.i[3] = int1;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
transfer.i[5] = int2;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
transfer.i[7] = int3;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// src points 4 bytes before the next unread sample; move it to 3 bytes before,
// so the 3-byte pre-advance in the loop below lands on each sample in turn.
src = (long*) ((char*) src + 1 );
// Scalar tail: one 24-bit sample per pass, read as a high byte plus a 16-bit low part.
while( count-- )
{
int0 = ((unsigned char*)(src = (long*)( (char*) src + 3 )))[0];
int1 = ((unsigned short*)( (char*) src + 1 ))[0];
int0 ^= 0x00000080UL;
int1 = __rlwimi( int1, int0, 16, 8, 15 );
transfer.i[1] = int1;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
// NativeInt32ToFloat32
//
// Converts `count` integer samples read from `src` into floats written to `dest`.
//
// Conversion technique: each sample has its top bit flipped (XOR 0x80000000) and is
// written into an odd-indexed word of the `transfer` union, whose even-indexed words
// were preloaded with (0x434 - bitDepth) << 20.  The pair is then read back as a
// double and `dBias` (same even word, odd word 0x80000000) is subtracted before the
// result is narrowed to float.
// NOTE(review): this relies on the even-indexed word being the exponent half of the
// double (big-endian word order) and on `long` being 32 bits wide - confirm for the
// build target.
//
// src      - input samples (read with pre-increment after an initial src--)
// dest     - output floats (written with pre-increment after an initial dest--)
// count    - number of samples to convert
// bitDepth - only used to build the exponent word of the bias
void NativeInt32ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
}transfer;
register double dBias;
register unsigned int loopCount;
register long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// Preload the exponent word of all four transfer slots and capture the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = (0x434UL - bitDepth) << 20;
transfer.i[1] = 0x80000000;
dBias = transfer.d[0];
// Back both pointers up by one element so every access can use pre-increment.
src--;
dest--;
// Software-pipelined path: 8 samples are consumed by the prologue/epilogue and the
// loop handles 4 samples per iteration.  Statement order matters because the four
// transfer slots are reused while earlier values are still in flight.
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
// Prologue: fill the pipeline.
int0 = (++src)[0];
int1 = (++src)[0];
int0 ^= 0x80000000UL;
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
int0 = (++src)[0];
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
int1 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
// Steady state: load 4, store 4 per iteration.
while( loopCount-- )
{
int0 = (++src)[0];
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
int1 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Epilogue: drain the 8 samples still in the pipeline (no further loads).
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// Scalar path: remaining samples (all of them when count < 8).
while( count-- )
{
int0 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[1] = int0;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
// NativeInt16ToFloat32Gain
//
// Converts `count` 16-bit samples from `src` into floats at `dest`, multiplying
// even-indexed outputs by *inGainLPtr and odd-indexed outputs by *inGainRPtr.
// A NULL gain pointer selects a gain of 1.0 for that channel.
//
// Conversion technique: the sample is added to `exponentMask`
// (((0x97 - bitDepth) << 23) | 0x8000), the sum is stored as raw bits into the
// destination buffer, read back from the same location as a float, and `bias`
// (the float whose bit pattern is `exponentMask`) is subtracted.
// `dest` itself is used as the int->float scratch area, ahead of the position
// where finished floats are being written.
// NOTE(review): the `(long*) dest` stores assume `long` and `float` have the same
// size (32 bits) - confirm for the build target.
//
// src        - input samples
// dest       - output floats (also used as scratch, see above)
// count      - number of samples (not frames) to convert
// bitDepth   - only used to build the exponent of the bias
// inGainLPtr - optional gain for even-indexed samples
// inGainRPtr - optional gain for odd-indexed samples
void NativeInt16ToFloat32Gain( signed short *src, float *dest, unsigned int count, int bitDepth, float* inGainLPtr, float* inGainRPtr )
{
register float bias, gainL, gainR;
register long exponentMask = ((0x97UL - bitDepth) << 23) | 0x8000; register long int0, int1, int2, int3;
register float float0, float1, float2, float3;
register unsigned long loopCount;
union
{
float f;
long i;
} exponent;
// Reinterpret the integer mask as a float to obtain the bias to subtract.
exponent.i = exponentMask;
bias = exponent.f;
if (inGainLPtr) {
gainL = *inGainLPtr;
} else {
gainL = 1.0f;
}
if (inGainRPtr) {
gainR = *inGainRPtr;
} else {
gainR = 1.0f;
}
// Back src up by one so all loads can use pre-increment.
src--;
// Software-pipelined path: prologue + epilogue cover 8 samples, the loop covers 4
// per iteration.  Raw integer patterns are written a few slots ahead of the
// finished floats, so statement order must be preserved.
if( count >= 8 )
{
int0 = (++src)[0];
int1 = (++src)[0];
int0 += exponentMask;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[0] = int0;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[1] = int1;
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[2] = int2;
float0 = dest[0];
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[3] = int3;
float1 = dest[1];
float0 -= bias;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[4] = int0;
float0 *= gainL;
float2 = dest[2];
float1 -= bias;
// From here on dest is one element behind the next output slot; finished floats
// are written with pre-increment while raw patterns go to [6] and are read at [4].
dest--;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float1 *= gainR;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
count -= 8;
loopCount = count / 4;
count &= 3;
while( loopCount-- )
{
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[6] = int2;
float2 *= gainL;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
int1 = (++src)[0];
int0 += exponentMask;
((long*) dest)[6] = int3;
float3 *= gainR;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[6] = int0;
float0 *= gainL;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
int3 = (++src)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float1 *= gainR;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
}
// Epilogue: drain the pipeline without loading further samples.
int3 += exponentMask;
((long*) dest)[6] = int2;
float2 *= gainL;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
((long*) dest)[6] = int3;
float3 *= gainR;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
float0 *= gainL;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
float1 *= gainR;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
float2 *= gainL;
float3 -= bias;
(++dest)[0] = float1;
float3 *= gainR;
(++dest)[0] = float2;
(++dest)[0] = float3;
// Step dest forward so it points at the next unwritten output for the tail code.
dest++;
}
// Tail: remaining samples, two at a time (gainL then gainR).
loopCount = count/2;
while( loopCount-- )
{
register long value = (++src)[0];
value += exponentMask;
((long*) dest)[0] = value;
value = (++src)[0];
dest[0] -= bias;
dest[0] *= gainL;
value += exponentMask;
dest++;
((long*) dest)[0] = value;
dest[0] -= bias;
dest[0] *= gainR;
dest++;
}
// A single leftover sample is treated as an even-indexed one and gets gainL.
if (count % 2) {
register long value = (++src)[0];
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
dest[0] *= gainL;
}
}
// NativeInt16ToFloat32CopyRightToLeft
//
// Converts 16-bit samples from `src` into floats at `dest`, but for every pair of
// input samples only the second (odd-indexed) one is read; its converted value is
// written to both output slots of the pair.
//
// Unlike the sibling converters, src is NOT pre-decremented here: the alternating
// `(++src)[0]` / `(src++)[0]` loads therefore both hit the odd-indexed sample of
// each pair while src still advances by two per pair.
//
// The int->float technique is the same add-exponent / subtract-bias trick used by
// the other 16-bit converters in this file, with `dest` used as scratch space.
// NOTE(review): the `(long*) dest` stores assume `long` is 32 bits - confirm.
// NOTE(review): when `count` is odd, the final block loads via `(++src)[0]`, i.e. the
// element after the last one covered by `count` - confirm callers always pass an
// even count or that the extra element is readable.
//
// src      - input samples
// dest     - output floats (also used as scratch)
// count    - number of output samples to produce
// bitDepth - only used to build the exponent of the bias
void NativeInt16ToFloat32CopyRightToLeft( signed short *src, float *dest, unsigned int count, int bitDepth )
{
register float bias;
register long exponentMask = ((0x97UL - bitDepth) << 23) | 0x8000; register long int0, int1, int2, int3;
register float float0, float1, float2, float3;
register unsigned long loopCount;
union
{
float f;
long i;
} exponent;
// Reinterpret the integer mask as a float to obtain the bias to subtract.
exponent.i = exponentMask;
bias = exponent.f;
// Software-pipelined path: prologue + epilogue cover 8 outputs, the loop covers 4
// per iteration.  Statement order must be preserved.
if( count >= 8 )
{
int0 = (++src)[0];
int1 = (src++)[0]; int0 += exponentMask;
int2 = (++src)[0]; int1 += exponentMask;
((long*) dest)[0] = int0;
int3 = (src++)[0]; int2 += exponentMask;
((long*) dest)[1] = int1;
int0 = (++src)[0]; int3 += exponentMask;
((long*) dest)[2] = int2;
float0 = dest[0];
int1 = (src++)[0]; int0 += exponentMask;
((long*) dest)[3] = int3;
float1 = dest[1];
float0 -= bias;
int2 = (++src)[0]; int1 += exponentMask;
((long*) dest)[4] = int0;
float2 = dest[2];
float1 -= bias;
// From here on dest trails the next output slot by one; raw patterns are written
// at [6] and read back at [4].
dest--;
int3 = (src++)[0]; int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
count -= 8;
loopCount = count / 4;
count &= 3;
while( loopCount-- )
{
int0 = (++src)[0];
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
int1 = (src++)[0];
int0 += exponentMask;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
int2 = (++src)[0];
int1 += exponentMask;
((long*) dest)[6] = int0;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
int3 = (src++)[0];
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
}
// Epilogue: drain the pipeline without loading further samples.
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
float3 -= bias;
(++dest)[0] = float1;
(++dest)[0] = float2;
(++dest)[0] = float3;
// Step dest forward so it points at the next unwritten output for the tail code.
dest++;
}
// Tail: one load per pair; the same raw pattern is converted into both outputs.
loopCount = count/2;
while( loopCount-- )
{
register long value = (++src)[0];
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
dest++;
++src;
((long*) dest)[0] = value;
dest[0] -= bias;
dest++;
}
// Single leftover output (see the NOTE(review) in the header about the load).
if (count % 2) {
register long value = (++src)[0];
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
}
}
// NativeInt32ToFloat32Gain
//
// Converts `count` integer samples from `src` into floats at `dest`, multiplying
// even-indexed outputs by *inGainLPtr and odd-indexed outputs by *inGainRPtr.
// A NULL gain pointer selects a gain of 1.0 for that channel.  The gain multiply is
// done in double precision before the result is narrowed to float.
//
// Conversion technique: each sample has its top bit flipped (XOR 0x80000000) and is
// written into an odd-indexed word of the `transfer` union, whose even-indexed words
// hold (0x434 - bitDepth) << 20; the pair is read back as a double and `dBias` is
// subtracted.
// NOTE(review): relies on the even-indexed word being the exponent half of the
// double (big-endian word order) and on `long` being 32 bits - confirm.
//
// src        - input samples
// dest       - output floats
// count      - number of samples (not frames) to convert
// bitDepth   - only used to build the exponent word of the bias
// inGainLPtr - optional gain for even-indexed samples
// inGainRPtr - optional gain for odd-indexed samples
void NativeInt32ToFloat32Gain( signed long *src, float *dest, unsigned int count, int bitDepth, float* inGainLPtr, float* inGainRPtr )
{
union
{
double d[4];
unsigned int i[8];
} transfer;
register double dBias, gainL, gainR;
register unsigned int loopCount;
register long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// Preload the exponent word of all four transfer slots and capture the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = (0x434UL - bitDepth) << 20;
transfer.i[1] = 0x80000000;
dBias = transfer.d[0];
if (inGainLPtr) {
gainL = *inGainLPtr;
} else {
gainL = 1.0f;
}
if (inGainRPtr) {
gainR = *inGainRPtr;
} else {
gainR = 1.0f;
}
// Back both pointers up by one element so every access can use pre-increment.
src--;
dest--;
// Software-pipelined path: prologue + epilogue cover 8 samples, the loop covers 4
// per iteration (gainL, gainR, gainL, gainR).  Statement order must be preserved.
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
int0 = (++src)[0];
int1 = (++src)[0];
int0 ^= 0x80000000UL;
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
int0 = (++src)[0];
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
int1 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0*gainL;
while( loopCount-- )
{
int0 = (++src)[0];
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1*gainR;
(++dest)[0] = f0;
int1 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2*gainL;
(++dest)[0] = f1;
int2 = (++src)[0];
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3*gainR;
(++dest)[0] = f2;
int3 = (++src)[0];
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0*gainL;
(++dest)[0] = f3;
}
// Epilogue: drain the pipeline without loading further samples.
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1*gainR;
(++dest)[0] = f0;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2*gainL;
(++dest)[0] = f1;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3*gainR;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0*gainL;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1*gainR;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2*gainL;
(++dest)[0] = f1;
f3 = d3*gainR;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// Tail: remaining samples, two at a time (gainL then gainR).
loopCount = count/2;
while( loopCount-- )
{
int0 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[1] = int0;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0*gainL;
(++dest)[0] = f0;
int0 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[1] = int0;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0*gainR;
(++dest)[0] = f0;
}
// A single leftover sample is treated as an even-indexed one and gets gainL.
if (count % 2) {
int0 = (++src)[0];
int0 ^= 0x80000000UL;
transfer.i[1] = int0;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0*gainL;
(++dest)[0] = f0;
}
}
// SwapInt16ToFloat32
//
// Converts `count` 16-bit samples from `src` into floats at `dest`.  Every sample is
// fetched through __lhbrx( 0, ++src ) instead of a plain load.
// NOTE(review): __lhbrx is declared outside this block; presumably it is the
// byte-reversed halfword load, making this the opposite-endian counterpart of the
// native 16-bit converter - confirm, including how it extends the 16-bit value.
//
// Conversion technique: the loaded value is added to `exponentMask`
// (((0x97 - bitDepth) << 23) | 0x8000), stored as raw bits into the destination
// buffer, read back from the same location as a float, and `bias` (the float whose
// bit pattern is `exponentMask`) is subtracted.  `dest` is used as scratch space
// ahead of the position where finished floats are written.
// NOTE(review): the `(long*) dest` stores assume `long` is 32 bits - confirm.
//
// src      - input samples
// dest     - output floats (also used as scratch)
// count    - number of samples to convert
// bitDepth - only used to build the exponent of the bias
void SwapInt16ToFloat32( signed short *src, float *dest, unsigned int count, int bitDepth )
{
register float bias;
register long exponentMask = ((0x97UL - bitDepth) << 23) | 0x8000; register long int0, int1, int2, int3;
register float float0, float1, float2, float3;
register unsigned long loopCount;
union
{
float f;
long i;
}exponent;
// Reinterpret the integer mask as a float to obtain the bias to subtract.
exponent.i = exponentMask;
bias = exponent.f;
// Back src up by one so all loads can use pre-increment.
src--;
// Software-pipelined path: prologue + epilogue cover 8 samples, the loop covers 4
// per iteration.  Statement order must be preserved.
if( count >= 8 )
{
int0 = __lhbrx(0, ++src);
int1 = __lhbrx(0, ++src);
int0 += exponentMask;
int2 = __lhbrx(0, ++src);
int1 += exponentMask;
((long*) dest)[0] = int0;
int3 = __lhbrx(0, ++src);
int2 += exponentMask;
((long*) dest)[1] = int1;
int0 = __lhbrx(0, ++src);
int3 += exponentMask;
((long*) dest)[2] = int2;
float0 = dest[0];
int1 = __lhbrx(0, ++src);
int0 += exponentMask;
((long*) dest)[3] = int3;
float1 = dest[1];
float0 -= bias;
int2 = __lhbrx(0, ++src);
int1 += exponentMask;
((long*) dest)[4] = int0;
float2 = dest[2];
float1 -= bias;
// From here on dest trails the next output slot by one; raw patterns are written
// at [6] and read back at [4].
dest--;
int3 = __lhbrx(0, ++src);
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
count -= 8;
loopCount = count / 4;
count &= 3;
while( loopCount-- )
{
int0 = __lhbrx(0, ++src);
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
int1 = __lhbrx(0, ++src);
int0 += exponentMask;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
int2 = __lhbrx(0, ++src);
int1 += exponentMask;
((long*) dest)[6] = int0;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
int3 = __lhbrx(0, ++src);
int2 += exponentMask;
((long*) dest)[6] = int1;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
}
// Epilogue: drain the pipeline without loading further samples.
int3 += exponentMask;
((long*) dest)[6] = int2;
float0 = dest[4];
float3 -= bias;
(++dest)[0] = float1;
((long*) dest)[6] = int3;
float1 = dest[4];
float0 -= bias;
(++dest)[0] = float2;
float2 = dest[4];
float1 -= bias;
(++dest)[0] = float3;
float3 = dest[4];
float2 -= bias;
(++dest)[0] = float0;
float3 -= bias;
(++dest)[0] = float1;
(++dest)[0] = float2;
(++dest)[0] = float3;
// Step dest forward so it points at the next unwritten output for the tail loop.
dest++;
}
// Scalar path: remaining samples (all of them when count < 8).
while( count-- )
{
register long value = __lhbrx(0, ++src);
value += exponentMask;
((long*) dest)[0] = value;
dest[0] -= bias;
dest++;
}
}
// SwapInt24ToFloat32
//
// Converts `count` packed 24-bit samples from `src` into floats at `dest`.
// Four samples occupy three consecutive words; the main loop loads three words and
// emits four floats per iteration.
//
// How the samples are unpacked:
//  - The XOR masks (0x00008000, 0x00800000 and load2SignMask = 0x80000080) flip one
//    bit in each of the four sample fields held in the three loaded words.
//    NOTE(review): these positions match the sign bit of each sample's
//    most-significant byte if the data is little-endian 24-bit viewed through
//    big-endian word loads - confirm.
//  - Shifts and __rlwimi inserts regroup the bytes into one word per sample.
//  - __stwbrx writes each word into an odd-indexed slot of `transfer`; the slot pair
//    is read back as a double and `dBias` is subtracted.
//    NOTE(review): __rlwimi / __stwbrx are declared outside this block; presumably
//    the rotate-and-insert and byte-reversed store operations - confirm.
//
// The exponent word is hard-coded (0x41400000); the `bitDepth` parameter is not
// used by this function.
//
// src      - packed 24-bit input
// dest     - output floats
// count    - number of samples to convert
// bitDepth - unused
void SwapInt24ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
}transfer;
register double dBias;
register unsigned int loopCount, load2SignMask;
register unsigned long load0, load1, load2;
register unsigned long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// Preload the exponent word of all four transfer slots and capture the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = 0x41400000UL;
transfer.i[1] = 0x80000000;
// Zeroed because __rlwimi inserts into the existing register contents.
int0 = int1 = int2 = int3 = 0;
load2SignMask = 0x80000080UL;
dBias = transfer.d[0];
// Back both pointers up by one element so every access can use pre-increment.
src--;
dest--;
// Software-pipelined path: prologue + epilogue cover 8 samples (6 words), the loop
// covers 4 samples (3 words) per iteration.  Statement order must be preserved.
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
load0 = (++src)[0];
load1 = (++src)[0];
load0 ^= 0x00008000;
load2 = (++src)[0];
load1 ^= 0x00800000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15);
load2 ^= load2SignMask;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
__stwbrx( int0, 0, &transfer.i[1]);
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
__stwbrx( int1, 0, &transfer.i[3]);
load1 = (++src)[0];
load0 ^= 0x00008000;
__stwbrx( int2, 0, &transfer.i[5]);
d0 = transfer.d[0];
load2 = (++src)[0];
load1 ^= 0x00800000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
__stwbrx( int3, 0, &transfer.i[7]);
d1 = transfer.d[1];
d0 -= dBias;
load2 ^= load2SignMask;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
__stwbrx( int0, 0, &transfer.i[1]);
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
while( loopCount-- )
{
load0 = (++src)[0];
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
__stwbrx( int1, 0, &transfer.i[3]);
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
load1 = (++src)[0];
load0 ^= 0x00008000;
__stwbrx( int2, 0, &transfer.i[5]);
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
load2 = (++src)[0];
load1 ^= 0x00800000UL;
int0 = load0 >> 8;
int1 = __rlwimi( int1, load0, 16, 8, 15 );
__stwbrx( int3, 0, &transfer.i[7]);
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
load2 ^= load2SignMask;
int1 = __rlwimi( int1, load1, 16, 16, 31);
int2 = __rlwimi( int2, load1, 8, 8, 23 );
__stwbrx( int0, 0, &transfer.i[1]);
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Epilogue: drain the pipeline without loading further words.
int2 = __rlwimi( int2, load2, 8, 24, 31 );
int3 = load2 & 0x00FFFFFF;
__stwbrx( int1, 0, &transfer.i[3]);
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
__stwbrx( int2, 0, &transfer.i[5]);
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
__stwbrx( int3, 0, &transfer.i[7]);
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// Leftover samples, handled one at a time.
if( count > 0 )
{
// First leftover sample: assembled from a single byte load (byte offset 6 from the
// pre-decremented src) and a halfword load at the start of the next word, so no
// full word is read here.
int1 = ((unsigned char*) src)[6];
int0 = ((unsigned short*)(++src))[0];
int1 ^= 0x80;
int1 = __rlwimi( int1, int0, 8, 8, 23 );
__stwbrx( int1, 0, &transfer.i[1]);
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
// Rewind one byte so that stepping 3 bytes at a time puts each following sample in
// the low 24 bits of a (possibly unaligned) word load.
src = (long*) ((char*)src - 1 );
while( --count )
{
int0 = (src = (long*)( (char*) src + 3 ))[0];
int0 ^= 0x80UL;
int0 &= 0x00FFFFFFUL;
__stwbrx( int0, 0, &transfer.i[1]);
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
}
// SwapInt32ToFloat32
//
// Converts `count` integer samples from `src` into floats at `dest`.  Every sample is
// fetched through __lwbrx( 0, ++src ) instead of a plain load.
// NOTE(review): __lwbrx is declared outside this block; presumably it is the
// byte-reversed word load, making this the opposite-endian counterpart of
// NativeInt32ToFloat32 - confirm.
//
// Conversion technique: each loaded value has its top bit flipped (XOR 0x80000000)
// and is written into an odd-indexed word of the `transfer` union, whose
// even-indexed words hold (0x434 - bitDepth) << 20; the pair is read back as a
// double and `dBias` is subtracted before narrowing to float.
// NOTE(review): relies on the even-indexed word being the exponent half of the
// double (big-endian word order) and on `long` being 32 bits - confirm.
//
// src      - input samples
// dest     - output floats
// count    - number of samples to convert
// bitDepth - only used to build the exponent word of the bias
void SwapInt32ToFloat32( long *src, float *dest, unsigned int count, int bitDepth )
{
union
{
double d[4];
unsigned int i[8];
}transfer;
register double dBias;
register unsigned int loopCount;
register long int0, int1, int2, int3;
register double d0, d1, d2, d3;
register float f0, f1, f2, f3;
// Preload the exponent word of all four transfer slots and capture the bias value.
transfer.i[0] = transfer.i[2] = transfer.i[4] = transfer.i[6] = (0x434UL - bitDepth) << 20;
transfer.i[1] = 0x80000000;
dBias = transfer.d[0];
// Back both pointers up by one element so every access can use pre-increment.
src--;
dest--;
// Software-pipelined path: prologue + epilogue cover 8 samples, the loop covers 4
// per iteration.  Statement order must be preserved.
if( count >= 8 )
{
count -= 8;
loopCount = count / 4;
count &= 3;
int0 = __lwbrx( 0, ++src);
int1 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
int2 = __lwbrx( 0, ++src);
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
int3 = __lwbrx( 0, ++src);
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
int0 = __lwbrx( 0, ++src);
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
int1 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
int2 = __lwbrx( 0, ++src);
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
int3 = __lwbrx( 0, ++src);
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
while( loopCount-- )
{
int0 = __lwbrx( 0, ++src);
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
int1 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
int2 = __lwbrx( 0, ++src);
int1 ^= 0x80000000UL;
transfer.i[1] = int0;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
int3 = __lwbrx( 0, ++src);
int2 ^= 0x80000000UL;
transfer.i[3] = int1;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
}
// Epilogue: drain the pipeline without loading further samples.
int3 ^= 0x80000000UL;
transfer.i[5] = int2;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
transfer.i[7] = int3;
d0 = transfer.d[0];
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
d1 = transfer.d[1];
d0 -= dBias;
f3 = d3;
(++dest)[0] = f2;
d2 = transfer.d[2];
d1 -= dBias;
f0 = d0;
(++dest)[0] = f3;
d3 = transfer.d[3];
d2 -= dBias;
f1 = d1;
(++dest)[0] = f0;
d3 -= dBias;
f2 = d2;
(++dest)[0] = f1;
f3 = d3;
(++dest)[0] = f2;
(++dest)[0] = f3;
}
// Scalar path: remaining samples (all of them when count < 8).
while( count-- )
{
int0 = __lwbrx( 0, ++src);
int0 ^= 0x80000000UL;
transfer.i[1] = int0;
d0 = transfer.d[0];
d0 -= dBias;
f0 = d0;
(++dest)[0] = f0;
}
}
// Float32ToInt8
//
// Converts `count` floats from `src` into signed 8-bit samples at `dst`.
//
// Per sample: scaled = f * 2147483648.0 + round; the result goes through __fctiw and
// is written to a slot of `buffer` with __stfiwx; the first byte of that slot
// (equivalently `buffer[0] >> 24` in the scalar loop) becomes the output sample.
// NOTE(review): __fctiw / __stfiwx are declared outside this block; presumably the
// float->integer-word convert and store-as-integer-word operations - confirm.
// NOTE(review): reading byte [0] / [4] of `buffer` as the top byte assumes 4-byte
// `long` and big-endian byte order - confirm for the build target.
// NOTE(review): `round` is 128.0 here, whereas the 16-bit and 24-bit converters in
// this file use half of the least-significant kept bit (32768.0 and 0.5 * 256.0).
// By that pattern an 8-bit output would use 2^23 - confirm whether 128.0 is intended.
//
// src   - input floats
// dst   - output samples
// count - number of samples to convert
void Float32ToInt8( float *src, SInt8 *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 128.0;
// The initial value of loopCount is overwritten before it is ever read.
unsigned long loopCount = count / 4;
// Two scratch slots, used alternately by the pipelined path.
long buffer[2];
register float startingFloat;
register double scaled;
register double converted;
register SInt8 copy;
// Software-pipelined path: prologue + epilogue cover 6 samples, the loop covers 2
// per iteration, alternating between the two buffer slots.  Statement order must
// be preserved.
if( count >= 6 )
{
startingFloat = (src++)[0];
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
copy = ((SInt8*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
count -= 6;
loopCount = count / 2;
count &= 1;
// Inline asm is used inside the loop to pin the instruction sequence.
while( loopCount-- )
{
register float startingFloat2;
register double scaled2;
register double converted2;
register SInt8 copy2;
(dst++)[0] = copy;
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
copy2 = ((SInt8*) buffer)[4];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (sizeof(float)), "r" (buffer) : "memory" );
startingFloat2 = (src++)[0];
(dst++)[0] = copy2;
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
copy = ((SInt8*) buffer)[0];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (0), "r" (buffer) : "memory" );
startingFloat = (src++)[0];
}
// Epilogue: drain the 6 samples still in the pipeline (no further loads).
(dst++)[0] = copy;
copy = ((SInt8*) buffer)[4];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
(dst++)[0] = copy;
copy = ((SInt8*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
(dst++)[0] = copy;
copy = ((SInt8*) buffer)[4];
__stfiwx( converted, sizeof(float), buffer );
(dst++)[0] = copy;
copy = ((SInt8*) buffer)[0];
(dst++)[0] = copy;
copy = ((SInt8*) buffer)[4];
(dst++)[0] = copy;
}
// Scalar path: remaining samples (all of them when count < 6).
while( count-- )
{
double scaled = src[0] * scale + round;
double converted = __fctiw( scaled );
__stfiwx( converted, 0, buffer );
dst[0] = buffer[0] >> 24;
src++;
dst++;
}
}
// Float32ToNativeInt16
//
// Converts `count` floats from `src` into signed 16-bit samples at `dst`.
//
// Per sample: scaled = f * 2147483648.0 + 32768.0; the result goes through __fctiw
// and is written to a slot of `buffer` with __stfiwx; the first halfword of that
// slot (equivalently `buffer[0] >> 16` in the scalar loop) becomes the output.
// NOTE(review): __fctiw / __stfiwx are declared outside this block; presumably the
// float->integer-word convert and store-as-integer-word operations - confirm.
// NOTE(review): reading halfword [0] / [2] of `buffer` as the upper 16 bits assumes
// 4-byte `long` and big-endian byte order - confirm for the build target.
//
// src   - input floats
// dst   - output samples
// count - number of samples to convert
void Float32ToNativeInt16( float *src, signed short *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 32768.0;
// The initial value of loopCount is overwritten before it is ever read.
unsigned long loopCount = count / 4;
// Two scratch slots, used alternately by the pipelined path.
long buffer[2];
register float startingFloat;
register double scaled;
register double converted;
register short copy;
// Software-pipelined path: prologue + epilogue cover 6 samples, the loop covers 2
// per iteration, alternating between the two buffer slots.  Statement order must
// be preserved.
if( count >= 6 )
{
startingFloat = (src++)[0];
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
copy = ((short*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
count -= 6;
loopCount = count / 2;
count &= 1;
// Inline asm is used inside the loop to pin the instruction sequence.
while( loopCount-- )
{
register float startingFloat2;
register double scaled2;
register double converted2;
register short copy2;
(dst++)[0] = copy;
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
copy2 = ((short*) buffer)[2];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (sizeof(float)), "r" (buffer) : "memory" );
startingFloat2 = (src++)[0];
(dst++)[0] = copy2;
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
copy = ((short*) buffer)[0];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (0), "r" (buffer) : "memory" );
startingFloat = (src++)[0];
}
// Epilogue: drain the 6 samples still in the pipeline (no further loads).
(dst++)[0] = copy;
copy = ((short*) buffer)[2];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
(dst++)[0] = copy;
copy = ((short*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
(dst++)[0] = copy;
copy = ((short*) buffer)[2];
__stfiwx( converted, sizeof(float), buffer );
(dst++)[0] = copy;
copy = ((short*) buffer)[0];
(dst++)[0] = copy;
copy = ((short*) buffer)[2];
(dst++)[0] = copy;
}
// Scalar path: remaining samples (all of them when count < 6).
while( count-- )
{
double scaled = src[0] * scale + round;
double converted = __fctiw( scaled );
__stfiwx( converted, 0, buffer );
dst[0] = buffer[0] >> 16;
src++;
dst++;
}
}
// Float32ToSwapInt16
//
// Converts `count` floats from `src` into 16-bit samples at `dst`, writing every
// output with the `sthbrx` (store halfword byte-reversed) instruction so the result
// has the opposite byte order from Float32ToNativeInt16.
//
// Per sample: scaled = f * 2147483648.0 + 32768.0; the result goes through __fctiw
// and is written to a slot of `buffer` with __stfiwx; the first halfword of that
// slot (equivalently `buffer[0] >> 16` in the scalar loop) is the value stored.
// NOTE(review): __fctiw / __stfiwx are declared outside this block; presumably the
// float->integer-word convert and store-as-integer-word operations - confirm.
// NOTE(review): reading halfword [0] / [2] of `buffer` as the upper 16 bits assumes
// 4-byte `long` and big-endian byte order - confirm for the build target.
//
// Inline-asm constraints: in an indexed store the RA operand encodes "literal zero"
// when register 0 is chosen, so any RA value passed from C must use the "b"
// (base register, r0 excluded) constraint rather than "r".  The sthbrx statements
// also carry a "memory" clobber because they write memory that is not named in
// their operand lists.
//
// src   - input floats
// dst   - output samples (byte-reversed)
// count - number of samples to convert
void Float32ToSwapInt16( float *src, signed short *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 32768.0;
// The initial value of loopCount is overwritten before it is ever read.
unsigned long loopCount = count / 4;
// Two scratch slots, used alternately by the pipelined path.
long buffer[2];
register float startingFloat;
register double scaled;
register double converted;
register short copy;
// Software-pipelined path: prologue + epilogue cover 6 samples, the loop covers 2
// per iteration, alternating between the two buffer slots.  Statement order must
// be preserved.
if( count >= 6 )
{
startingFloat = (src++)[0];
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
copy = ((short*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (src++)[0];
count -= 6;
loopCount = count / 2;
count &= 1;
while( loopCount-- )
{
register float startingFloat2;
register double scaled2;
register double converted2;
register short copy2;
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
copy2 = ((short*) buffer)[2];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (sizeof(float)), "r" (buffer) : "memory" );
startingFloat2 = (src)[0]; src+=2;
dst+=2;
// The -2 byte offset sits in the RA slot: it must not be allocated to r0, otherwise
// the store would land at dst instead of dst - 2 bytes.
__asm__ __volatile__ ( "sthbrx %0, %1, %2" : : "r" (copy2), "b" (-2), "r" (dst) : "memory" );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
copy = ((short*) buffer)[0];
__asm__ __volatile__ ( "fmadd %0, %1, %2, %3" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale), "f" (round) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (0), "r" (buffer) : "memory" );
startingFloat = (src)[-1];
}
// Epilogue: drain the 6 samples still in the pipeline (no further loads).
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
copy = ((short*) buffer)[2];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
copy = ((short*) buffer)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
copy = ((short*) buffer)[2];
__stfiwx( converted, sizeof(float), buffer );
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
copy = ((short*) buffer)[0];
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
copy = ((short*) buffer)[2];
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" ); dst++;
}
// Scalar path: remaining samples (all of them when count < 6).
while( count-- )
{
double scaled = src[0] * scale + round;
double converted = __fctiw( scaled );
__stfiwx( converted, 0, buffer );
copy = buffer[0] >> 16;
__asm__ __volatile__ ( "sthbrx %0, 0, %1" : : "r" (copy), "r" (dst) : "memory" );
src++;
dst++;
}
}
// Float32ToNativeInt24
//
// Converts `count` floats from `src` into packed 24-bit samples written through
// `dst`.  In the main loop four input floats produce three output words; leftover
// samples are written as three individual bytes each.
//
// Per sample: scaled = f * 2147483648.0 + (0.5 * 256.0); the result goes through
// __fctiw and is written to a slot of `buffer` with __stfiwx.  The slots are read
// back as words and merged with __rlwimi so that only the upper 24 bits of each
// converted word end up in the output stream.
// NOTE(review): __fctiw / __stfiwx / __rlwimi / __rlwimi_volatile are declared
// outside this block; presumably convert-to-integer-word, store-as-integer-word and
// rotate-left-then-insert-under-mask - confirm.
//
// FPSCR handling: the function reads the FPSCR with `mffs`, sets the two low bits of
// its low word, installs that with `mtfsf 7`, and restores the saved value with
// `mtfsf 7` before returning.
// NOTE(review): on PowerPC those two bits are the rounding-mode field and 0b11
// presumably selects round toward -infinity - confirm.  The `setting.i[1]` access
// assumes big-endian word order inside the double.
//
// src   - input floats
// dst   - packed 24-bit output
// count - number of samples to convert
void Float32ToNativeInt24( float *src, signed long *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 0.5 * 256.0;
// The initial value of loopCount is overwritten before it is ever read.
unsigned long loopCount = count / 4;
// Four scratch slots, one per sample of a 4-sample group.
long buffer[4];
register float startingFloat, startingFloat2;
register double scaled, scaled2;
register double converted, converted2;
register long copy1; register long copy2; register long copy3; register long copy4; register double oldSetting;
// Save the current FPSCR and install the modified copy (see header).
{
union
{
double d;
int i[2];
}setting;
register double newSetting;
asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
setting.d = oldSetting;
setting.i[1] |= 3;
newSetting = setting.d;
asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
}
// Back both pointers up by one element so the pipelined code can use pre-increment.
src--;
dst--;
// Software-pipelined path: prologue + epilogue cover 8 samples, the loop covers 4
// samples (3 output words) per iteration.  Statement order must be preserved.
if( count >= 8 )
{
startingFloat = (++src)[0];
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = buffer[0];
__stfiwx( converted, 2 * sizeof( float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy2 = buffer[1];
__stfiwx( converted, 3 * sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = __rlwimi( copy1, copy2, 8, 24, 31 );
copy3 = buffer[2];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
count -= 8;
loopCount = count / 4;
count &= 3;
// Each source line below is one pipeline stage; the four stages together load four
// floats and store three packed words.
while( loopCount-- )
{
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) ); copy4 = buffer[3]; copy2 = __rlwimi_volatile( copy2, copy3, 8, 24, 7 ); __stfiwx( converted, 1 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) ); (++dst)[0] = copy1; copy3 = __rlwimi_volatile( copy3, copy4, 8, 24, 15 ); copy1 = buffer[0]; copy2 = __rlwimi_volatile( copy2, copy2, 8, 0, 31 ); __stfiwx( converted2, 2 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; converted2 = __fctiw( scaled ); (++dst)[0] = copy2; copy3 = __rlwimi_volatile( copy3, copy3, 16, 0, 31 ); copy2 = buffer[1]; __stfiwx( converted, 3 * sizeof(float), buffer );
__asm__ ( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; converted = __fctiw( scaled2 ); (++dst)[0] = copy3; copy1 = __rlwimi_volatile( copy1, copy2, 8, 24, 31 ); __stfiwx( converted2, 0 * sizeof(float), buffer ); copy3 = buffer[2]; }
// Epilogue: drain the 8 samples still in the pipeline (6 output words, no loads).
copy2 = __rlwimi( copy2, copy3, 8, 24, 7 ); copy4 = buffer[3]; __stfiwx( converted, 1 * sizeof(float), buffer ); converted2 = __fctiw( scaled ); scaled2 = startingFloat * scale + round;
(++dst)[0] = copy1; copy2 = __rlwimi( copy2, copy2, 8, 0, 31 ); copy3 = __rlwimi( copy3, copy4, 8, 24, 15 ); copy1 = buffer[0]; __stfiwx( converted2, 2 * sizeof(float), buffer ); converted = __fctiw( scaled2 );
(++dst)[0] = copy2; copy3 = __rlwimi( copy3, copy3, 16, 0, 31 ); copy2 = buffer[1]; __stfiwx( converted, 3 * sizeof(float), buffer );
(++dst)[0] = copy3; copy1 = __rlwimi( copy1, copy2, 8, 24, 31 ); copy3 = buffer[2];
copy2 = __rlwimi( copy2, copy3, 8, 24, 7 ); copy4 = buffer[3];
(++dst)[0] = copy1; copy2 = __rlwimi( copy2, copy2, 8, 0, 31 ); copy3 = __rlwimi( copy3, copy4, 8, 24, 15 );
(++dst)[0] = copy2; copy3 = __rlwimi( copy3, copy3, 16, 0, 31 );
(++dst)[0] = copy3; }
// Undo the initial dst-- so dst points at the next unwritten output byte.
dst++;
// Leftover samples: each is written as one byte (bits 31..24 of the converted word)
// followed by one halfword (bits 23..8), advancing dst by 3 bytes in total.
while( count-- )
{
startingFloat = (++src)[0]; scaled = startingFloat * scale + round; converted = __fctiw( scaled ); __stfiwx( converted, 0, buffer ); copy1 = buffer[0]; ((signed char*) dst)[0] = copy1 >> 24;
dst = (signed long*) ((signed char*) dst + 1 );
((unsigned short*) dst)[0] = copy1 >> 8;
dst = (signed long*) ((unsigned short*) dst + 1 );
}
// Restore the FPSCR value saved on entry.
__asm__ __volatile__ ( "mtfsf 7, %0" : : "f" (oldSetting) );
}
// Float32ToSwapInt24
//
// Converts `count` floats from `src` into packed 24-bit samples written through
// `dst`, reading every converted word back from `buffer` with __lwbrx so the output
// has the opposite byte order from Float32ToNativeInt24.  In the main loop four
// input floats produce three output words; leftover samples are written as three
// individual bytes each.
//
// Per sample: scaled = f * 2147483648.0 + (0.5 * 256.0); the result goes through
// __fctiw and is written to a slot of `buffer` with __stfiwx, then re-read with
// __lwbrx and merged into the output words with __rlwimi.
// NOTE(review): __fctiw / __stfiwx / __lwbrx / __rlwimi / __rlwimi_volatile are
// declared outside this block; presumably convert-to-integer-word,
// store-as-integer-word, byte-reversed word load and
// rotate-left-then-insert-under-mask - confirm.
//
// FPSCR handling: the function reads the FPSCR with `mffs`, sets the two low bits of
// its low word, installs that with `mtfsf 7`, and restores the saved value with
// `mtfsf 7` before returning.
// NOTE(review): on PowerPC those two bits are the rounding-mode field and 0b11
// presumably selects round toward -infinity - confirm.  The `setting.i[1]` access
// assumes big-endian word order inside the double.
//
// src   - input floats
// dst   - packed 24-bit output (byte-reversed samples)
// count - number of samples to convert
void Float32ToSwapInt24( float *src, signed long *dst, unsigned int count )
{
register double scale = 2147483648.0;
register double round = 0.5 * 256.0;
// The initial value of loopCount is overwritten before it is ever read.
unsigned long loopCount = count / 4;
// Four scratch slots, one per sample of a 4-sample group.
long buffer[4];
register float startingFloat, startingFloat2;
register double scaled, scaled2;
register double converted, converted2;
register long copy1;
register long copy2;
register long copy3;
register long copy4;
register double oldSetting;
// Save the current FPSCR and install the modified copy (see header).
{
union
{
double d;
int i[2];
}setting;
register double newSetting;
asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
setting.d = oldSetting;
setting.i[1] |= 3;
newSetting = setting.d;
asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
}
// Back both pointers up by one element so the pipelined code can use pre-increment.
src--;
dst--;
// Software-pipelined path: prologue + epilogue cover 8 samples, the loop covers 4
// samples (3 output words) per iteration.  Statement order must be preserved.
if( count >= 8 )
{
startingFloat = (++src)[0];
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = __lwbrx( 0, buffer );
__stfiwx( converted, 2 * sizeof( float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy2 = __lwbrx( 4, buffer );
__stfiwx( converted, 3 * sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
copy1 = __rlwimi( copy1, copy2, 8, 0, 7 );
copy3 = __lwbrx( 8, buffer );;
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale + round;
startingFloat = (++src)[0];
count -= 8;
loopCount = count / 4;
count &= 3;
// Each source line below is one pipeline stage; the four stages together load four
// floats and store three packed words.
while( loopCount-- )
{
copy1 = __rlwimi( copy1, copy1, 8, 0, 31 ); __asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) ); copy4 = __lwbrx( 12, buffer ); copy2 = __rlwimi_volatile( copy2, copy3, 8, 0, 15 ); __stfiwx( converted, 1 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) ); (++dst)[0] = copy1; copy4 = __rlwimi_volatile( copy4, copy3, 24, 0, 7 ); copy1 = __lwbrx( 0, buffer ); copy2 = __rlwimi_volatile( copy2, copy2, 16, 0, 31 ); __stfiwx( converted2, 2 * sizeof(float), buffer );
__asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); startingFloat2 = (++src)[0]; converted2 = __fctiw( scaled ); (++dst)[0] = copy2; copy2 = __lwbrx( 4, buffer ); __stfiwx( converted, 3 * sizeof(float), buffer );
__asm__ ( "fmadds %0, %1, %2, %3" : "=f"(scaled) : "f" (startingFloat2), "f" ( scale ), "f" ( round )); startingFloat = (++src)[0]; converted = __fctiw( scaled2 ); (++dst)[0] = copy4; copy1 = __rlwimi_volatile( copy1, copy2, 8, 0, 7 ); __stfiwx( converted2, 0 * sizeof(float), buffer ); copy3 = __lwbrx( 8, buffer ); }
// Epilogue: drain the 8 samples still in the pipeline (6 output words, no loads).
copy1 = __rlwimi( copy1, copy1, 8, 0, 31 ); __asm__ __volatile__( "fmadds %0, %1, %2, %3" : "=f"(scaled2) : "f" (startingFloat), "f" ( scale ), "f" ( round )); __asm__ __volatile__( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) ); copy4 = __lwbrx( 12, buffer ); copy2 = __rlwimi_volatile( copy2, copy3, 8, 0, 15 ); __stfiwx( converted, 1 * sizeof(float), buffer );
__asm__ __volatile__( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) ); (++dst)[0] = copy1; copy4 = __rlwimi_volatile( copy4, copy3, 24, 0, 7 ); copy1 = __lwbrx( 0, buffer ); copy2 = __rlwimi_volatile( copy2, copy2, 16, 0, 31 ); __stfiwx( converted2, 2 * sizeof(float), buffer );
(++dst)[0] = copy2; copy2 = __lwbrx( 4, buffer ); __stfiwx( converted, 3 * sizeof(float), buffer );
(++dst)[0] = copy4; copy1 = __rlwimi_volatile( copy1, copy2, 8, 0, 7 ); copy3 = __lwbrx( 8, buffer );
copy1 = __rlwimi( copy1, copy1, 8, 0, 31 ); copy4 = __lwbrx( 12, buffer ); copy2 = __rlwimi_volatile( copy2, copy3, 8, 0, 15 );
(++dst)[0] = copy1; copy4 = __rlwimi_volatile( copy4, copy3, 24, 0, 7 ); copy2 = __rlwimi_volatile( copy2, copy2, 16, 0, 31 );
(++dst)[0] = copy2;
(++dst)[0] = copy4; }
// Undo the initial dst-- so dst points at the next unwritten output byte.
dst++;
// Leftover samples: the converted word is re-read with __lwbrx, then written as one
// byte (copy1 >> 16) followed by one halfword (low 16 bits of copy1), advancing dst
// by 3 bytes in total.
while( count-- )
{
startingFloat = (++src)[0]; scaled = startingFloat * scale + round; converted = __fctiw( scaled ); __stfiwx( converted, 0, buffer ); copy1 = __lwbrx( 0, buffer); ((signed char*) dst)[0] = copy1 >> 16;
dst = (signed long*) ((signed char*) dst + 1 );
((unsigned short*) dst)[0] = copy1;
dst = (signed long*) ((unsigned short*) dst + 1 );
}
// Restore the FPSCR value saved on entry.
__asm__ __volatile__ ( "mtfsf 7, %0" : : "f" (oldSetting) );
}
// Float32ToSwapInt32
// Converts `count` floats read from `src` into 32-bit integers written to `dst`.
// Each sample is multiplied by 2147483648.0 (2^31), converted to an integer word
// with fctiw, and stored with stwbrx (PowerPC "store word byte-reversed"), so the
// bytes land in memory in the opposite order from a plain word store.
//
// src   - input samples; `count` floats are read.
// dst   - output buffer; `count` 32-bit words are written.
// count - number of samples to convert.
//
// The FPSCR rounding-mode bits are cleared on entry and the FPSCR value saved on
// entry is written back before returning.
//
// Layout: when count >= 6 a software-pipelined path runs (a prologue that starts
// six samples, a loop that retires two samples per iteration, and an epilogue that
// drains the six samples still in flight). A simple per-sample loop then handles
// whatever is left: the odd sample (if any) or, when count < 6, every sample.
// No bounds checking is done on either pointer.
void Float32ToSwapInt32( float *src, signed long *dst, unsigned int count )
{
register double scale = 2147483648.0;
// This initial value is never read: loopCount is reassigned before the only loop that uses it.
unsigned long loopCount = count / 4;
// Two-slot staging area: stfiwx writes converted integers here, and they are read
// back as integers so they can be stored byte-reversed.
// NOTE(review): the offsets used below assume sizeof(long) == sizeof(float) == 4 - confirm for the target ABI.
long buffer[2];
register float startingFloat;
register double scaled;
register double converted;
register long copy;
register double oldSetting;
{
// Save the FPSCR (mffs), clear its two lowest bits and write the result back.
// Per the PowerPC ISA those bits are the RN rounding-mode field and 00 selects
// round-to-nearest. i[1] is the low word of the double on a big-endian layout,
// which is where mffs places the FPSCR contents.
union
{
double d;
int i[2];
}setting;
register double newSetting;
asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
setting.d = oldSetting;
setting.i[1] &= 0xFFFFFFFC;
newSetting = setting.d;
asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
}
if( count >= 6 )
{
// Pipeline prologue: the scale / convert / stage / reload steps of consecutive
// samples are interleaved. Six samples are loaded from src here; none has been
// written to dst yet when the loop starts.
startingFloat = (src++)[0];
scaled = startingFloat * scale;
startingFloat = (src++)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale;
startingFloat = (src++)[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale;
startingFloat = (src++)[0];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale;
startingFloat = (src++)[0];
copy = buffer[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale;
startingFloat = (src++)[0];
// Six samples are now in flight. The loop consumes the rest in pairs, and the
// low bit of the remaining count is left for the scalar loop at the end.
count -= 6;
loopCount = count / 2;
count &= 1;
while( loopCount-- )
{
// Steady state: each iteration loads two new samples and stores two finished
// words. The asm statements are volatile and appear in a hand-chosen order;
// do not reorder them.
register float startingFloat2;
register double scaled2;
register double converted2;
register long copy2;
// Store the oldest finished word byte-reversed at dst[0].
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
// Read slot 1 before the stfiwx below overwrites it.
copy2 = buffer[1];
__asm__ __volatile__ ( "fmuls %0, %1, %2" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (sizeof(*buffer)), "r" (buffer) : "memory" );
startingFloat2 = (src)[0]; src+=2;
dst+=2;
// dst has already been advanced by two, so offset -sizeof(dst[0]) addresses the second word of this pair.
__asm__ __volatile__ ( "stwbrx %0, %1, %2" : : "r" (copy2), "r" (-sizeof(dst[0])), "r" (dst) );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
// Read slot 0 before the stfiwx below overwrites it.
copy = buffer[0];
__asm__ __volatile__ ( "fmuls %0, %1, %2" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale) );
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (0), "r" (buffer) : "memory" );
// src was advanced by two above, so src[-1] is the second sample of the pair just fetched.
startingFloat = (src)[-1];
}
// Pipeline epilogue: no more loads from src; finish and store the six samples
// still in flight, alternating between the two staging slots.
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) ); dst++;
copy = buffer[1];
__stfiwx( converted, sizeof(float), buffer );
converted = __fctiw( scaled );
scaled = startingFloat * scale;
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) ); dst++;
copy = buffer[0];
__stfiwx( converted, 0, buffer );
converted = __fctiw( scaled );
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) ); dst++;
copy = buffer[1];
__stfiwx( converted, sizeof(float), buffer );
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) ); dst++;
copy = buffer[0];
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) ); dst++;
copy = buffer[1];
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) ); dst++;
}
// Scalar path: one sample per iteration, staged through buffer[0] and stored byte-reversed.
// These locals intentionally shadow the register variables declared at the top.
while( count-- )
{
double scaled = src[0] * scale;
double converted = __fctiw( scaled );
__stfiwx( converted, 0, buffer );
copy = buffer[0];
__asm__ __volatile__ ( "stwbrx %0, 0, %1" : : "r" (copy), "r" (dst) );
src++;
dst++;
}
// Write back the FPSCR value saved on entry (field mask 7 covers the rounding-mode bits).
__asm__ __volatile__ ( "mtfsf 7, %0" : : "f" (oldSetting) );
}
// Float32ToNativeInt32
// Converts `count` floats read from `src` into 32-bit integers written to `dst`.
// Each sample is multiplied by 2147483648.0 (2^31), converted to an integer word
// with fctiw, and stored straight into dst with stfiwx (no byte reversal and no
// intermediate staging buffer).
//
// src   - input samples; `count` floats are read.
// dst   - output buffer; `count` 32-bit words are written.
// count - number of samples to convert.
//
// The FPSCR rounding-mode bits are cleared on entry and the FPSCR value saved on
// entry is written back before returning.
//
// Layout: when count >= 3 a software-pipelined path runs (a prologue that starts
// three samples, a loop that retires two samples per iteration, and an epilogue
// that stores the three samples still in flight). A simple per-sample loop then
// handles whatever is left: the odd sample (if any) or, when count < 3, every sample.
// No bounds checking is done on either pointer.
void Float32ToNativeInt32( float *src, signed long *dst, unsigned int count )
{
register double scale = 2147483648.0;
unsigned long loopCount;
register float startingFloat;
register double scaled;
register double converted;
register double oldSetting;
{
// Save the FPSCR (mffs), clear its two lowest bits and write the result back.
// Per the PowerPC ISA those bits are the RN rounding-mode field and 00 selects
// round-to-nearest. i[1] is the low word of the double on a big-endian layout,
// which is where mffs places the FPSCR contents.
union
{
double d;
int i[2];
}setting;
register double newSetting;
asm volatile ( "mffs %0" : "=f" ( oldSetting ) );
setting.d = oldSetting;
setting.i[1] &= 0xFFFFFFFC;
newSetting = setting.d;
asm volatile( "mtfsf 7, %0" : : "f" (newSetting ) );
}
if( count >= 3 )
{
// Pipeline prologue: three samples are loaded; the first is already converted,
// the second is scaled, the third is only loaded.
startingFloat = (src++)[0];
scaled = startingFloat * scale;
startingFloat = (src++)[0];
converted = __fctiw( scaled );
scaled = startingFloat * scale;
startingFloat = (src++)[0];
// Three samples are now in flight. The loop consumes the rest in pairs, and the
// low bit of the remaining count is left for the scalar loop at the end.
count -= 3;
loopCount = count / 2;
count &= 1;
while( loopCount-- )
{
// Steady state: each iteration loads two new samples (src[0], src[1]) and
// stores two finished words (dst[0], dst[1]). The asm statements are volatile
// and appear in a hand-chosen order; do not reorder them.
register float startingFloat2;
register double scaled2;
register double converted2;
startingFloat2 = (src)[0];
__asm__ __volatile__ ( "fmul %0, %1, %2" : "=f" (scaled2) : "f" ( startingFloat), "f" (scale) );
// Store the oldest converted sample at dst + 0.
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted), "b%" (0), "r" (dst) : "memory" );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted2) : "f" ( scaled ) );
startingFloat = (src)[1]; src+=2;
__asm__ __volatile__ ( "fmul %0, %1, %2" : "=f" (scaled) : "f" ( startingFloat2), "f" (scale) );
// Store the next converted sample at byte offset 4, i.e. the second 32-bit word of this pair.
__asm__ __volatile__ ( "stfiwx %0, %1, %2" : : "f" (converted2), "b%" (4), "r" (dst) : "memory" );
__asm__ __volatile__ ( "fctiw %0, %1" : "=f" (converted) : "f" ( scaled2 ) );
dst+=2;
}
// Pipeline epilogue: no more loads from src; finish and store the three samples still in flight.
__stfiwx( converted, 0, dst++ );
converted = __fctiw( scaled );
__asm__ __volatile__ ( "fmul %0, %1, %2" : "=f" (scaled) : "f" ( startingFloat), "f" (scale) );
__stfiwx( converted, 0, dst++ );
converted = __fctiw( scaled );
__stfiwx( converted, 0, dst++ );
}
// Scalar path: one sample per iteration, converted and stored directly into dst.
// These locals intentionally shadow the register variables declared at the top.
while( count-- )
{
double scaled = src[0] * scale;
double converted = __fctiw( scaled );
__stfiwx( converted, 0, dst );
dst++;
src++;
}
// Write back the FPSCR value saved on entry (field mask 7 covers the rounding-mode bits).
asm volatile( "mtfsf 7, %0" : : "f" (oldSetting) );
}
#endif
#pragma mark ------------------------
#pragma mark ••• Utility Routines
#pragma mark ------------------------
// CalculateOffset
// Returns sampleRate * kOneOver1000000000 * nanoseconds, truncated to a UInt32.
// Presumably kOneOver1000000000 is 1e-9, making the result the number of samples
// that elapse in `nanoseconds` at `sampleRate` Hz - confirm against its definition.
//
// nanoseconds - duration to convert.
// sampleRate  - sample rate used for the conversion.
//
// The whole product is computed in double precision and only then narrowed.
// Previously the (UInt32) cast bound to the (sampleRate * kOneOver1000000000)
// factor alone; for any rate where that factor is below 1.0 it truncated to 0
// and the function returned 0 regardless of `nanoseconds`.
// The product is assumed to fit in 32 bits; larger values are not representable
// in the return type.
UInt32 CalculateOffset (UInt64 nanoseconds, UInt32 sampleRate) {
	double samplesPerNanosecond = (double)sampleRate * kOneOver1000000000;

	return (UInt32)(samplesPerNanosecond * (double)nanoseconds);
}
// dBfixed2float
// Looks up a gain value in zeroGaindBConvTable for a fixed-point dB value.
//
// indBfixed - upper 16 bits are taken as a signed whole-dB index, lower 16 bits
//             as a fraction scaled by kOneOver65535.
// ioGainPtr - receives the resulting gain.
//
// The whole-dB index is clamped to [-kMinZeroGain, kMaxZeroGain]; a clamped value
// drops its fraction. A non-zero fraction interpolates linearly between the
// entry for the index and a neighbouring entry: the next-higher entry for
// index >= 0, the next-lower entry for index < 0.
// NOTE(review): for a negative index the interpolation moves away from 0 dB.
// That is only correct if negative inputs carry their fraction as a magnitude;
// confirm the encoding callers use.
void dBfixed2float(UInt32 indBfixed, float* ioGainPtr) {
	int wholeDb = (SInt16)(indBfixed >> 16);
	int step;
	float fraction;
	float gain;
	float neighbourGain;

	// Clamp to the table range; clearing indBfixed discards the fractional part too.
	if (wholeDb >= kMaxZeroGain) {
		wholeDb = kMaxZeroGain;
		indBfixed = 0;
	} else if (wholeDb <= -kMinZeroGain) {
		wholeDb = -kMinZeroGain;
		indBfixed = 0;
	}

	fraction = ((float)((UInt32)(indBfixed & 0x0000FFFF)))*kOneOver65535;
	gain = zeroGaindBConvTable[wholeDb + kZeroGaindBConvTableOffset];

	if (fraction > 0.0f) {
		// Pick the neighbouring table entry: one up for non-negative indices, one down otherwise.
		step = (wholeDb >= 0) ? 1 : -1;
		neighbourGain = zeroGaindBConvTable[wholeDb + kZeroGaindBConvTableOffset + step];
		gain = gain + fraction*(neighbourGain - gain);
	}

	*ioGainPtr = gain;
}
// inputGainConverter
// Looks up the gain for an input-gain index in inputGaindBConvTable.
//
// inGainIndex - table index; values above 2*kInputGaindBConvTableOffset are
//               clamped to that last index.
// ioGainPtr   - receives the table entry.
void inputGainConverter(UInt32 inGainIndex, float* ioGainPtr) {
	UInt32 slot = inGainIndex;

	if (slot > (UInt32)(2*kInputGaindBConvTableOffset)) {
		slot = 2*kInputGaindBConvTableOffset;
	}

	*ioGainPtr = inputGaindBConvTable[slot];
}
// convertToFourDotTwenty
// Converts a float into the three-byte FourDotTwenty representation.
//
// ioFourDotTwenty - destination; all three byte fields are overwritten.
// inFloatPtr      - source value; clamped to [-8.0, 8.0] before conversion.
//
// The clamped value is multiplied by kFourDotTwentyScaleFactor, truncated to a
// 32-bit integer, and its low 24 bits are split across the fields: bits 16-23 to
// integerAndFraction1, bits 8-15 to fraction2, bits 0-7 to fraction3.
// NOTE(review): if the scale factor is 2^20, an input of exactly +8.0 yields
// 0x800000, which has the same 24-bit pattern as -8.0 - confirm whether the upper
// clamp should be just below 8.0.
void convertToFourDotTwenty(FourDotTwenty* ioFourDotTwenty, float* inFloatPtr)
{
	float scaleFactor = kFourDotTwentyScaleFactor;
	float clamped;
	SInt32 fixedValue;

	// Start from a cleared result before reading the input.
	ioFourDotTwenty->integerAndFraction1 = 0;
	ioFourDotTwenty->fraction2 = 0;
	ioFourDotTwenty->fraction3 = 0;

	clamped = *inFloatPtr;
	if (clamped > 8.0) {
		clamped = 8.0;
	} else if (clamped < -8.0) {
		clamped = -8.0;
	}

	fixedValue = (SInt32)(clamped*scaleFactor);

	// Emit the low 24 bits, least-significant byte first.
	ioFourDotTwenty->fraction3 = (UInt8)(fixedValue & 0x000000FF);
	ioFourDotTwenty->fraction2 = (UInt8)((fixedValue & 0x0000FF00) >> 8);
	ioFourDotTwenty->integerAndFraction1 = (UInt8)((fixedValue & 0x00FF0000) >> 16);
}