// FELightingNEON.cpp
#include "config.h"
#include "FELightingNEON.h"
#if ENABLE(SVG) && ENABLE(FILTERS)
#if CPU(ARM_NEON) && COMPILER(GCC)
#include <wtf/Alignment.h>
namespace WebCore {
// Constant table handed out by feLightingConstantsForNeon(): 24 signed 16-bit values.
// NOTE(review): the 16 passed to WTF_ALIGNED is presumably the byte alignment - confirm in wtf/Alignment.h.
// NOTE(review): presumably this is the buffer reached through the painting-constants pointer that
// neonDrawLighting loads into ALPHAX_Q, ALPHAY_Q, REMAPX_D and REMAPY_D (that routine reads 48 bytes,
// which equals the size of this table) - confirm against the caller that fills the structure.
static WTF_ALIGNED(short, s_FELightingConstantsForNeon[], 16) = {
// First 32 bytes: two rows of eight 16-bit multipliers.
-2, 1, 0, -1, 2, 1, 0, -1,
0, -1, -2, -1, 0, 1, 2, 1,
// Last 16 bytes: two rows of four 16-bit words, each word packing two byte values.
0x0f0e, 0x0302, 0x0504, 0x0706,
0x0b0a, 0x1312, 0x1514, 0x1716,
};
// Hands out the address of the first entry of the file-local NEON constant table.
short* feLightingConstantsForNeon()
{
    return &s_FELightingConstantsForNeon[0];
}
// Two-level stringification: TOSTRING lets its argument be macro-expanded first,
// then ASSTRING turns the expanded tokens into a string literal.
#define ASSTRING(str) #str
#define TOSTRING(value) ASSTRING(value)
// Byte offsets (as string literals) of the fields that neonDrawLighting reads from the
// structure whose address it receives in r0.
// NOTE(review): the structure itself is declared outside this file section - confirm the layout matches.
#define PIXELS_OFFSET TOSTRING(0)
#define WIDTH_OFFSET TOSTRING(4)
#define HEIGHT_OFFSET TOSTRING(8)
#define FLAGS_OFFSET TOSTRING(12)
#define SPECULAR_EXPONENT_OFFSET TOSTRING(16)
#define CONE_EXPONENT_OFFSET TOSTRING(20)
#define FLOAT_ARGUMENTS_OFFSET TOSTRING(24)
#define PAINTING_CONSTANTS_OFFSET TOSTRING(28)
// Line terminator appended to every instruction in the assembly string.
#define NL "\n"
// Symbolic names for the registers used by the neonDrawLighting assembly below.

// Core registers. PAINTING_DATA_R holds the structure address while the fields are loaded;
// the same register is reused afterwards under the name RESET_WIDTH_R.
#define PAINTING_DATA_R "r11"
#define RESET_WIDTH_R PAINTING_DATA_R
#define PIXELS_R "r4"
#define WIDTH_R "r5"
#define HEIGHT_R "r6"
#define FLAGS_R "r7"
#define SPECULAR_EXPONENT_R "r8"
#define CONE_EXPONENT_R "r10"
#define SCANLINE_R "r12"
// Three scratch vectors. Each TMPn group names one quad register together with the two
// double-word and four single-precision registers that overlap it.
#define TMP1_Q "q0"
#define TMP1_D0 "d0"
#define TMP1_S0 "s0"
#define TMP1_S1 "s1"
#define TMP1_D1 "d1"
#define TMP1_S2 "s2"
#define TMP1_S3 "s3"
#define TMP2_Q "q1"
#define TMP2_D0 "d2"
#define TMP2_S0 "s4"
#define TMP2_S1 "s5"
#define TMP2_D1 "d3"
#define TMP2_S2 "s6"
#define TMP2_S3 "s7"
#define TMP3_Q "q2"
#define TMP3_D0 "d4"
#define TMP3_S0 "s8"
#define TMP3_S1 "s9"
#define TMP3_D1 "d5"
#define TMP3_S2 "s10"
#define TMP3_S3 "s11"
// Scalars used by the spot-light path and by the POWF macros.
#define COSINE_OF_ANGLE "s12"
#define POWF_INT_S "s13"
#define POWF_FRAC_S "s14"
#define SPOT_COLOR_Q "q4"
// Current position vector; its fourth lane (s23) doubles as the zero constant,
// and d11 is the double-word register containing that lane.
#define POSITION_Q "q5"
#define POSITION_X_S "s20"
#define POSITION_Y_S "s21"
#define POSITION_Z_S "s22"
#define CONST_ZERO_HI_D "d11"
#define CONST_ZERO_S "s23"
// Register ranges filled by the first three vld1 loads in neonDrawLighting,
// read from the pointer found at FLOAT_ARGUMENTS_OFFSET.
#define READ1_RANGE "d12-d15"
#define READ2_RANGE "d16-d19"
#define READ3_RANGE "d20-d21"
// Names for the individual values that land in READ1_RANGE (s24-s31).
#define SCALE_S "s24"
#define SCALE_DIV4_S "s25"
#define DIFFUSE_CONST_S "s26"
#define CONE_CUT_OFF_S "s28"
#define CONE_FULL_LIGHT_S "s29"
#define CONE_CUT_OFF_RANGE_S "s30"
#define CONST_ONE_HI_D "d15"
#define CONST_ONE_S "s31"
// Names for the vectors that land in READ2_RANGE (q8, q9) and READ3_RANGE (q10).
#define LIGHT_Q "q8"
#define DIRECTION_Q "q9"
#define COLOR_Q "q10"
// Register ranges filled by the last two vld1 loads, read from the pointer found at
// PAINTING_CONSTANTS_OFFSET, followed by names for the registers inside those ranges.
#define READ4_RANGE "d22-d25"
#define READ5_RANGE "d26-d27"
#define ALPHAX_Q "q11"
#define ALPHAY_Q "q12"
#define REMAPX_D "d26"
#define REMAPY_D "d27"
// Three double-word registers, one per image row, each holding 16-bit lanes that the
// .scanline loop fills with bytes read from the pixel buffer.
#define ALL_ROWS_D "{d28,d29,d30}"
#define TOP_ROW_D "d28"
#define MIDDLE_ROW_D "d29"
#define BOTTOM_ROW_D "d30"
// GET_LENGTH(source, temp): both arguments are register-group prefixes such as TMP1.
// Squares every lane of source##_Q into temp##_Q, adds lanes 0..2 of the squares and
// stores the square root of that sum in lane 3 of the source (source##_S3).
// Lanes 0..2 of the source are left untouched; the temp register is overwritten.
#define GET_LENGTH(source, temp) \
"vmul.f32 " temp##_Q ", " source##_Q ", " source##_Q NL \
"vadd.f32 " source##_S3 ", " temp##_S0 ", " temp##_S1 NL \
"vadd.f32 " source##_S3 ", " source##_S3 ", " temp##_S2 NL \
"vsqrt.f32 " source##_S3 ", " source##_S3 NL
// DOT_PRODUCT(destination, source1, source2): arguments are register-group prefixes.
// Multiplies the two sources lane by lane into destination##_Q, then accumulates
// lanes 0..2 into destination##_S0. Lane 3 of the destination keeps the product of the
// sources' fourth lanes, which callers below use as the product of the two lengths.
#define DOT_PRODUCT(destination, source1, source2) \
"vmul.f32 " destination##_Q ", " source1##_Q ", " source2##_Q NL \
"vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S1 NL \
"vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S2 NL
// MULTIPLY_BY_DIFFUSE_CONST(normalVectorLength, dotProductLength): arguments are
// single-precision register names. Writes the result to TMP2_S1 and changes the condition flags.
// - FLAG_DIFFUSE_CONST_IS_1 clear in FLAGS_R: TMP2_S1 = DIFFUSE_CONST_S * first / second
// - FLAG_DIFFUSE_CONST_IS_1 set in FLAGS_R:   TMP2_S1 = first / second
// In every use below the first argument is the numerator and the second the denominator.
#define MULTIPLY_BY_DIFFUSE_CONST(normalVectorLength, dotProductLength) \
"tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL \
"vmuleq.f32 " TMP2_S1 ", " DIFFUSE_CONST_S ", " normalVectorLength NL \
"vdiveq.f32 " TMP2_S1 ", " TMP2_S1 ", " dotProductLength NL \
"vdivne.f32 " TMP2_S1 ", " normalVectorLength ", " dotProductLength NL
// POWF_SQR: one step of the integer part of POWF.
// If the `current` bit of the exponent register is set, multiplies value by POWF_INT_S.
// If any of the `remaining` bits is set, squares POWF_INT_S for the next step.
#define POWF_SQR(value, exponent, current, remaining) \
"tst " exponent ", #" ASSTRING(current) NL \
"vmulne.f32 " value ", " value ", " POWF_INT_S NL \
"tst " exponent ", #" ASSTRING(remaining) NL \
"vmulne.f32 " POWF_INT_S ", " POWF_INT_S ", " POWF_INT_S NL
// POWF_SQRT: one step of the fractional part of POWF.
// If any of the `remaining` bits (which include `current`) is set, replaces POWF_FRAC_S
// by its square root. If the `current` bit is set, multiplies value by POWF_FRAC_S.
#define POWF_SQRT(value, exponent, current, remaining) \
"tst " exponent ", #" ASSTRING(remaining) NL \
"vsqrtne.f32 " POWF_FRAC_S ", " POWF_FRAC_S NL \
"tst " exponent ", #" ASSTRING(current) NL \
"vmulne.f32 " value ", " value ", " POWF_FRAC_S NL
// POWF(value, exponent): value is a single-precision register name, exponent a core
// register name holding a 12-bit pattern. Replaces value by value raised to a power that
// is assembled bit by bit: bits 0x040..0x800 contribute the factors base^1, base^2, ...,
// base^32 (repeated squaring of POWF_INT_S) and bits 0x20..0x01 contribute base^(1/2),
// base^(1/4), ..., base^(1/64) (repeated square roots of POWF_FRAC_S).
// The original value is copied to POWF_INT_S / POWF_FRAC_S only when the corresponding
// group of bits is non-zero, and value restarts from CONST_ONE_S.
// Overwrites POWF_INT_S, POWF_FRAC_S and the condition flags.
#define POWF(value, exponent) \
"tst " exponent ", #0xfc0" NL \
"vmovne.f32 " POWF_INT_S ", " value NL \
"tst " exponent ", #0x03f" NL \
"vmovne.f32 " POWF_FRAC_S ", " value NL \
"vmov.f32 " value ", " CONST_ONE_S NL \
\
POWF_SQR(value, exponent, 0x040, 0xf80) \
POWF_SQR(value, exponent, 0x080, 0xf00) \
POWF_SQR(value, exponent, 0x100, 0xe00) \
POWF_SQR(value, exponent, 0x200, 0xc00) \
POWF_SQR(value, exponent, 0x400, 0x800) \
"tst " exponent ", #0x800" NL \
"vmulne.f32 " value ", " value ", " POWF_INT_S NL \
\
POWF_SQRT(value, exponent, 0x20, 0x3f) \
POWF_SQRT(value, exponent, 0x10, 0x1f) \
POWF_SQRT(value, exponent, 0x08, 0x0f) \
POWF_SQRT(value, exponent, 0x04, 0x07) \
POWF_SQRT(value, exponent, 0x02, 0x03) \
POWF_SQRT(value, exponent, 0x01, 0x01)
// File-scope assembly that defines and exports the symbol neonDrawLighting.
// On entry r0 holds the address of a structure that is read at the *_OFFSET byte offsets
// defined above. The routine walks a pixel buffer in place and overwrites three bytes of
// each visited pixel with a computed light colour.
// NOTE(review): the C-level prototype, the structure layout and the FLAG_* values are
// declared outside this file section - confirm they agree with the offsets and tests used here.
asm ( ".globl " TOSTRING(neonDrawLighting) NL
TOSTRING(neonDrawLighting) ":" NL
// Prologue: save the core and VFP registers that are modified below and restored on return.
"stmdb sp!, {r4-r8, r10, r11, lr}" NL
"vstmdb sp!, {d8-d15}" NL
// Keep the structure address in PAINTING_DATA_R so r0/r1 can be used as scratch pointers.
"mov " PAINTING_DATA_R ", r0" NL
// r0 = pointer to the float arguments, r1 = pointer to the painting constants.
"ldr r0, [" PAINTING_DATA_R ", #" FLOAT_ARGUMENTS_OFFSET "]" NL
"ldr r1, [" PAINTING_DATA_R ", #" PAINTING_CONSTANTS_OFFSET "]" NL
// Load the scalar fields into their dedicated core registers.
"ldr " PIXELS_R ", [" PAINTING_DATA_R ", #" PIXELS_OFFSET "]" NL
"ldr " WIDTH_R ", [" PAINTING_DATA_R ", #" WIDTH_OFFSET "]" NL
"ldr " HEIGHT_R ", [" PAINTING_DATA_R ", #" HEIGHT_OFFSET "]" NL
"ldr " FLAGS_R ", [" PAINTING_DATA_R ", #" FLAGS_OFFSET "]" NL
"ldr " SPECULAR_EXPONENT_R ", [" PAINTING_DATA_R ", #" SPECULAR_EXPONENT_OFFSET "]" NL
"ldr " CONE_EXPONENT_R ", [" PAINTING_DATA_R ", #" CONE_EXPONENT_OFFSET "]" NL
// Bulk-load the float arguments (d12-d21) and the painting constants (d22-d27);
// the named aliases for these registers are defined with the READn_RANGE macros.
"vld1.f32 { " READ1_RANGE " }, [r0]!" NL
"vld1.f32 { " READ2_RANGE " }, [r0]!" NL
"vld1.f32 { " READ3_RANGE " }, [r0]!" NL
"vld1.s16 {" READ4_RANGE "}, [r1]!" NL
"vld1.s16 {" READ5_RANGE "}, [r1]!" NL
// SCANLINE_R = WIDTH_R * 4 + 8, then PIXELS_R += SCANLINE_R + 3.
// NOTE(review): presumably 4 bytes per pixel with rows two pixels wider than WIDTH_R, and
// offset 3 selecting the alpha byte of a pixel - confirm against the buffer set up by the caller.
"mov " SCANLINE_R ", " WIDTH_R ", lsl #2" NL
"add " SCANLINE_R ", " SCANLINE_R ", #8" NL
"add " PIXELS_R ", " PIXELS_R ", " SCANLINE_R NL
"add " PIXELS_R ", " PIXELS_R ", #3" NL
// CONST_ZERO_S gets the all-zero bit pattern; POSITION_Y_S starts at CONST_ONE_S.
"mov r0, #0" NL
"vmov.f32 " CONST_ZERO_S ", r0" NL
"vmov.f32 " POSITION_Y_S ", " CONST_ONE_S NL
// NOTE(review): the flags produced by this tst are not consumed - every instruction up to
// the next flag-setting one (subs in .scanline) is unconditional.
"tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
// Remember the unmodified light colour; the spot-light path rescales COLOR_Q per pixel.
"vmov.f32 " SPOT_COLOR_Q ", " COLOR_Q NL
// The structure address is no longer needed: reuse its register to remember the row width.
"mov " RESET_WIDTH_R ", " WIDTH_R NL
// Start of a row: three columns must be loaded before the first pixel can be processed.
".mainLoop:" NL
"mov r3, #3" NL
"vmov.f32 " POSITION_X_S ", " CONST_ONE_S NL
// Load one column: one byte from the row above, the row below and the current row
// (the last load also advances PIXELS_R by 4).
".scanline:" NL
"ldrb r0, [" PIXELS_R ", -" SCANLINE_R "]" NL
"ldrb r1, [" PIXELS_R ", +" SCANLINE_R "]" NL
"ldrb r2, [" PIXELS_R "], #4" NL
// Rotate the four 16-bit lanes of each row register by one position, then overwrite
// lane 1 with the byte just read. After three columns, lane 3 holds the oldest value,
// lane 2 the middle one and lane 1 the newest.
"vext.s16 " TOP_ROW_D ", " TOP_ROW_D ", " TOP_ROW_D ", #3" NL
"vext.s16 " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", #3" NL
"vext.s16 " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", #3" NL
"vmov.s16 " TOP_ROW_D "[1], r0" NL
"vmov.s16 " MIDDLE_ROW_D "[1], r2" NL
"vmov.s16 " BOTTOM_ROW_D "[1], r1" NL
// Keep loading columns until r3 reaches zero (3 at the start of a row, 1 afterwards).
"subs r3, r3, #1" NL
"bne .scanline" NL
// Neither point nor spot light: the light vector does not depend on the position.
"tst " FLAGS_R ", #" TOSTRING(FLAG_POINT_LIGHT | FLAG_SPOT_LIGHT) NL
"beq .distantLight" NL
// POSITION_Z_S = (lane 2 of the middle row, i.e. the centre value) * SCALE_S.
"vmov.s16 r3, " MIDDLE_ROW_D "[2]" NL
"vmov.f32 " POSITION_Z_S ", r3" NL
"vcvt.f32.s32 " POSITION_Z_S ", " POSITION_Z_S NL
"vmul.f32 " POSITION_Z_S ", " POSITION_Z_S ", " SCALE_S NL
// TMP1 = LIGHT - POSITION; its length goes to TMP1_S3.
"vsub.f32 " TMP1_Q ", " LIGHT_Q ", " POSITION_Q NL
GET_LENGTH(TMP1, TMP2)
// Spot lights additionally need the angle between the light vector and DIRECTION_Q.
"tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
"bne .cosineOfAngle" NL
".visiblePixel:" NL
// Gather eight 16-bit values from the three row registers into TMP3_Q, using the byte
// indices held in REMAPX_D and REMAPY_D.
"vtbl.8 " TMP3_D0 ", " ALL_ROWS_D ", " REMAPX_D NL
"vtbl.8 " TMP3_D1 ", " ALL_ROWS_D ", " REMAPY_D NL
// r0 = sum of the eight gathered values weighted by ALPHAX_Q (three pairwise adds
// reduce the eight products to lane 0).
"vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAX_Q NL
"vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL
"vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
"vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
"vmov.s16 r0, " TMP2_D0 "[0]" NL
// r1 = the same weighted sum using ALPHAY_Q.
"vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAY_Q NL
"vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL
"vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
"vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
"vmov.s16 r1, " TMP2_D0 "[0]" NL
// Only spot lights rescale the colour per pixel.
"tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
"beq .endLight" NL
// TMP3_S1 = -COSINE_OF_ANGLE, raised to the cone exponent via POWF unless
// FLAG_CONE_EXPONENT_IS_1 is set.
"vneg.f32 " TMP3_S1 ", " COSINE_OF_ANGLE NL
"tst " FLAGS_R ", #" TOSTRING(FLAG_CONE_EXPONENT_IS_1) NL
"beq .coneExpPowf" NL
".coneExpPowfFinished:" NL
// If COSINE_OF_ANGLE is higher than CONE_FULL_LIGHT_S, .cutOff scales TMP3_S1 down.
"vcmp.f32 " COSINE_OF_ANGLE ", " CONE_FULL_LIGHT_S NL
"fmstat" NL
"bhi .cutOff" NL
".cutOffFinished:" NL
// Cap TMP3_S1 at CONST_ONE_S (lane 1 of the d-register pair) and scale the light colour by it.
"vmin.f32 " TMP3_D0 ", " TMP3_D0 ", " CONST_ONE_HI_D NL
"vmul.f32 " COLOR_Q ", " SPOT_COLOR_Q ", " TMP3_D0 "[1]" NL
// From here: r0/r1 hold the two weighted sums, TMP1_Q the light vector (length in lane 3)
// and COLOR_Q the colour to apply.
".endLight:" NL
// Both sums zero: take the shortcut paths that only use the light vector itself.
"orrs r2, r0, r1" NL
"bne .normalVectorIsNonZero" NL
"tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL
"bne .specularLight1" NL
// Non-specular case: strength = [DIFFUSE_CONST_S *] TMP1_S2 / TMP1_S3.
MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3)
"b .lightStrengthCalculated" NL
".specularLight1:" NL
// Specular case with zero sums: add the length to the third component, recompute the length.
"vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL
GET_LENGTH(TMP1, TMP2)
// When FLAG_SPECULAR_EXPONENT_IS_1 is clear, the ratio is handed to .specularExpPowf in TMP2_S1.
"tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL
"vdiveq.f32 " TMP2_S1 ", " TMP1_S2 ", " TMP1_S3 NL
"beq .specularExpPowf" NL
MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3)
"b .lightStrengthCalculated" NL
".normalVectorIsNonZero:" NL
// TMP2 = (r0 * SCALE_DIV4_S, r1 * SCALE_DIV4_S, CONST_ONE_S); its length goes to TMP2_S3.
"vmov.s32 " TMP2_S0 ", r0" NL
"vcvt.f32.s32 " TMP2_S0 ", " TMP2_S0 NL
"vmul.f32 " TMP2_S0 ", " TMP2_S0 ", " SCALE_DIV4_S NL
"vmov.s32 " TMP2_S1 ", r1" NL
"vcvt.f32.s32 " TMP2_S1 ", " TMP2_S1 NL
"vmul.f32 " TMP2_S1 ", " TMP2_S1 ", " SCALE_DIV4_S NL
"vmov.f32 " TMP2_S2 ", " CONST_ONE_S NL
GET_LENGTH(TMP2, TMP3)
"tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL
"bne .specularLight2" NL
// Non-specular case: TMP3_S0 = dot(TMP2, TMP1), TMP3_S3 = product of the two lengths.
DOT_PRODUCT(TMP3, TMP2, TMP1)
MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3)
"b .lightStrengthCalculated" NL
".specularLight2:" NL
// Specular case: modify the light vector as in .specularLight1, then use the same
// dot-product / length-product ratio.
"vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL
GET_LENGTH(TMP1, TMP3)
DOT_PRODUCT(TMP3, TMP2, TMP1)
"tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL
"vdiveq.f32 " TMP2_S1 ", " TMP3_S0 ", " TMP3_S3 NL
"beq .specularExpPowf" NL
MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3)
// Falls through: TMP2_S1 holds the light strength on every path that reaches this label.
".lightStrengthCalculated:" NL
// Clamp TMP2_S1 between CONST_ZERO_S and CONST_ONE_S (lane 1 of each d-register operand).
"vmax.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ZERO_HI_D NL
"vmin.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ONE_HI_D NL
// Scale the colour by the strength, convert to unsigned integers and store the low byte
// of the first three lanes at PIXELS_R - 11, - 10 and - 9.
"vmul.f32 " TMP3_Q ", " COLOR_Q ", " TMP2_D0 "[1]" NL
"vcvt.u32.f32 " TMP3_Q ", " TMP3_Q NL
"vmov.u32 r2, r3, " TMP3_S0 ", " TMP3_S1 NL
"strb r2, [" PIXELS_R ", #-11]" NL
"strb r3, [" PIXELS_R ", #-10]" NL
"vmov.u32 r2, " TMP3_S2 NL
"strb r2, [" PIXELS_R ", #-9]" NL
// Advance to the next pixel of the row: one more column has to be loaded (r3 = 1).
".blackPixel:" NL
"vadd.f32 " POSITION_X_S ", " CONST_ONE_S NL
"mov r3, #1" NL
"subs " WIDTH_R ", " WIDTH_R ", #1" NL
"bne .scanline" NL
// End of the row: advance POSITION_Y_S, restore the width counter and continue while
// HEIGHT_R is non-zero.
"vadd.f32 " POSITION_Y_S ", " CONST_ONE_S NL
"mov " WIDTH_R ", " RESET_WIDTH_R NL
"subs " HEIGHT_R ", " HEIGHT_R ", #1" NL
"bne .mainLoop" NL
// Epilogue: restore the saved registers and return by loading the saved lr into pc.
"vldmia sp!, {d8-d15}" NL
"ldmia sp!, {r4-r8, r10, r11, pc}" NL
// Out-of-line helper: without point or spot light, LIGHT_Q is used directly as the light vector.
".distantLight:" NL
"vmov.f32 " TMP1_Q ", " LIGHT_Q NL
"b .visiblePixel" NL
// Out-of-line helper for spot lights:
// COSINE_OF_ANGLE = dot(TMP1, DIRECTION) / TMP1_S3 (the light vector length).
".cosineOfAngle:" NL
DOT_PRODUCT(TMP3, TMP1, DIRECTION)
"vdiv.f32 " COSINE_OF_ANGLE ", " TMP3_S0 ", " TMP1_S3 NL
// Lower than or equal to CONE_CUT_OFF_S: the pixel is lit, continue with the normal path.
"vcmp.f32 " COSINE_OF_ANGLE ", " CONE_CUT_OFF_S NL
"fmstat" NL
"bls .visiblePixel" NL
// Otherwise write zero to the same three bytes the lit path stores to, and skip the lighting.
"mov r0, #0" NL
"strh r0, [" PIXELS_R ", #-11]" NL
"strb r0, [" PIXELS_R ", #-9]" NL
"b .blackPixel" NL
// Out-of-line helper: TMP3_S1 *= (CONE_CUT_OFF_S - COSINE_OF_ANGLE) / CONE_CUT_OFF_RANGE_S.
".cutOff:" NL
"vsub.f32 " TMP3_S0 ", " CONE_CUT_OFF_S ", " COSINE_OF_ANGLE NL
"vdiv.f32 " TMP3_S0 ", " TMP3_S0 ", " CONE_CUT_OFF_RANGE_S NL
"vmul.f32 " TMP3_S1 ", " TMP3_S1 ", " TMP3_S0 NL
"b .cutOffFinished" NL
// Out-of-line helper: raise TMP3_S1 to the power encoded in CONE_EXPONENT_R.
".coneExpPowf:" NL
POWF(TMP3_S1, CONE_EXPONENT_R)
"b .coneExpPowfFinished" NL
// Out-of-line helper: raise TMP2_S1 to the power encoded in SPECULAR_EXPONENT_R, then
// multiply by DIFFUSE_CONST_S unless FLAG_DIFFUSE_CONST_IS_1 is set.
".specularExpPowf:" NL
POWF(TMP2_S1, SPECULAR_EXPONENT_R)
"tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL
"vmuleq.f32 " TMP2_S1 ", " TMP2_S1 ", " DIFFUSE_CONST_S NL
"b .lightStrengthCalculated" NL
);
// Converts a floating-point exponent into a 12-bit fixed-point integer.
// A negative exponent is first replaced by 1 / -exponent, and anything above 63.99 is
// capped at 63.99. The result holds exponent / 64 as twelve binary fraction digits:
// bit 11 carries weight 32, bit 6 weight 1 and bit 0 weight 1/64.
// NOTE(review): presumably this is the bit pattern tested by the POWF assembly macro
// through SPECULAR_EXPONENT_R / CONE_EXPONENT_R - confirm at the call site.
int FELighting::getPowerCoefficients(float exponent)
{
    if (exponent < 0)
        exponent = 1 / (-exponent);
    if (exponent > 63.99)
        exponent = 63.99;

    // Scale below 1 so that each doubling in the loop shifts out one binary digit.
    exponent /= 64;

    int coefficients = 0;
    for (int bit = 1 << 11; bit; bit >>= 1) {
        exponent *= 2;
        if (exponent >= 1) {
            coefficients |= bit;
            exponent -= 1;
        }
    }
    return coefficients;
}
}
#endif // CPU(ARM_NEON) && COMPILER(GCC)
#endif // ENABLE(SVG) && ENABLE(FILTERS)