/* pixman-arm-simd-asm.S */


/*
 * Copyright © 2008 Mozilla Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Mozilla Corporation not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Mozilla Corporation makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Jeff Muizelaar (jeff@infidigm.net)
 *
 */

/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

	.text
	.arch armv6
	.object_arch armv4
	.arm
	.altmacro

/*
 * Supplementary macro for setting function attributes.
 *
 * Opens a function definition for the symbol passed as the macro argument:
 * it issues .func for the symbol, exports it with .global and, when __ELF__
 * is defined, additionally marks it .hidden and gives it the %function type.
 * The macro finishes by placing the label itself, so the function body
 * follows the invocation directly.  No .endfunc is emitted here; every user
 * in this file closes the function with its own .endfunc.
 *
 * The parameter is referenced by its bare name (no backslash), which relies
 * on the .altmacro mode selected earlier in this file.
 */
.macro pixman_asm_function fname
	.func fname
	.global fname
#ifdef __ELF__
	.hidden fname
	.type fname, %function
#endif
fname:
.endm

/*
 * The code below was generated by gcc 4.3.4 from the commented-out
 * functions in the 'pixman-arm-simd.c' file, using the following
 * optimization options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
 *
 * TODO: replace the gcc-generated code with hand-tuned versions, because
 * the code quality is not very good, and introduce symbolic register
 * aliases for better readability and maintainability.
 */

/*
 * pixman_composite_add_8_8_asm_armv6
 *
 * For every row, adds the source bytes into the destination bytes with
 * UQADD8 (unsigned, saturating, per byte).
 *
 * In (offsets of stack arguments are as seen on entry):
 *   r0       = number of bytes per row
 *   r1       = number of rows
 *   r2       = destination pointer (read and written)
 *   r3       = byte step added to the destination pointer after each row
 *   [sp, #0] = source pointer (only read)
 *   [sp, #4] = byte step added to the source pointer after each row
 * Presumably the C prototype is (width, height, dst, dst_stride, src,
 * src_stride) -- confirm against pixman-arm-simd.c.
 *
 * Register roles inside the body:
 *   r11 = bytes per row            r10 = row counter
 *   r8  = destination row start    r7  = source row start
 *   r1  = destination cursor       r0  = source cursor
 *   r12 = bytes left in the row (byte offset inside the word loop)
 *
 * Each row is processed in up to three phases: single bytes until both
 * cursors are 4-byte aligned (this phase consumes the whole row if the two
 * pointers never become aligned together), whole 32-bit words, and finally
 * the 1..3 bytes that remain.
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 * No value is returned.
 */
pixman_asm_function pixman_composite_add_8_8_asm_armv6
	push	{r4, r5, r6, r7, r8, r9, r10, r11}	/* 32 bytes of saved registers */
	mov	r10, r1					/* r10 = number of rows */
	sub	sp, sp, #4				/* one word of local storage */
	subs	r10, r10, #1				/* carry is clear only when the row count was 0 */
	mov	r11, r0					/* r11 = bytes per row */
	mov	r8, r2					/* r8 = destination row start */
	str	r3, [sp]				/* spill the destination row step */
	ldr	r7, [sp, #36]				/* first stack argument (32 saved + 4 local): source row start */
	bcc	0f					/* no rows at all: return */
6:	cmp	r11, #0					/* ---- start of a row ---- */
	beq	1f					/* empty row: only advance the row pointers */
	orr	r3, r8, r7
	tst	r3, #3					/* is either row pointer not 4-byte aligned? */
	beq	2f					/* both aligned: go straight to the word phase */
	mov	r1, r8					/* r1 = destination cursor */
	mov	r0, r7					/* r0 = source cursor */
	mov	r12, r11				/* r12 = bytes left in this row */
	b	3f
5:	tst	r3, #3					/* r3 = destination cursor | source cursor */
	beq	4f					/* both cursors aligned now: switch to the word phase */
3:	ldrb	r2, [r0], #1				/* source byte, advance source cursor */
	subs	r12, r12, #1				/* Z is tested by the bne below; nothing in between sets flags */
	ldrb	r3, [r1]				/* destination byte */
	uqadd8	r3, r2, r3				/* saturating add */
	strb	r3, [r1], #1				/* store and advance destination cursor */
	orr	r3, r1, r0				/* combined low bits for the alignment test at 5: */
	bne	5b					/* bytes remain in this row */
1:	ldr	r3, [sp]				/* ---- row finished: step both row pointers ---- */
	add	r8, r8, r3
	ldr	r3, [sp, #40]				/* second stack argument: source row step */
	add	r7, r7, r3
10:	subs	r10, r10, #1
	bcs	6b					/* more rows to do */
0:	add	sp, sp, #4				/* release the local word */
	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
	bx	lr
2:	mov	r12, r11				/* aligned from the start: set up cursors and count */
	mov	r1, r8
	mov	r0, r7
4:	cmp	r12, #3					/* at least one whole word left? */
	subgt	r6, r12, #4				/* r6 = bytes left - 4 */
	movgt	r9, r12					/* r9 = bytes left on entry to the word phase */
	lsrgt	r5, r6, #2				/* r5 = number of words - 1 */
	addgt	r3, r5, #1				/* r3 = number of words */
	movgt	r12, #0					/* r12 = byte offset into the row */
	lslgt	r4, r3, #2				/* r4 = byte offset at which the word loop stops */
	ble	7f					/* flags still from the cmp: fewer than 4 bytes, tail only */
8:	ldr	r3, [r0, r12]				/* four source bytes */
	ldr	r2, [r1, r12]				/* four destination bytes */
	uqadd8	r3, r3, r2				/* four saturating byte adds at once */
	str	r3, [r1, r12]
	add	r12, r12, #4
	cmp	r12, r4
	bne	8b
	sub	r3, r9, #4
	bic	r3, r3, #3
	add	r3, r3, #4				/* r3 = bytes covered by the word loop */
	subs	r12, r6, r5, lsl #2			/* r12 = bytes still left (0..3) */
	add	r1, r1, r3				/* move both cursors past the words */
	add	r0, r0, r3
	beq	1b					/* nothing left: row finished */
7:	mov	r4, #0					/* r4 = byte index for the tail; r12 is 1..3 here */
9:	ldrb	r3, [r1, r4]				/* destination byte */
	ldrb	r2, [r0, r4]				/* source byte */
	uqadd8	r3, r2, r3
	strb	r3, [r1, r4]
	add	r4, r4, #1
	cmp	r4, r12
	bne	9b
	ldr	r3, [sp]				/* same row-pointer update as at label 1 */
	add	r8, r8, r3
	ldr	r3, [sp, #40]
	add	r7, r7, r3
	b	10b					/* rejoin the row counter */
.endfunc

/*
 * pixman_composite_over_8888_8888_asm_armv6
 *
 * For every 32-bit destination word computes, per byte,
 *     dst = saturate (src + dst * (255 - (src >> 24)) / 255)
 * where the division is the rounded form  t = x * a + 0x80;
 * (t + (t >> 8)) >> 8  carried out on two 16-bit lanes at a time.
 *
 * In (offsets of stack arguments are as seen on entry):
 *   r0       = number of 32-bit words per row
 *   r1       = number of rows
 *   r2       = destination pointer (read and written)
 *   r3       = destination row step, counted in 32-bit words
 *   [sp, #0] = source pointer (only read)
 *   [sp, #4] = source row step, counted in 32-bit words
 * Presumably the C prototype is (width, height, dst, dst_stride, src,
 * src_stride) -- confirm against pixman-arm-simd.c.
 *
 * Local frame (20 bytes):
 *   [sp, #0]  destination row step in bytes
 *   [sp, #4]  start of the next destination row
 *   [sp, #8]  source row step in bytes
 *   [sp, #12] number of rows
 *   [sp, #16] words per row
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 * No value is returned.
 */
pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
	push	{r4, r5, r6, r7, r8, r9, r10, r11}	/* 32 bytes of saved registers */
	sub	sp, sp, #20				/* local frame, see the layout above */
	cmp	r1, #0					/* flags are consumed by the beq below */
	mov	r12, r2					/* r12 = destination row start */
	str	r1, [sp, #12]				/* keep the row count */
	str	r0, [sp, #16]				/* keep the per-row word count */
	ldr	r2, [sp, #52]				/* first stack argument (32 + 20): source row start */
	beq	0f					/* zero rows: nothing to do */
	lsl	r3, r3, #2				/* destination row step: words -> bytes */
	str	r3, [sp]
	ldr	r3, [sp, #56]				/* second stack argument: source row step */
	mov	r10, #0					/* r10 = rows completed */
	lsl	r3, r3, #2				/* source row step: words -> bytes */
	str	r3, [sp, #8]
	mov	r11, r3					/* r11 = source row step for the first row */
	b	1f
6:	ldr	r11, [sp, #8]				/* reload the source row step (r11 was reused as 255) */
1:	ldr	r9, [sp]				/* destination row step in bytes */
	mov	r0, r12					/* r0 = destination cursor */
	add	r12, r12, r9				/* start of the next destination row */
	mov	r1, r2					/* r1 = source cursor */
	str	r12, [sp, #4]				/* spilled because r12 becomes the word counter */
	add	r2, r2, r11				/* r2 = start of the next source row */
	ldr	r12, [sp, #16]				/* r12 = words left in this row */
	ldr	r3, =0x00800080				/* rounding term, one 0x80 per 16-bit lane */
	ldr	r9, =0xff00ff00				/* keeps the high byte of each 16-bit lane */
	mov	r11, #255
	cmp	r12, #0
	beq	4f					/* empty row */
5:	ldr	r5, [r1], #4				/* r5 = source word, advance source cursor */
	ldr	r4, [r0]				/* r4 = destination word */
	sub	r8, r11, r5, lsr #24			/* r8 = 255 - top byte of the source word */
	uxtb16	r6, r4					/* r6 = destination bytes 0 and 2 as 16-bit lanes */
	uxtb16	r7, r4, ror #8				/* r7 = destination bytes 1 and 3 as 16-bit lanes */
	mla	r6, r6, r8, r3				/* lane * r8 + 0x80 */
	mla	r7, r7, r8, r3
	uxtab16	r6, r6, r6, ror #8			/* t += t >> 8 in each lane */
	uxtab16	r7, r7, r7, ror #8
	and	r7, r7, r9				/* results for bytes 1 and 3, already in position */
	uxtab16	r6, r7, r6, ror #8			/* merge in the high bytes of r6 as bytes 0 and 2 */
	uqadd8	r5, r6, r5				/* saturating per-byte add of the source word */
	str	r5, [r0], #4				/* store and advance destination cursor */
	subs	r12, r12, #1
	bne	5b
4:	ldr	r3, [sp, #12]				/* total number of rows */
	add	r10, r10, #1
	cmp	r10, r3
	ldr	r12, [sp, #4]				/* r12 = start of the next destination row */
	bne	6b
0:	add	sp, sp, #20
	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
	bx	lr
.endfunc

/*
 * pixman_composite_over_8888_n_8888_asm_armv6
 *
 * For every 32-bit word: first scales each byte of the source word by a
 * constant 8-bit factor m, then combines the scaled source with the
 * destination, per byte:
 *     s   = src * m / 255
 *     dst = saturate (s + dst * (255 - (s >> 24)) / 255)
 * Both divisions use the rounded form  t = x * a + 0x80;
 * (t + (t >> 8)) >> 8  on two 16-bit lanes at a time.
 *
 * In (offsets of stack arguments are as seen on entry):
 *   r0       = number of 32-bit words per row
 *   r1       = number of rows
 *   r2       = destination pointer (read and written)
 *   r3       = destination row step, counted in 32-bit words
 *   [sp, #0] = source pointer (only read)
 *   [sp, #4] = source row step, counted in 32-bit words
 *   [sp, #8] = 32-bit value of which only the byte at offset 3 is used as m
 * Presumably the C prototype is (width, height, dst, dst_stride, src,
 * src_stride, mask) -- confirm against pixman-arm-simd.c.
 *
 * Local frame (28 bytes):
 *   [sp, #0]  start of the next destination row
 *   [sp, #4]  start of the next source row
 *   [sp, #8]  source row step in bytes
 *   [sp, #12] number of rows
 *   [sp, #16] words per row
 *   [sp, #20] destination row step in bytes
 *   [sp, #24] the factor m
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 * No value is returned.
 */
pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
	push	{r4, r5, r6, r7, r8, r9, r10, r11}	/* 32 bytes of saved registers */
	sub	sp, sp, #28				/* local frame, see the layout above */
	cmp	r1, #0					/* flags are consumed by the beq below */
	str	r1, [sp, #12]				/* keep the row count */
	ldrb	r1, [sp, #71]				/* byte 3 of the third stack argument (bits 31:24 if little-endian) */
	mov	r12, r2					/* r12 = destination row start */
	str	r0, [sp, #16]				/* keep the per-row word count */
	ldr	r2, [sp, #60]				/* first stack argument (32 + 28): source row start */
	str	r1, [sp, #24]				/* keep the factor m */
	beq	0f					/* zero rows: nothing to do */
	lsl	r3, r3, #2				/* destination row step: words -> bytes */
	str	r3, [sp, #20]
	ldr	r3, [sp, #64]				/* second stack argument: source row step */
	mov	r10, #0					/* r10 = rows completed */
	lsl	r3, r3, #2				/* source row step: words -> bytes */
	str	r3, [sp, #8]
	mov	r11, r3					/* r11 = source row step for the first row */
	b	1f
5:	ldr	r11, [sp, #8]				/* reload the source row step (r11 was reused as 255) */
1:	ldr	r4, [sp, #20]				/* destination row step in bytes */
	mov	r0, r12					/* r0 = destination cursor */
	mov	r1, r2					/* r1 = source cursor */
	add	r12, r12, r4				/* start of the next destination row */
	add	r2, r2, r11				/* start of the next source row */
	str	r12, [sp]				/* both are spilled: r12 and r2 are reused below */
	str	r2, [sp, #4]
	ldr	r12, [sp, #16]				/* r12 = words left in this row */
	ldr	r2, =0x00800080				/* rounding term, one 0x80 per 16-bit lane */
	ldr	r3, [sp, #24]				/* r3 = factor m */
	mov	r11, #255
	cmp	r12, #0
	beq	3f					/* empty row */
4:	ldr	r5, [r1], #4				/* r5 = source word, advance source cursor */
	ldr	r4, [r0]				/* r4 = destination word */
	uxtb16	r6, r5					/* source bytes 0 and 2 as 16-bit lanes */
	uxtb16	r7, r5, ror #8				/* source bytes 1 and 3 as 16-bit lanes */
	mla	r6, r6, r3, r2				/* lane * m + 0x80 */
	mla	r7, r7, r3, r2
	uxtab16	r6, r6, r6, ror #8			/* t += t >> 8 in each lane */
	uxtab16	r7, r7, r7, ror #8
	uxtb16	r6, r6, ror #8				/* keep the high byte of each lane */
	uxtb16	r7, r7, ror #8
	orr	r5, r6, r7, lsl #8			/* r5 = scaled source word */
	uxtb16	r6, r4					/* destination bytes 0 and 2 as 16-bit lanes */
	uxtb16	r7, r4, ror #8				/* destination bytes 1 and 3 as 16-bit lanes */
	sub	r8, r11, r5, lsr #24			/* r8 = 255 - top byte of the scaled source */
	mla	r6, r6, r8, r2				/* lane * r8 + 0x80 */
	mla	r7, r7, r8, r2
	uxtab16	r6, r6, r6, ror #8			/* t += t >> 8 in each lane */
	uxtab16	r7, r7, r7, ror #8
	uxtb16	r6, r6, ror #8				/* keep the high byte of each lane */
	uxtb16	r7, r7, ror #8
	orr	r6, r6, r7, lsl #8			/* r6 = scaled destination word */
	uqadd8	r5, r6, r5				/* saturating per-byte add */
	str	r5, [r0], #4				/* store and advance destination cursor */
	subs	r12, r12, #1
	bne	4b
3:	ldr	r1, [sp, #12]				/* total number of rows */
	add	r10, r10, #1
	cmp	r10, r1
	ldr	r12, [sp]				/* r12 = start of the next destination row */
	ldr	r2, [sp, #4]				/* r2 = start of the next source row */
	bne	5b
0:	add	sp, sp, #28
	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
	bx	lr
.endfunc

/*
 * pixman_composite_over_n_8_8888_asm_armv6
 *
 * For every 32-bit destination word: scales each byte of one constant
 * 32-bit value c by the matching byte m read from a separate byte array,
 * then combines the scaled value with the destination, per byte:
 *     s   = c * m / 255
 *     dst = saturate (s + dst * (255 - (s >> 24)) / 255)
 * Both divisions use the rounded form  t = x * a + 0x80;
 * (t + (t >> 8)) >> 8  on two 16-bit lanes at a time.
 *
 * In (offsets of stack arguments are as seen on entry):
 *   r0        = number of 32-bit words per row
 *   r1        = number of rows
 *   r2        = destination pointer (read and written)
 *   r3        = destination row step, counted in 32-bit words
 *   [sp, #0]  = the constant value c
 *   [sp, #4]  = never read by this code
 *   [sp, #8]  = pointer to the byte array (only read, one byte per word)
 *   [sp, #12] = row step of the byte array, in bytes
 * Presumably the C prototype is (width, height, dst, dst_stride, src,
 * unused, mask, mask_stride) -- confirm against pixman-arm-simd.c.
 *
 * Local frame (28 bytes):
 *   [sp, #0]  start of the next byte-array row
 *   [sp, #4]  bytes 1 and 3 of c as two 16-bit lanes
 *   [sp, #8]  start of the next destination row
 *   [sp, #12] number of rows
 *   [sp, #16] words per row
 *   [sp, #20] bytes 0 and 2 of c as two 16-bit lanes
 *   [sp, #24] destination row step in bytes
 *
 * r4-r11 are saved and restored; r0-r3, r12 and the flags are overwritten.
 * No value is returned.
 */
pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
	push	{r4, r5, r6, r7, r8, r9, r10, r11}	/* 32 bytes of saved registers */
	sub	sp, sp, #28				/* local frame, see the layout above */
	cmp	r1, #0					/* flags are consumed by the beq below */
	ldr	r9, [sp, #60]				/* first stack argument (32 + 28): constant c */
	str	r1, [sp, #12]				/* keep the row count */
	bic	r1, r9, #-16777216			/* clear bits 31:24 (mask 0xff000000) */
	str	r1, [sp, #20]
	mov	r12, r2					/* r12 = destination row start */
	lsr	r1, r9, #8
	ldr	r2, [sp, #20]
	bic	r1, r1, #-16777216
	bic	r2, r2, #65280				/* clear bits 15:8: r2 = c & 0x00ff00ff */
	bic	r1, r1, #65280				/* r1 = (c >> 8) & 0x00ff00ff */
	str	r2, [sp, #20]				/* bytes 0 and 2 of c as 16-bit lanes */
	str	r0, [sp, #16]				/* keep the per-row word count */
	str	r1, [sp, #4]				/* bytes 1 and 3 of c as 16-bit lanes */
	ldr	r2, [sp, #68]				/* third stack argument: byte-array row start */
	beq	0f					/* zero rows: nothing to do */
	lsl	r3, r3, #2				/* destination row step: words -> bytes */
	str	r3, [sp, #24]
	mov	r0, #0					/* r0 = rows completed */
	b	1f
5:	ldr	r3, [sp, #24]				/* reload the destination row step (r3 was reused) */
1:	ldr	r4, [sp, #72]				/* fourth stack argument: byte-array row step */
	mov	r10, r12				/* r10 = destination cursor */
	mov	r1, r2					/* r1 = byte-array cursor */
	add	r12, r12, r3				/* start of the next destination row */
	add	r2, r2, r4				/* start of the next byte-array row */
	str	r12, [sp, #8]				/* both are spilled: r12 and r2 are reused below */
	str	r2, [sp]
	ldr	r12, [sp, #16]				/* r12 = words left in this row */
	ldr	r11, =0x00800080			/* rounding term, one 0x80 per 16-bit lane */
	ldr	r2, [sp, #4]				/* r2 = bytes 1 and 3 of c */
	ldr	r3, [sp, #20]				/* r3 = bytes 0 and 2 of c */
	cmp	r12, #0
	beq	3f					/* empty row */
4:	ldrb	r5, [r1], #1				/* r5 = factor m for this word, advance cursor */
	ldr	r4, [r10]				/* r4 = destination word */
	mla	r6, r3, r5, r11				/* lane * m + 0x80 */
	mla	r7, r2, r5, r11
	uxtab16	r6, r6, r6, ror #8			/* t += t >> 8 in each lane */
	uxtab16	r7, r7, r7, ror #8
	uxtb16	r6, r6, ror #8				/* keep the high byte of each lane */
	uxtb16	r7, r7, ror #8
	orr	r5, r6, r7, lsl #8			/* r5 = scaled constant word */
	uxtb16	r6, r4					/* destination bytes 0 and 2 as 16-bit lanes */
	uxtb16	r7, r4, ror #8				/* destination bytes 1 and 3 as 16-bit lanes */
	mvn	r8, r5
	lsr	r8, r8, #24				/* r8 = 255 - top byte of the scaled constant */
	mla	r6, r6, r8, r11				/* lane * r8 + 0x80 */
	mla	r7, r7, r8, r11
	uxtab16	r6, r6, r6, ror #8			/* t += t >> 8 in each lane */
	uxtab16	r7, r7, r7, ror #8
	uxtb16	r6, r6, ror #8				/* keep the high byte of each lane */
	uxtb16	r7, r7, ror #8
	orr	r6, r6, r7, lsl #8			/* r6 = scaled destination word */
	uqadd8	r5, r6, r5				/* saturating per-byte add */
	str	r5, [r10], #4				/* store and advance destination cursor */
	subs	r12, r12, #1
	bne	4b
3:	ldr	r4, [sp, #12]				/* total number of rows */
	add	r0, r0, #1
	cmp	r0, r4
	ldr	r12, [sp, #8]				/* r12 = start of the next destination row */
	ldr	r2, [sp]				/* r2 = start of the next byte-array row */
	bne	5b
0:	add	sp, sp, #28
	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
	bx	lr
.endfunc