@ utils.s -- 32-bit word copy/fill helpers (src/gba/utils.s).
@
@ Provenance: commit 3c54d94191b0888af3712f7c330943068604cab8
@             Bad Diode, Tue, 23 Jan 2024 11:21:14 +0100
@             "Add improved renderer routines with DMA option"
@             (new file, mode 100755, index 0000000..d70d7ef)
@
@ Syntax: GNU as, ARM (A32) instruction set. All code in this file is
@ emitted into the executable ".iwram" section.

    .file   "utils.s"
    .section .iwram, "ax", %progbits
    .arm
    .align  2                           @ 4-byte boundary for ARM code

@ copy32 -- efficient memcpy32 function (borrowed from TONC).
@
@ Two step approach: copy blocks of eight 32-bit words (32 bytes) at a time
@ with ldmia/stmia, then copy the 0-7 words that are left over one by one.
@
@ In:    r0 = destination address
@        r1 = source address
@        r2 = number of 32-bit words to copy
@ Out:   r0 and r1 are left pointing just past the words that were copied
@ Clobb: r2, r3, r12, flags
@ Stack: 28 bytes while the block loop runs (r4-r10 saved and restored)
@ NOTE(review): word loads/stores presumably need r0 and r1 to be 4-byte
@ aligned -- confirm against the callers.
@
    .global copy32
    .type   copy32, %function           @ typed symbol: lets the linker treat
                                        @ calls as calls to ARM code
copy32:
    cmp     r2, #0
    beq     .Lcopy32_end                @ count == 0: nothing to do

    and     r12, r2, #7                 @ r12 = count % 8 (leftover words)
    movs    r2, r2, lsr #3              @ r2  = count / 8 (whole blocks)
    beq     .Lcopy32_residual           @ fewer than 8 words: skip block loop

    @ Copy 8 words (32 bytes) per iteration.
    push    {r4-r10}                    @ callee-saved scratch for ldm/stm
.Lcopy32_chunks:
    ldmia   r1!, {r3-r10}
    stmia   r0!, {r3-r10}
    subs    r2, r2, #1
    bhi     .Lcopy32_chunks             @ repeat while blocks remain
    pop     {r4-r10}

    @ Copy the leftover words (0-7).
.Lcopy32_residual:
    subs    r12, r12, #1                @ borrow (C clear) once none are left
    ldrhs   r3, [r1], #4                @ ldr/str do not touch the flags, so
    strhs   r3, [r0], #4                @ the bhi below still tests the subs
    bhi     .Lcopy32_residual           @ repeat while words remain

.Lcopy32_end:
    bx      lr
    .size   copy32, . - copy32


@ Efficient memset32 function (borrowed from TONC). It uses a two step
@ approach.
@ Uses the same stmia block-store approach as copy32, but with no need for
@ ldmia: the fill value is replicated into eight registers once and those
@ registers are stored over and over.
@
@ set32 -- fill a run of 32-bit words with a single value.
@
@ Step one stores blocks of eight words (32 bytes) per stmia; step two
@ stores the 0-7 words that are left over one by one.
@
@ In:    r0 = destination address
@        r1 = u32 value to set
@        r2 = number of 32-bit words to set
@ Out:   r0 is left pointing just past the words that were written
@ Clobb: r2, r3, r12, flags (r1 is preserved)
@ Stack: 24 bytes while the block loop runs (r4-r9 saved and restored)
@ NOTE(review): word stores presumably need r0 to be 4-byte aligned --
@ confirm against the callers.
@
    .global set32
    .type   set32, %function            @ typed symbol: lets the linker treat
                                        @ calls as calls to ARM code
set32:
    cmp     r2, #0
    beq     .Lset32_end                 @ count == 0: nothing to do

    and     r12, r2, #7                 @ r12 = count % 8 (leftover words)
    movs    r2, r2, lsr #3              @ r2  = count / 8 (whole blocks)
    beq     .Lset32_residual            @ fewer than 8 words: skip block loop

    @ Set 8 words (32 bytes) per iteration.
    push    {r4-r9}                     @ callee-saved scratch for stm
    mov     r3, r1                      @ r1, r3-r9 all hold the fill value
    mov     r4, r1
    mov     r5, r1
    mov     r6, r1
    mov     r7, r1
    mov     r8, r1
    mov     r9, r1
.Lset32_chunks:
    stmia   r0!, {r1, r3-r9}
    subs    r2, r2, #1
    bhi     .Lset32_chunks              @ repeat while blocks remain
    pop     {r4-r9}

    @ Set the leftover words (0-7).
.Lset32_residual:
    subs    r12, r12, #1                @ borrow (C clear) once none are left
    strhs   r1, [r0], #4                @ str does not touch the flags
    bhi     .Lset32_residual            @ repeat while words remain

.Lset32_end:
    bx      lr
    .size   set32, . - set32

@ (Patch originally rendered by cgit v1.2.1.)