aboutsummaryrefslogtreecommitdiffstats
path: root/src/gba
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2024-01-23 11:21:14 +0100
committerBad Diode <bd@badd10de.dev>2024-01-23 11:21:14 +0100
commit3c54d94191b0888af3712f7c330943068604cab8 (patch)
tree9207d386470d084fc1f23becd7dfd0039204bef4 /src/gba
parenta7ce765b1b57ec8a528263420852ed36da6d9d84 (diff)
downloadstepper-3c54d94191b0888af3712f7c330943068604cab8.tar.gz
stepper-3c54d94191b0888af3712f7c330943068604cab8.zip
Add improved renderer routines with DMA option
Diffstat (limited to 'src/gba')
-rwxr-xr-xsrc/gba/utils.s82
1 files changed, 82 insertions, 0 deletions
diff --git a/src/gba/utils.s b/src/gba/utils.s
new file mode 100755
index 0000000..d70d7ef
--- /dev/null
+++ b/src/gba/utils.s
@@ -0,0 +1,82 @@
1.file "utils.s"
2.section .iwram, "ax", %progbits
3.arm
4.align
5
@ copy32: word-wise memory copy (adapted from TONC's memcpy32).
@
@ Copies r2 32-bit words from the address in r1 to the address in r0. The
@ work is split in two steps: whole groups of 8 words (32 bytes) are moved
@ with ldmia/stmia, then the remaining 0-7 words are copied one at a time.
@
@ In:
@   r0: destination address
@   r1: source address
@   r2: number of 32-bit words to copy (may be 0)
@
@ Clobbers: r0-r3, r12 and the condition flags. r4-r10 are used as scratch
@ inside the block loop but are saved and restored on the stack.
@
@ No alignment fix-up is performed: callers are expected to pass
@ word-aligned addresses.
@
    .global copy32
    .type   copy32, %function
copy32:
    cmp     r2, #0
    beq     .Lcopy32_end            @ Nothing to copy.

    and     r12, r2, #7             @ r12 = count % 8 (residual words)
    movs    r2, r2, lsr #3          @ r2  = count / 8 (8-word groups)
    beq     .Lcopy32_residual       @ Fewer than 8 words: skip block loop.

    @ Copy 8 words (32 bytes) per iteration. r4-r10 are preserved for the
    @ caller around the loop. r2 >= 1 on entry.
    push    {r4-r10}
.Lcopy32_chunks:
    ldmia   r1!, {r3-r10}
    stmia   r0!, {r3-r10}
    subs    r2, r2, #1
    bhi     .Lcopy32_chunks         @ Loop while groups remain (result > 0).
    pop     {r4-r10}

    @ Copy the residual 0-7 words. The subtraction borrows (carry clear)
    @ only when r12 was already 0, which skips the load/store; the loop
    @ repeats only while the decremented count is still non-zero.
.Lcopy32_residual:
    subs    r12, r12, #1
    ldrhs   r3, [r1], #4
    strhs   r3, [r0], #4
    bhi     .Lcopy32_residual

.Lcopy32_end:
    bx      lr
    .size   copy32, .-copy32
42
43
@ set32: word-wise memory fill (adapted from TONC's memset32).
@
@ Stores the 32-bit value in r1 into r2 consecutive words starting at the
@ address in r0. The work is split in two steps: whole groups of 8 words
@ (32 bytes) are written with a single stmia from 8 registers holding the
@ fill value, then the remaining 0-7 words are stored one at a time.
@
@ In:
@   r0: destination address
@   r1: 32-bit value to store
@   r2: number of 32-bit words to set (may be 0)
@
@ Clobbers: r0, r2, r3, r12 and the condition flags. r4-r9 are used as
@ scratch inside the block loop but are saved and restored on the stack.
@
@ No alignment fix-up is performed: callers are expected to pass a
@ word-aligned destination address.
@
    .global set32
    .type   set32, %function
set32:
    cmp     r2, #0
    beq     .Lset32_end             @ Nothing to set.

    and     r12, r2, #7             @ r12 = count % 8 (residual words)
    movs    r2, r2, lsr #3          @ r2  = count / 8 (8-word groups)
    beq     .Lset32_residual        @ Fewer than 8 words: skip block loop.

    @ Set 8 words (32 bytes) per iteration. r4-r9 are preserved for the
    @ caller around the loop. The fill value is replicated into r3-r9 so
    @ that, together with r1, one stmia writes 8 words. r2 >= 1 on entry.
    push    {r4-r9}
    mov     r3, r1
    mov     r4, r1
    mov     r5, r1
    mov     r6, r1
    mov     r7, r1
    mov     r8, r1
    mov     r9, r1
.Lset32_chunks:
    stmia   r0!, {r1, r3-r9}
    subs    r2, r2, #1
    bhi     .Lset32_chunks          @ Loop while groups remain (result > 0).
    pop     {r4-r9}

    @ Set the residual 0-7 words. The subtraction borrows (carry clear)
    @ only when r12 was already 0, which skips the store; the loop repeats
    @ only while the decremented count is still non-zero.
.Lset32_residual:
    subs    r12, r12, #1
    strhs   r1, [r0], #4
    bhi     .Lset32_residual

.Lset32_end:
    bx      lr
    .size   set32, .-set32