aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2024-01-23 11:21:14 +0100
committer Bad Diode <bd@badd10de.dev> 2024-01-23 11:21:14 +0100
commit 3c54d94191b0888af3712f7c330943068604cab8 (patch)
tree 9207d386470d084fc1f23becd7dfd0039204bef4
parent a7ce765b1b57ec8a528263420852ed36da6d9d84 (diff)
download stepper-3c54d94191b0888af3712f7c330943068604cab8.tar.gz
stepper-3c54d94191b0888af3712f7c330943068604cab8.zip
Add improved renderer routines with DMA option
-rw-r--r-- Makefile 2
-rwxr-xr-x src/gba/utils.s 82
-rw-r--r-- src/profiling.c 5
-rw-r--r-- src/renderer_m0.c 534
4 files changed, 362 insertions, 261 deletions
diff --git a/Makefile b/Makefile
index 89ea9b0..7608f79 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ INC_FLAGS := $(addprefix -I,$(INC_DIRS))
27INC_FLAGS += -I$(LIBGBA_SRC) 27INC_FLAGS += -I$(LIBGBA_SRC)
28 28
29# Output library names and executables. 29# Output library names and executables.
30TARGET := STEPPER-v1.8-dev-21 30TARGET := STEPPER-v1.8-dev-22
31ELF := $(BUILD_DIR)/$(TARGET).elf 31ELF := $(BUILD_DIR)/$(TARGET).elf
32BIN := $(BUILD_DIR)/$(TARGET).gba 32BIN := $(BUILD_DIR)/$(TARGET).gba
33 33
diff --git a/src/gba/utils.s b/src/gba/utils.s
new file mode 100755
index 0000000..d70d7ef
--- /dev/null
+++ b/src/gba/utils.s
@@ -0,0 +1,82 @@
1.file "utils.s"
2.section .iwram, "ax", %progbits
3.arm
4.align
5
6@ Efficient memcpy32 function (borrowed from TONC). It uses a two step
7@ approach: it first copies blocks of 8 u32 words at a time with the ldm and
8@ stm instructions, and then copies the remaining 0-7 words one at a time.
9@ Returns immediately when r2 is zero. r3 and r12 are used as scratch.
10@
11@ r0: destination address
12@ r1: source address
13@ r2: number of 32bit chunks to copy
14@
15.global copy32
16copy32:
17 cmp r2, #0
18 beq .copy32_end
19
20 and r12, r2, #7 @ r12 = r2 % 8
21 movs r2, r2, lsr #3 @ r2 = r2 / 8
22 beq .Lcopy32_residual
23
24 @ Copy 8 32-bit words (32 bytes) at a time; r4-r10 are saved around the loop because ldmia overwrites them
25 push {r4-r10}
26.Lcopy32_chunks:
27 ldmia r1!, {r3-r10}
28 stmia r0!, {r3-r10}
29 subs r2, r2, #1
30 bhi .Lcopy32_chunks
31 pop {r4-r10}
32
33 @ Copy the residual 32-bit words (0-7); when none remain the subs borrows, so the hs/hi instructions below are skipped
34.Lcopy32_residual:
35 subs r12, r12, #1
36 ldrhs r3, [r1], #4
37 strhs r3, [r0], #4
38 bhi .Lcopy32_residual
39
40.copy32_end:
41 bx lr
42
43
44@ Efficient memset32 function (borrowed from TONC). It uses the same two step
45@ approach as copy32 (blocks of 8 words with stmia, then 0-7 single words), but needs no ldmia.
46@
47@ r0: destination address
48@ r1: u32 value to set
49@ r2: number of 32bit chunks to set
50@
51.global set32
52set32:
53 cmp r2, #0
54 beq .set32_end
55
56 and r12, r2, #7 @ r12 = r2 % 8
57 movs r2, r2, lsr #3 @ r2 = r2 / 8
58 beq .Lset32_residual
59
60 @ Set 8 32-bit words (32 bytes) at a time; the fill value is replicated into r3-r9 so one stmia stores r1 plus seven copies
61 push {r4-r9}
62 mov r3, r1
63 mov r4, r1
64 mov r5, r1
65 mov r6, r1
66 mov r7, r1
67 mov r8, r1
68 mov r9, r1
69.Lset32_chunks:
70 stmia r0!, {r1, r3-r9}
71 subs r2, r2, #1
72 bhi .Lset32_chunks
73 pop {r4-r9}
74
75 @ Set the residual 32-bit words (0-7); when none remain the subs borrows, so the hs/hi instructions below are skipped
76.Lset32_residual:
77 subs r12, r12, #1
78 strhs r1, [r0], #4
79 bhi .Lset32_residual
80
81.set32_end:
82 bx lr
diff --git a/src/profiling.c b/src/profiling.c
index 6b073ed..07f4bbf 100644
--- a/src/profiling.c
+++ b/src/profiling.c
@@ -9,7 +9,7 @@
9#if PROF_ENABLE > 0 9#if PROF_ENABLE > 0
10 10
11#ifndef PROF_RESET_MINMAX 11#ifndef PROF_RESET_MINMAX
12#define PROF_RESET_MINMAX false 12#define PROF_RESET_MINMAX true
13#endif 13#endif
14 14
15// Maximum number of profiling to monitor. 15// Maximum number of profiling to monitor.
@@ -74,7 +74,7 @@ bool prof_show = true;
74 prof_frame_avg, \ 74 prof_frame_avg, \
75 (u32)((u64)280896 * 60 / (prof_frame_avg + 1)));\ 75 (u32)((u64)280896 * 60 / (prof_frame_avg + 1)));\
76 txt_drawf_small("MAX: %.9l/%l", 8 * 19, 0, COL_FG, \ 76 txt_drawf_small("MAX: %.9l/%l", 8 * 19, 0, COL_FG, \
77 prof_frame_time_max,280896);\ 77 prof_frame_time_max, 280896);\
78 for (size_t idx = 0; idx < PROF_NUM; idx++) { \ 78 for (size_t idx = 0; idx < PROF_NUM; idx++) { \
79 txt_drawf_small("%s %.9l (%.9l %.9l) %08x:%08x", 0, 8 * (idx + 1), COL_FG, \ 79 txt_drawf_small("%s %.9l (%.9l %.9l) %08x:%08x", 0, 8 * (idx + 1), COL_FG, \
80 prof_type_str[idx], \ 80 prof_type_str[idx], \
@@ -91,6 +91,7 @@ bool prof_show = true;
91 if (prof_reset_minmax) { \ 91 if (prof_reset_minmax) { \
92 prof_min[idx] = -1; \ 92 prof_min[idx] = -1; \
93 prof_max[idx] = 0; \ 93 prof_max[idx] = 0; \
94 prof_frame_time_max = 0; \
94 } \ 95 } \
95 prof_times[idx] = 0; \ 96 prof_times[idx] = 0; \
96 prof_count[idx] = 0; \ 97 prof_count[idx] = 0; \
diff --git a/src/renderer_m0.c b/src/renderer_m0.c
index 133b39d..064cc0e 100644
--- a/src/renderer_m0.c
+++ b/src/renderer_m0.c
@@ -13,8 +13,9 @@
13// 13//
14 14
15#define SUBPIXEL_LINES 1 15#define SUBPIXEL_LINES 1
16#define DEC_BIG_LUT 1
17#define FLIP_TYPE 3 16#define FLIP_TYPE 3
17#define DISABLE_BOUNDCHECK_SCREEN 0
18#define NO_DMA 0
18 19
19// Front/back buffers for double buffering. 20// Front/back buffers for double buffering.
20#define BUF_0 ((u32*)(MEM_VRAM)) 21#define BUF_0 ((u32*)(MEM_VRAM))
@@ -38,7 +39,7 @@ static u32 dirty_tiles[21] = {0};
38// Boundchecks can be disabled at compile time but this will not always improve 39// Boundchecks can be disabled at compile time but this will not always improve
39// the performance and can in fact make it worse. It is possible that this is 40// the performance and can in fact make it worse. It is possible that this is
40// due to some aliasing optimizations but not sure at this moment. 41// due to some aliasing optimizations but not sure at this moment.
41#ifdef DISABLE_BOUNDCHECK_SCREEN 42#if DISABLE_BOUNDCHECK_SCREEN > 0
42#define BOUNDCHECK_SCREEN(X,Y) 43#define BOUNDCHECK_SCREEN(X,Y)
43#else 44#else
44#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; 45#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
@@ -66,7 +67,11 @@ IWRAM_CODE
66void screen_fill(u8 clr) { 67void screen_fill(u8 clr) {
67 // We have to make sure we leave the last tile blank to use as alpha channel 68 // We have to make sure we leave the last tile blank to use as alpha channel
68 // when moving the BG during double buffering. 69 // when moving the BG during double buffering.
70#if NO_DMA == 0
69 dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3); 71 dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3);
72#else
73 set32(backbuf, 0x11111111 * clr, (KB(20) / 4) - 8);
74#endif
70 redraw(); 75 redraw();
71} 76}
72 77
@@ -85,8 +90,8 @@ draw_pixel(size_t x, size_t y, u8 clr) {
85 // Update backbuffer. 90 // Update backbuffer.
86 size_t shift = start_col * sizeof(u32); 91 size_t shift = start_col * sizeof(u32);
87 u32 mask = 0xF << shift; 92 u32 mask = 0xF << shift;
88 u32 row = clr << shift; 93 u32 color = clr << shift;
89 *dst = (*dst & ~mask) | row; 94 *dst = (*dst & ~mask) | color;
90 dirty_tiles[tile_y] |= 1 << tile_x; 95 dirty_tiles[tile_y] |= 1 << tile_x;
91} 96}
92 97
@@ -117,21 +122,21 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
117 size_t shift_left = start_col * 4; 122 size_t shift_left = start_col * 4;
118 size_t shift_right = (7 - end_col) * 4; 123 size_t shift_right = (7 - end_col) * 4;
119 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); 124 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left);
120 u32 row = (0x11111111 * clr) & mask; 125 u32 color = (0x11111111 * clr) & mask;
121 *dst = (*dst & ~mask) | row; 126 *dst = (*dst & ~mask) | color;
122 } else { 127 } else {
123 size_t shift_left = start_col * 4; 128 size_t shift_left = start_col * 4;
124 size_t shift_right = (7 - end_col) * 4; 129 size_t shift_right = (7 - end_col) * 4;
125 u32 mask = 0xFFFFFFFF; 130 u32 mask = 0xFFFFFFFF;
126 u32 row = 0x11111111 * clr; 131 u32 color = 0x11111111 * clr;
127 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); 132 *dst = (*dst & ~(mask << shift_left)) | (color << shift_left);
128 dst += 8; 133 dst += 8;
129 for (size_t i = 1; i < dtx; i++) { 134 for (size_t i = 1; i < dtx; i++) {
130 dirty |= (1 << (tile_x0 + i)); 135 dirty |= (1 << (tile_x0 + i));
131 *dst = row; 136 *dst = color;
132 dst += 8; 137 dst += 8;
133 } 138 }
134 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); 139 *dst = (*dst & ~(mask >> shift_right)) | (color >> shift_right);
135 } 140 }
136 dirty_tiles[tile_y] |= dirty; 141 dirty_tiles[tile_y] |= dirty;
137} 142}
@@ -156,26 +161,26 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
156 161
157 u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; 162 u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8];
158 u32 mask = 0x0000000F << shift_left; 163 u32 mask = 0x0000000F << shift_left;
159 u32 row = (0x11111111 * clr) & mask; 164 u32 color = clr << shift_left;
160 u32 dty = tile_y1 - tile_y0; 165 u32 dty = tile_y1 - tile_y0;
161 if (dty < 1) { 166 if (dty < 1) {
162 for (size_t i = 0; i <= (y1 - y0); i++, dst++) { 167 for (size_t i = 0; i <= (y1 - y0); i++, dst++) {
163 dst[0] = (dst[0] & ~mask) | row; 168 dst[0] = (dst[0] & ~mask) | color;
164 } 169 }
165 } else { 170 } else {
166 for (size_t i = 0; i < (8 - start_row0); i++, dst++) { 171 for (size_t i = 0; i < (8 - start_row0); i++, dst++) {
167 dst[0] = (dst[0] & ~mask) | row; 172 dst[0] = (dst[0] & ~mask) | color;
168 } 173 }
169 dst += 8 * 31; 174 dst += 8 * 31;
170 for (size_t j = 1; j < dty; j++) { 175 for (size_t j = 1; j < dty; j++) {
171 dirty_tiles[tile_y0 + j] |= dirty; 176 dirty_tiles[tile_y0 + j] |= dirty;
172 for (size_t i = 0; i < 8; i++, dst++) { 177 for (size_t i = 0; i < 8; i++, dst++) {
173 dst[0] = (dst[0] & ~mask) | row; 178 dst[0] = (dst[0] & ~mask) | color;
174 } 179 }
175 dst += 8 * 31; 180 dst += 8 * 31;
176 } 181 }
177 for (size_t i = 0; i <= start_row1; i++, dst++) { 182 for (size_t i = 0; i <= start_row1; i++, dst++) {
178 dst[0] = (dst[0] & ~mask) | row; 183 dst[0] = (dst[0] & ~mask) | color;
179 } 184 }
180 } 185 }
181 dirty_tiles[tile_y0] |= dirty; 186 dirty_tiles[tile_y0] |= dirty;
@@ -313,8 +318,8 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
313 MAYBE_SWAP(x0, x1); 318 MAYBE_SWAP(x0, x1);
314 MAYBE_SWAP(y0, y1); 319 MAYBE_SWAP(y0, y1);
315 320
316 // Special condition. If the screen is to be completely filled, use the DMA 321 // Special condition. If the screen is to be completely filled, use the
317 // instead. 322 // full clearing functions instead.
318 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { 323 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) {
319 screen_fill(clr); 324 screen_fill(clr);
320 return; 325 return;
@@ -326,161 +331,135 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
326} 331}
327 332
328// 333//
329// Sprites (chr/icn). 334// Sprites (1bpp).
330// 335//
331 336
332#if DEC_BIG_LUT == 1 337static u32 lut_1bpp_mask[256] = {
333static u32 dec_byte_flip_x[256] = { 338 0x00000000, 0xf0000000, 0x0f000000, 0xff000000, 0x00f00000,
334 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 339 0xf0f00000, 0x0ff00000, 0xfff00000, 0x000f0000, 0xf00f0000,
335 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, 340 0x0f0f0000, 0xff0f0000, 0x00ff0000, 0xf0ff0000, 0x0fff0000,
336 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 341 0xffff0000, 0x0000f000, 0xf000f000, 0x0f00f000, 0xff00f000,
337 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011, 342 0x00f0f000, 0xf0f0f000, 0x0ff0f000, 0xfff0f000, 0x000ff000,
338 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000, 343 0xf00ff000, 0x0f0ff000, 0xff0ff000, 0x00fff000, 0xf0fff000,
339 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 344 0x0ffff000, 0xfffff000, 0x00000f00, 0xf0000f00, 0x0f000f00,
340 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010, 345 0xff000f00, 0x00f00f00, 0xf0f00f00, 0x0ff00f00, 0xfff00f00,
341 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, 346 0x000f0f00, 0xf00f0f00, 0x0f0f0f00, 0xff0f0f00, 0x00ff0f00,
342 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 347 0xf0ff0f00, 0x0fff0f00, 0xffff0f00, 0x0000ff00, 0xf000ff00,
343 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001, 348 0x0f00ff00, 0xff00ff00, 0x00f0ff00, 0xf0f0ff00, 0x0ff0ff00,
344 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 349 0xfff0ff00, 0x000fff00, 0xf00fff00, 0x0f0fff00, 0xff0fff00,
345 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011, 350 0x00ffff00, 0xf0ffff00, 0x0fffff00, 0xffffff00, 0x000000f0,
346 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000, 351 0xf00000f0, 0x0f0000f0, 0xff0000f0, 0x00f000f0, 0xf0f000f0,
347 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 352 0x0ff000f0, 0xfff000f0, 0x000f00f0, 0xf00f00f0, 0x0f0f00f0,
348 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010, 353 0xff0f00f0, 0x00ff00f0, 0xf0ff00f0, 0x0fff00f0, 0xffff00f0,
349 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, 354 0x0000f0f0, 0xf000f0f0, 0x0f00f0f0, 0xff00f0f0, 0x00f0f0f0,
350 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 355 0xf0f0f0f0, 0x0ff0f0f0, 0xfff0f0f0, 0x000ff0f0, 0xf00ff0f0,
351 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001, 356 0x0f0ff0f0, 0xff0ff0f0, 0x00fff0f0, 0xf0fff0f0, 0x0ffff0f0,
352 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 357 0xfffff0f0, 0x00000ff0, 0xf0000ff0, 0x0f000ff0, 0xff000ff0,
353 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011, 358 0x00f00ff0, 0xf0f00ff0, 0x0ff00ff0, 0xfff00ff0, 0x000f0ff0,
354 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000, 359 0xf00f0ff0, 0x0f0f0ff0, 0xff0f0ff0, 0x00ff0ff0, 0xf0ff0ff0,
355 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 360 0x0fff0ff0, 0xffff0ff0, 0x0000fff0, 0xf000fff0, 0x0f00fff0,
356 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010, 361 0xff00fff0, 0x00f0fff0, 0xf0f0fff0, 0x0ff0fff0, 0xfff0fff0,
357 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, 362 0x000ffff0, 0xf00ffff0, 0x0f0ffff0, 0xff0ffff0, 0x00fffff0,
358 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 363 0xf0fffff0, 0x0ffffff0, 0xfffffff0, 0x0000000f, 0xf000000f,
359 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001, 364 0x0f00000f, 0xff00000f, 0x00f0000f, 0xf0f0000f, 0x0ff0000f,
360 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 365 0xfff0000f, 0x000f000f, 0xf00f000f, 0x0f0f000f, 0xff0f000f,
361 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011, 366 0x00ff000f, 0xf0ff000f, 0x0fff000f, 0xffff000f, 0x0000f00f,
362 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000, 367 0xf000f00f, 0x0f00f00f, 0xff00f00f, 0x00f0f00f, 0xf0f0f00f,
363 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 368 0x0ff0f00f, 0xfff0f00f, 0x000ff00f, 0xf00ff00f, 0x0f0ff00f,
364 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010, 369 0xff0ff00f, 0x00fff00f, 0xf0fff00f, 0x0ffff00f, 0xfffff00f,
365 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, 370 0x00000f0f, 0xf0000f0f, 0x0f000f0f, 0xff000f0f, 0x00f00f0f,
366 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 371 0xf0f00f0f, 0x0ff00f0f, 0xfff00f0f, 0x000f0f0f, 0xf00f0f0f,
367 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001, 372 0x0f0f0f0f, 0xff0f0f0f, 0x00ff0f0f, 0xf0ff0f0f, 0x0fff0f0f,
368 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 373 0xffff0f0f, 0x0000ff0f, 0xf000ff0f, 0x0f00ff0f, 0xff00ff0f,
369 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011, 374 0x00f0ff0f, 0xf0f0ff0f, 0x0ff0ff0f, 0xfff0ff0f, 0x000fff0f,
370 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000, 375 0xf00fff0f, 0x0f0fff0f, 0xff0fff0f, 0x00ffff0f, 0xf0ffff0f,
371 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 376 0x0fffff0f, 0xffffff0f, 0x000000ff, 0xf00000ff, 0x0f0000ff,
372 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010, 377 0xff0000ff, 0x00f000ff, 0xf0f000ff, 0x0ff000ff, 0xfff000ff,
373 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, 378 0x000f00ff, 0xf00f00ff, 0x0f0f00ff, 0xff0f00ff, 0x00ff00ff,
374 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 379 0xf0ff00ff, 0x0fff00ff, 0xffff00ff, 0x0000f0ff, 0xf000f0ff,
375 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001, 380 0x0f00f0ff, 0xff00f0ff, 0x00f0f0ff, 0xf0f0f0ff, 0x0ff0f0ff,
376 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 381 0xfff0f0ff, 0x000ff0ff, 0xf00ff0ff, 0x0f0ff0ff, 0xff0ff0ff,
377 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011, 382 0x00fff0ff, 0xf0fff0ff, 0x0ffff0ff, 0xfffff0ff, 0x00000fff,
378 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000, 383 0xf0000fff, 0x0f000fff, 0xff000fff, 0x00f00fff, 0xf0f00fff,
379 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 384 0x0ff00fff, 0xfff00fff, 0x000f0fff, 0xf00f0fff, 0x0f0f0fff,
380 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010, 385 0xff0f0fff, 0x00ff0fff, 0xf0ff0fff, 0x0fff0fff, 0xffff0fff,
381 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, 386 0x0000ffff, 0xf000ffff, 0x0f00ffff, 0xff00ffff, 0x00f0ffff,
382 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 387 0xf0f0ffff, 0x0ff0ffff, 0xfff0ffff, 0x000fffff, 0xf00fffff,
383 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001, 388 0x0f0fffff, 0xff0fffff, 0x00ffffff, 0xf0ffffff, 0x0fffffff,
384 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 389 0xffffffff
385 0x11111111
386}; 390};
387 391
388static u32 dec_byte[256] = { 392static u32 lut_1bpp_mask_flip_x[256] = {
389 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, 393 0x00000000, 0x0000000f, 0x000000f0, 0x000000ff, 0x00000f00,
390 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000, 394 0x00000f0f, 0x00000ff0, 0x00000fff, 0x0000f000, 0x0000f00f,
391 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, 395 0x0000f0f0, 0x0000f0ff, 0x0000ff00, 0x0000ff0f, 0x0000fff0,
392 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000, 396 0x0000ffff, 0x000f0000, 0x000f000f, 0x000f00f0, 0x000f00ff,
393 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000, 397 0x000f0f00, 0x000f0f0f, 0x000f0ff0, 0x000f0fff, 0x000ff000,
394 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000, 398 0x000ff00f, 0x000ff0f0, 0x000ff0ff, 0x000fff00, 0x000fff0f,
395 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100, 399 0x000ffff0, 0x000fffff, 0x00f00000, 0x00f0000f, 0x00f000f0,
396 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100, 400 0x00f000ff, 0x00f00f00, 0x00f00f0f, 0x00f00ff0, 0x00f00fff,
397 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100, 401 0x00f0f000, 0x00f0f00f, 0x00f0f0f0, 0x00f0f0ff, 0x00f0ff00,
398 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100, 402 0x00f0ff0f, 0x00f0fff0, 0x00f0ffff, 0x00ff0000, 0x00ff000f,
399 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100, 403 0x00ff00f0, 0x00ff00ff, 0x00ff0f00, 0x00ff0f0f, 0x00ff0ff0,
400 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100, 404 0x00ff0fff, 0x00fff000, 0x00fff00f, 0x00fff0f0, 0x00fff0ff,
401 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010, 405 0x00ffff00, 0x00ffff0f, 0x00fffff0, 0x00ffffff, 0x0f000000,
402 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010, 406 0x0f00000f, 0x0f0000f0, 0x0f0000ff, 0x0f000f00, 0x0f000f0f,
403 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010, 407 0x0f000ff0, 0x0f000fff, 0x0f00f000, 0x0f00f00f, 0x0f00f0f0,
404 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010, 408 0x0f00f0ff, 0x0f00ff00, 0x0f00ff0f, 0x0f00fff0, 0x0f00ffff,
405 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010, 409 0x0f0f0000, 0x0f0f000f, 0x0f0f00f0, 0x0f0f00ff, 0x0f0f0f00,
406 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010, 410 0x0f0f0f0f, 0x0f0f0ff0, 0x0f0f0fff, 0x0f0ff000, 0x0f0ff00f,
407 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010, 411 0x0f0ff0f0, 0x0f0ff0ff, 0x0f0fff00, 0x0f0fff0f, 0x0f0ffff0,
408 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110, 412 0x0f0fffff, 0x0ff00000, 0x0ff0000f, 0x0ff000f0, 0x0ff000ff,
409 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110, 413 0x0ff00f00, 0x0ff00f0f, 0x0ff00ff0, 0x0ff00fff, 0x0ff0f000,
410 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110, 414 0x0ff0f00f, 0x0ff0f0f0, 0x0ff0f0ff, 0x0ff0ff00, 0x0ff0ff0f,
411 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110, 415 0x0ff0fff0, 0x0ff0ffff, 0x0fff0000, 0x0fff000f, 0x0fff00f0,
412 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110, 416 0x0fff00ff, 0x0fff0f00, 0x0fff0f0f, 0x0fff0ff0, 0x0fff0fff,
413 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110, 417 0x0ffff000, 0x0ffff00f, 0x0ffff0f0, 0x0ffff0ff, 0x0fffff00,
414 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001, 418 0x0fffff0f, 0x0ffffff0, 0x0fffffff, 0xf0000000, 0xf000000f,
415 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001, 419 0xf00000f0, 0xf00000ff, 0xf0000f00, 0xf0000f0f, 0xf0000ff0,
416 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001, 420 0xf0000fff, 0xf000f000, 0xf000f00f, 0xf000f0f0, 0xf000f0ff,
417 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001, 421 0xf000ff00, 0xf000ff0f, 0xf000fff0, 0xf000ffff, 0xf00f0000,
418 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001, 422 0xf00f000f, 0xf00f00f0, 0xf00f00ff, 0xf00f0f00, 0xf00f0f0f,
419 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001, 423 0xf00f0ff0, 0xf00f0fff, 0xf00ff000, 0xf00ff00f, 0xf00ff0f0,
420 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001, 424 0xf00ff0ff, 0xf00fff00, 0xf00fff0f, 0xf00ffff0, 0xf00fffff,
421 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101, 425 0xf0f00000, 0xf0f0000f, 0xf0f000f0, 0xf0f000ff, 0xf0f00f00,
422 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101, 426 0xf0f00f0f, 0xf0f00ff0, 0xf0f00fff, 0xf0f0f000, 0xf0f0f00f,
423 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101, 427 0xf0f0f0f0, 0xf0f0f0ff, 0xf0f0ff00, 0xf0f0ff0f, 0xf0f0fff0,
424 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101, 428 0xf0f0ffff, 0xf0ff0000, 0xf0ff000f, 0xf0ff00f0, 0xf0ff00ff,
425 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101, 429 0xf0ff0f00, 0xf0ff0f0f, 0xf0ff0ff0, 0xf0ff0fff, 0xf0fff000,
426 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101, 430 0xf0fff00f, 0xf0fff0f0, 0xf0fff0ff, 0xf0ffff00, 0xf0ffff0f,
427 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011, 431 0xf0fffff0, 0xf0ffffff, 0xff000000, 0xff00000f, 0xff0000f0,
428 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011, 432 0xff0000ff, 0xff000f00, 0xff000f0f, 0xff000ff0, 0xff000fff,
429 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011, 433 0xff00f000, 0xff00f00f, 0xff00f0f0, 0xff00f0ff, 0xff00ff00,
430 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011, 434 0xff00ff0f, 0xff00fff0, 0xff00ffff, 0xff0f0000, 0xff0f000f,
431 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011, 435 0xff0f00f0, 0xff0f00ff, 0xff0f0f00, 0xff0f0f0f, 0xff0f0ff0,
432 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011, 436 0xff0f0fff, 0xff0ff000, 0xff0ff00f, 0xff0ff0f0, 0xff0ff0ff,
433 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111, 437 0xff0fff00, 0xff0fff0f, 0xff0ffff0, 0xff0fffff, 0xfff00000,
434 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111, 438 0xfff0000f, 0xfff000f0, 0xfff000ff, 0xfff00f00, 0xfff00f0f,
435 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111, 439 0xfff00ff0, 0xfff00fff, 0xfff0f000, 0xfff0f00f, 0xfff0f0f0,
436 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111, 440 0xfff0f0ff, 0xfff0ff00, 0xfff0ff0f, 0xfff0fff0, 0xfff0ffff,
437 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111, 441 0xffff0000, 0xffff000f, 0xffff00f0, 0xffff00ff, 0xffff0f00,
438 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111, 442 0xffff0f0f, 0xffff0ff0, 0xffff0fff, 0xfffff000, 0xfffff00f,
439 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111, 443 0xfffff0f0, 0xfffff0ff, 0xffffff00, 0xffffff0f, 0xfffffff0,
440 0x11111111 444 0xffffffff
441}; 445};
442 446
443IWRAM_CODE 447// Create a mask with 0xF in every nibble where the sprite value is non-zero.
444static inline 448// For example: 0x12305008 -> 0xFFF0F00F
449INLINE
445u32 450u32
446decode_1bpp(u8 row, u8 flip_x) { 451create_zero_mask(u32 x) {
447 if (flip_x) { 452 x |= x >> 2;
448 return dec_byte_flip_x[row]; 453 x |= x >> 1;
449 } 454 x &= 0x11111111;
450 return dec_byte[row]; 455 return x * 0xf;
451} 456}
452#else
453static u16 dec_nibble[] = {
454 0x0000, 0x1000, 0x0100, 0x1100,
455 0x0010, 0x1010, 0x0110, 0x1110,
456 0x0001, 0x1001, 0x0101, 0x1101,
457 0x0011, 0x1011, 0x0111, 0x1111,
458};
459
460static u16 dec_nibble_flip_x[] = {
461 0x0000, 0x0001, 0x0010, 0x0011,
462 0x0100, 0x0101, 0x0110, 0x0111,
463 0x1000, 0x1001, 0x1010, 0x1011,
464 0x1100, 0x1101, 0x1110, 0x1111,
465};
466
467IWRAM_CODE
468static inline
469u32
470decode_1bpp(u8 row, u8 flip_x) {
471 if (flip_x) {
472 u16 *lut = dec_nibble_flip_x;
473 return (u32)lut[(row >> 4) & 0xF] << 16 | (u32)lut[(row >> 0) & 0xF];
474 }
475 u16 *lut = dec_nibble;
476 return (u32)lut[(row >> 0) & 0xF] << 16 | (u32)lut[(row >> 4) & 0xF];
477}
478#endif
479 457
480IWRAM_CODE 458IWRAM_CODE
481UNROLL_LOOPS 459UNROLL_LOOPS
482void 460void
483draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { 461draw_sprite(size_t x, size_t y, u32 *sprite, u8 clear) {
462 // Copy a 4bpp sprite into memory. Color 0 is the transparency color.
484 BOUNDCHECK_SCREEN(x, y); 463 BOUNDCHECK_SCREEN(x, y);
485 size_t tile_x0 = x / 8; 464 size_t tile_x0 = x / 8;
486 size_t tile_x1 = (x + 7) / 8; 465 size_t tile_x1 = (x + 7) / 8;
@@ -491,71 +470,65 @@ draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
491 size_t shift_right = (8 - start_col) * 4; 470 size_t shift_right = (8 - start_col) * 4;
492 u32 dirty = (1 << tile_x0) | (1 << tile_x1); 471 u32 dirty = (1 << tile_x0) | (1 << tile_x1);
493 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; 472 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8];
494#if DEC_BIG_LUT 473 size_t n_rows = 8;
495 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 474 if (y + 8 > SCREEN_HEIGHT) {
496#endif 475 n_rows = 8 - ((y + 8) - SCREEN_HEIGHT);
497 if (!flip_y) { 476 }
498 for(size_t v = 0; v < 8; v++, dst++) { 477
499 if ((y + v) >= SCREEN_HEIGHT) break; 478 size_t n0 = MIN(8 - start_row, n_rows);
500 u8 ch1 = sprite[v + 0]; 479 if (clear) {
501 u8 ch2 = sprite[v + 8]; 480 for(size_t v = 0; v < n0; v++, dst++) {
502#if DEC_BIG_LUT 481 u32 row = sprite[v];
503 u32 clr_a = lut[ch1]; 482
504 u32 clr_b = lut[ch2]; 483 u32 mask = create_zero_mask(row);
505#else 484 u32 msk0 = mask << shift_left;
506 u32 clr_a = decode_1bpp(ch1, flip_x); 485 u32 msk1 = mask >> shift_right;
507 u32 clr_b = decode_1bpp(ch2, flip_x); 486
508#endif 487 dst[0] = (dst[0] & ~msk0);
509 u32 mask_a = (clr_a * 0xF); 488 dst[8] = (dst[8] & ~msk1);
510 u32 mask_b = (clr_b * 0xF); 489 }
511 u32 mask = (mask_a | mask_b); 490 dst += (32 - 1) * 8;
512 u32 color; 491 for(size_t v = n0; v < n_rows; v++, dst++) {
513 if (clr == 0) { 492 u32 row = sprite[v];
514 color = clr_a + (clr_b << 1); 493
515 } else if (clr == 15) { 494 u32 mask = create_zero_mask(row);
516 color = 0; 495 u32 msk0 = mask << shift_left;
517 } else { 496 u32 msk1 = mask >> shift_right;
518 color = (clr_a | clr_b) * clr; 497
519 } 498 dst[0] = (dst[0] & ~msk0);
520 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 499 dst[8] = (dst[8] & ~msk1);
521 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
522 if ((start_row + v) == 7) {
523 dirty_tiles[tile_y + 1] |= dirty;
524 dst += (32 - 1) * 8;
525 }
526 } 500 }
527 } else { 501 } else {
528 for(size_t v = 0; v < 8; v++, dst++) { 502 for(size_t v = 0; v < n0; v++, dst++) {
529 if ((y + v) >= SCREEN_HEIGHT) break; 503 u32 row = sprite[v];
530 u8 ch1 = sprite[(7 - v) + 0]; 504
531 u8 ch2 = sprite[(7 - v) + 8]; 505 u32 mask = create_zero_mask(row);
532#if DEC_BIG_LUT 506 u32 msk0 = mask << shift_left;
533 u32 clr_a = lut[ch1]; 507 u32 msk1 = mask >> shift_right;
534 u32 clr_b = lut[ch2]; 508 u32 clr0 = row << shift_left;
535#else 509 u32 clr1 = row >> shift_right;
536 u32 clr_a = decode_1bpp(ch1, flip_x); 510
537 u32 clr_b = decode_1bpp(ch2, flip_x); 511 dst[0] = (dst[0] & ~msk0) | clr0;
538#endif 512 dst[8] = (dst[8] & ~msk1) | clr1;
539 u32 mask_a = (clr_a * 0xF); 513 }
540 u32 mask_b = (clr_b * 0xF); 514 dst += (32 - 1) * 8;
541 u32 mask = (mask_a | mask_b); 515 for(size_t v = n0; v < n_rows; v++, dst++) {
542 u32 color; 516 u32 row = sprite[v];
543 if (clr == 0) { 517
544 color = clr_a + (clr_b << 1); 518 u32 mask = create_zero_mask(row);
545 } else if (clr == 15) { 519 u32 msk0 = mask << shift_left;
546 color = 0; 520 u32 msk1 = mask >> shift_right;
547 } else { 521 u32 clr0 = row << shift_left;
548 color = (clr_a | clr_b) * clr; 522 u32 clr1 = row >> shift_right;
549 } 523
550 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 524 dst[0] = (dst[0] & ~msk0) | clr0;
551 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 525 dst[8] = (dst[8] & ~msk1) | clr1;
552 if ((start_row + v) == 7) {
553 dirty_tiles[tile_y + 1] |= dirty;
554 dst += (32 - 1) * 8;
555 }
556 } 526 }
557 } 527 }
558 dirty_tiles[tile_y] |= dirty; 528 dirty_tiles[tile_y] |= dirty;
529 if (start_row != 0) {
530 dirty_tiles[tile_y + 1] |= dirty;
531 }
559} 532}
560 533
561IWRAM_CODE 534IWRAM_CODE
@@ -572,47 +545,63 @@ draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
572 size_t shift_right = (8 - start_col) * 4; 545 size_t shift_right = (8 - start_col) * 4;
573 u32 dirty = (1 << tile_x0) | (1 << tile_x1); 546 u32 dirty = (1 << tile_x0) | (1 << tile_x1);
574 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; 547 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8];
575#if DEC_BIG_LUT 548 u32 color = clr * 0x11111111;
576 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 549 u32 *lut = flip_x ? lut_1bpp_mask_flip_x : lut_1bpp_mask;
577#endif 550 size_t n_rows = 8;
551 if (y + 8 > SCREEN_HEIGHT) {
552 n_rows = 8 - ((y + 8) - SCREEN_HEIGHT);
553 }
554 size_t n0 = MIN(8 - start_row, n_rows);
578 if (!flip_y) { 555 if (!flip_y) {
579 for(size_t v = 0; v < 8; v++, dst++) { 556 for(size_t v = 0; v < n0; v++, dst++) {
580 if ((y + v) >= SCREEN_HEIGHT) break; 557 u32 mask = lut[*sprite];
581 u8 ch1 = sprite[v + 0]; 558 u32 msk0 = mask << shift_left;
582#if DEC_BIG_LUT 559 u32 msk1 = mask >> shift_right;
583 u32 color = lut[ch1]; 560 u32 clr0 = msk0 & color;
584#else 561 u32 clr1 = msk1 & color;
585 u32 color = decode_1bpp(ch1, flip_x); 562 dst[0] = (dst[0] & ~msk0) | clr0;
586#endif 563 dst[8] = (dst[8] & ~msk1) | clr1;
587 u32 mask = (color * 0xF); 564 sprite++;
588 color *= clr; 565 }
589 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 566 dst += (32 - 1) * 8;
590 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 567 for(size_t v = n0; v < n_rows; v++, dst++) {
591 if ((start_row + v) == 7) { 568 u32 mask = lut[*sprite];
592 dirty_tiles[tile_y + 1] |= dirty; 569 u32 msk0 = mask << shift_left;
593 dst += (32 - 1) * 8; 570 u32 msk1 = mask >> shift_right;
594 } 571 u32 clr0 = msk0 & color;
572 u32 clr1 = msk1 & color;
573 dst[0] = (dst[0] & ~msk0) | clr0;
574 dst[8] = (dst[8] & ~msk1) | clr1;
575 sprite++;
595 } 576 }
596 } else { 577 } else {
597 for(size_t v = 0; v < 8; v++, dst++) { 578 sprite += 7;
598 if ((y + v) >= SCREEN_HEIGHT) break; 579 for(size_t v = 0; v < n0; v++, dst++) {
599 u8 ch1 = sprite[(7 - v) + 0]; 580 u32 mask = lut[*sprite];
600#if DEC_BIG_LUT 581 u32 msk0 = mask << shift_left;
601 u32 color = lut[ch1]; 582 u32 msk1 = mask >> shift_right;
602#else 583 u32 clr0 = msk0 & color;
603 u32 color = decode_1bpp(ch1, flip_x); 584 u32 clr1 = msk1 & color;
604#endif 585 dst[0] = (dst[0] & ~msk0) | clr0;
605 u32 mask = (color * 0xF); 586 dst[8] = (dst[8] & ~msk1) | clr1;
606 color *= clr; 587 sprite--;
607 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 588 }
608 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 589 dst += (32 - 1) * 8;
609 if ((start_row + v) == 7) { 590 for(size_t v = n0; v < n_rows; v++, dst++) {
610 dirty_tiles[tile_y + 1] |= dirty; 591 u32 mask = lut[*sprite];
611 dst += (32 - 1) * 8; 592 u32 msk0 = mask << shift_left;
612 } 593 u32 msk1 = mask >> shift_right;
594 u32 clr0 = msk0 & color;
595 u32 clr1 = msk1 & color;
596 dst[0] = (dst[0] & ~msk0) | clr0;
597 dst[8] = (dst[8] & ~msk1) | clr1;
598 sprite--;
613 } 599 }
614 } 600 }
615 dirty_tiles[tile_y] |= dirty; 601 dirty_tiles[tile_y] |= dirty;
602 if (start_row != 0) {
603 dirty_tiles[tile_y + 1] |= dirty;
604 }
616} 605}
617 606
618// 607//
@@ -620,9 +609,11 @@ draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
620// 609//
621 610
622IWRAM_CODE 611IWRAM_CODE
612UNROLL_LOOPS
623void 613void
624flip_buffer(void) { 614flip_buffer(void) {
625// Mode 0: double buffering without dirty tiles. 615// Mode 0: double buffering without dirty tiles. Use this when we are clearing
616// the screen every single frame.
626#if FLIP_TYPE == 0 617#if FLIP_TYPE == 0
627 if (backbuf == BUF_0) { 618 if (backbuf == BUF_0) {
628 backbuf = BUF_1; 619 backbuf = BUF_1;
@@ -635,7 +626,7 @@ flip_buffer(void) {
635 } 626 }
636 627
637// Mode 1: single buffer, copy the dirty lines from backbuffer (BUF_1) to 628// Mode 1: single buffer, copy the dirty lines from backbuffer (BUF_1) to
638// frontbuffer (BUF_0) using the DMA. 629// frontbuffer (BUF_0).
639#elif FLIP_TYPE == 1 630#elif FLIP_TYPE == 1
640 u32 *front = BUF_0; 631 u32 *front = BUF_0;
641 u32 *back = BUF_1; 632 u32 *back = BUF_1;
@@ -646,7 +637,11 @@ flip_buffer(void) {
646 continue; 637 continue;
647 } 638 }
648 u32 offset = j * 32 * 8; 639 u32 offset = j * 32 * 8;
640#if NO_DMA == 0
649 dma_copy(front + offset, back + offset, (30 * 8 * 4), 3); 641 dma_copy(front + offset, back + offset, (30 * 8 * 4), 3);
642#else
643 copy32(front + offset, back + offset, (30 * 8));
644#endif
650 dirty_tiles[j] = 0; 645 dirty_tiles[j] = 0;
651 } 646 }
652 647
@@ -673,7 +668,7 @@ flip_buffer(void) {
673 } 668 }
674 669
675// Mode 3: Double buffering with dirty line, copying the dirty lines if needed 670// Mode 3: Double buffering with dirty line, copying the dirty lines if needed
676// after flipping buffers with the DMA. 671// after flipping buffers.
677#elif FLIP_TYPE == 3 672#elif FLIP_TYPE == 3
678 bool should_flip = false; 673 bool should_flip = false;
679 for (size_t j = 0; j < 20; ++j) { 674 for (size_t j = 0; j < 20; ++j) {
@@ -701,7 +696,11 @@ flip_buffer(void) {
701 continue; 696 continue;
702 } 697 }
703 u32 offset = j * 32 * 8; 698 u32 offset = j * 32 * 8;
699#if NO_DMA == 0
704 dma_copy(backbuf + offset, frontbuf + offset, (30 * 8 * 4), 3); 700 dma_copy(backbuf + offset, frontbuf + offset, (30 * 8 * 4), 3);
701#else
702 copy32(backbuf + offset, frontbuf + offset, (30 * 8));
703#endif
705 dirty_tiles[j] = 0; 704 dirty_tiles[j] = 0;
706 } 705 }
707 706
@@ -746,6 +745,21 @@ flip_buffer(void) {
746#endif 745#endif
747} 746}
748 747
748IWRAM_CODE
749UNROLL_LOOPS
750void
751decode_1bpp(u32 *dst, u8 *src, u8 clr, u8 flip_x, u32 n_tiles) {
752 u32 color = 0x11111111 * clr;
753 if (!flip_x) {
754 for (size_t i = 0; i < n_tiles * 8; i++) {
755 *dst++ = lut_1bpp_mask[*src++] & color;
756 }
757 } else {
758 for (size_t i = 0; i < n_tiles * 8; i++) {
759 *dst++ = lut_1bpp_mask_flip_x[*src++] & color;
760 }
761 }
762}
749// 763//
750// Text rendering. 764// Text rendering.
751// 765//
@@ -846,7 +860,11 @@ renderer_init(void) {
846 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; 860 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1;
847 861
848 // Clear VRAM. 862 // Clear VRAM.
863#if NO_DMA == 0
849 dma_fill((u32*)MEM_VRAM, 0, KB(96), 3); 864 dma_fill((u32*)MEM_VRAM, 0, KB(96), 3);
865#else
866 set32((u32*)MEM_VRAM, 0, KB(96)/4);
867#endif
850 868
851 // Initialize backgrounds. 869 // Initialize backgrounds.
852 BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); 870 BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1);