summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-18 09:18:34 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-18 09:18:34 +0200
commit 195f219514d02826571a89ff380debfbb2882872 (patch)
tree e4d3e2a1a945d9ae1b9d9664ffe785ef029c405a
parent 4d1fc4013ec24b37bd2815c4143aaa46b17ef574 (diff)
download gba-link-cable-tester-195f219514d02826571a89ff380debfbb2882872.tar.gz
gba-link-cable-tester-195f219514d02826571a89ff380debfbb2882872.zip
Remove unused code paths and #if #else testing grounds
-rw-r--r-- src/renderer_m0.c 213
-rw-r--r-- src/renderer_m4.c 61
2 files changed, 84 insertions, 190 deletions
diff --git a/src/renderer_m0.c b/src/renderer_m0.c
index 7dd5b22..6be01a4 100644
--- a/src/renderer_m0.c
+++ b/src/renderer_m0.c
@@ -12,6 +12,9 @@
12#define BUF_0 ((u32*)(MEM_VRAM)) 12#define BUF_0 ((u32*)(MEM_VRAM))
13#define BUF_1 ((u32*)(MEM_VRAM + KB(20))) 13#define BUF_1 ((u32*)(MEM_VRAM + KB(20)))
14 14
15// Pointer to the backbuffer.
16static u32 *backbuf = BUF_1;
17
15// Position of the tilemap. 18// Position of the tilemap.
16#define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) 19#define TILE_MAP ((u32*)(MEM_VRAM + KB(40)))
17 20
@@ -21,15 +24,6 @@
21#define SB_0 20 24#define SB_0 20
22#define SB_1 22 25#define SB_1 22
23 26
24// Available storage for other memory.
25// #define FG_PIXELS ((u32*)(MEM_VRAM + KB(44)))
26// #define BG_PIXELS ((u32*)(MEM_VRAM + KB(64)))
27
28// Keep track of which tiles need to be copied to the frontbuffer.
29static u32 dirty_tiles[21] = {0};
30
31static u32 *backbuf = BUF_1;
32
33// Boundchecks can be disabled at compile time but this will not always improve 27// Boundchecks can be disabled at compile time but this will not always improve
34// the performance and can in fact make it worse. It is possible that this is 28// the performance and can in fact make it worse. It is possible that this is
35// due to some aliasing optimizations but not sure at this moment. 29// due to some aliasing optimizations but not sure at this moment.
@@ -45,18 +39,15 @@ static u32 *backbuf = BUF_1;
45// Swap A and B values to make sure A <= B. 39// Swap A and B values to make sure A <= B.
46#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); } 40#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
47 41
42//
43// Basic primitives.
44//
45
48IWRAM_CODE 46IWRAM_CODE
49void screen_fill(u8 clr) { 47void screen_fill(u8 clr) {
50 // We have to make sure we leave the last tile blank to use as alpha channel 48 // We have to make sure we leave the last tile blank to use as alpha channel
51 // when moving the BG during double buffering. 49 // when moving the BG during double buffering.
52#if 0
53 u32 *dst = backbuf;
54 for(int i = 0; i < KB(20) / 4 - 8; i++) {
55 *dst++ = 0x11111111 * clr;
56 }
57#else
58 dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3); 50 dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3);
59#endif
60} 51}
61 52
62IWRAM_CODE 53IWRAM_CODE
@@ -76,9 +67,6 @@ draw_pixel(size_t x, size_t y, u8 clr) {
76 u32 mask = 0xF << shift; 67 u32 mask = 0xF << shift;
77 u32 row = clr << shift; 68 u32 row = clr << shift;
78 *dst = (*dst & ~mask) | row; 69 *dst = (*dst & ~mask) | row;
79
80 // Mark tile as dirty.
81 dirty_tiles[tile_y] |= 1 << tile_x;
82} 70}
83 71
84IWRAM_CODE 72IWRAM_CODE
@@ -250,7 +238,6 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
250IWRAM_CODE 238IWRAM_CODE
251void 239void
252draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { 240draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
253#if 1
254 BOUNDCHECK_SCREEN(x0, y0); 241 BOUNDCHECK_SCREEN(x0, y0);
255 BOUNDCHECK_SCREEN(x1, y1); 242 BOUNDCHECK_SCREEN(x1, y1);
256 MAYBE_SWAP(x0, x1); 243 MAYBE_SWAP(x0, x1);
@@ -260,102 +247,6 @@ draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
260 draw_hline(x0, x1, y1, clr); 247 draw_hline(x0, x1, y1, clr);
261 draw_vline(x0, y0, y1, clr); 248 draw_vline(x0, y0, y1, clr);
262 draw_vline(x1, y0, y1, clr); 249 draw_vline(x1, y0, y1, clr);
263#else
264 // NOTE: This is the specialized version trying to update 2 pixels at
265 // a time. As of writing this it is slightly slower than using
266 // 4 draw_hline/vline functions. The horizontal drawing part seems
267 // marginally faster but the vertical one is slower. Potentially the
268 // vertical part could be optimized or just use 2 draw_vline instead. The
269 // perf improvements seem marginal enough that it doesn't seem worth it.
270 BOUNDCHECK_SCREEN(x0, y0);
271 BOUNDCHECK_SCREEN(x1, y1);
272 MAYBE_SWAP(x0, x1);
273 MAYBE_SWAP(y0, y1);
274
275 // Find row positions for the given x/y coordinates.
276 size_t tile_x0 = x0 / 8;
277 size_t tile_y0 = y0 / 8;
278 size_t tile_x1 = x1 / 8;
279 size_t tile_y1 = y1 / 8;
280 size_t start_col0 = x0 % 8;
281 size_t start_col1 = x1 % 8;
282 size_t start_row0 = y0 % 8;
283 size_t start_row1 = y1 % 8;
284
285 // Get a pointer to the backbuffer and the tile row.
286 u32 *dst0 = &backbuf[start_row0 + (tile_x0 + tile_y0 * 32) * 8];
287 u32 *dst1 = &backbuf[start_row1 + (tile_x0 + tile_y1 * 32) * 8];
288
289 size_t dx = tile_x1 - tile_x0;
290 size_t dy = tile_y1 - tile_y0;
291
292 // We can update two lines at a time, which is faster than calling draw_line
293 // four times.
294 if (dx < 1) {
295 u32 mask = 0xFFFFFFFF;
296 mask >>= (7 - start_col1 - dx) * 4;
297 mask &= 0xFFFFFFFF << start_col0 * 4;
298 u32 row = (0x11111111 * clr) & mask;
299 *dst0 = (*dst0 & ~mask) | row;
300 *dst1 = (*dst1 & ~mask) | row;
301 } else {
302 size_t shift_left = start_col0 * 4;
303 size_t shift_right = (7 - start_col1) * 4;
304 u32 mask = 0xFFFFFFFF;
305 u32 row = 0x11111111 * clr;
306 *dst0 = (*dst0 & ~(mask << shift_left)) | (row << shift_left);
307 *dst1 = (*dst1 & ~(mask << shift_left)) | (row << shift_left);
308 for (size_t i = 1; i < dx; i++) {
309 dst0[i * 8] = row;
310 dst1[i * 8] = row;
311 }
312 dst0[dx * 8] = (dst0[dx * 8] & ~(mask >> shift_right)) | (row >> shift_right);
313 dst1[dx * 8] = (dst1[dx * 8] & ~(mask >> shift_right)) | (row >> shift_right);
314 }
315 u32 mask_left = 0xF << start_col0 * 4;
316 u32 mask_right = 0xF << start_col1 * 4;
317 u32 row_left = (0x11111111 * clr) & mask_left;
318 u32 row_right = (0x11111111 * clr) & mask_right;
319 // u32 cur_row = start_row0;
320 // dst0 = &backbuf[start_row0 + (tile_x0 + tile_y0 * 32) * 8];
321 // dst1 = &backbuf[start_row0 + (tile_x1 + tile_y0 * 32) * 8];
322 // for (size_t i = 0; i <= (y1 - y0); i++) {
323 // *dst0 = (*dst0 & ~mask_left) | row_left;
324 // *dst1 = (*dst1 & ~mask_right) | row_right;
325 // if (cur_row == 7) {
326 // dst0 += 8 * 31 + 1;
327 // dst1 += 8 * 31 + 1;
328 // cur_row = 0;
329 // } else {
330 // cur_row++;
331 // dst0++;
332 // dst1++;
333 // }
334 // }
335 if (dy < 1) {
336 for (size_t i = 1; i < (y1 - y0); i++, dst0++) {
337 dst0[1] = (dst0[1] & ~mask_left) | row_left;
338 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
339 }
340 } else {
341 for (size_t i = 1; i < (8 - start_row0); i++, dst0++) {
342 dst0[1] = (dst0[1] & ~mask_left) | row_left;
343 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
344 }
345 dst0 += 8 * 31;
346 for (size_t j = 1; j < dy; j++) {
347 for (size_t i = 0; i < 8; i++, dst0++) {
348 dst0[1] = (dst0[1] & ~mask_left) | row_left;
349 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
350 }
351 dst0 += 8 * 31;
352 }
353 for (size_t i = 0; i < start_row1; i++, dst0++) {
354 dst0[1] = (dst0[1] & ~mask_left) | row_left;
355 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
356 }
357 }
358#endif
359} 250}
360 251
361IWRAM_CODE 252IWRAM_CODE
@@ -373,54 +264,15 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
373 return; 264 return;
374 } 265 }
375 266
376#if 1
377 // Drawline implementation. 267 // Drawline implementation.
378 for (size_t y = y0; y <= y1; y++) { 268 for (size_t y = y0; y <= y1; y++) {
379 draw_hline(x0, x1, y, clr); 269 draw_hline(x0, x1, y, clr);
380 } 270 }
381#else
382 // draw_rect implementation.
383 size_t dx = x1 - x0;
384 size_t dy = y1 - y0;
385 size_t n_rect = MIN(dx, dy);
386 n_rect = n_rect / 2 + 1;
387 for (size_t i = 0; i < n_rect; i++) {
388 draw_rect(x0 + i, y0 + i, x1 - i, y1 - i, clr);
389 }
390#endif
391} 271}
392 272
393IWRAM_CODE 273//
394void 274// Sprites (chr/icn).
395flip_buffer(void) { 275//
396 // NOTE: can this be branchless? Either way is probably unnecessary other
397 // than code clarity maybe?.
398 if (backbuf == BUF_0) {
399 backbuf = BUF_1;
400 BG_H_SCROLL_0 = 0;
401 BG_H_SCROLL_1 = -240;
402 } else {
403 backbuf = BUF_0;
404 BG_H_SCROLL_0 = -240;
405 BG_H_SCROLL_1 = 0;
406 }
407 // TODO: Copying all tiles for now. Study if it's better to use dirty_tiles
408 // or dirty_lines.
409 // Copy dirty tiles from the backbuffer to the frontbuffer.
410 // Tile *dst = FRONTBUF;
411 // Tile *src = backbuf;
412 // for (size_t j = 0; j < 20; ++j) {
413 // // if (dirty_tiles[j] == 0) {
414 // // continue;
415 // // }
416 // for (size_t i = 0, k = 1; i < 30; ++i, k <<= 1) {
417 // // if (dirty_tiles[j] & k) {
418 // dst[i + j * 32] = src[i + j * 32];
419 // // }
420 // }
421 // // dirty_tiles[j] = 0;
422 // }
423}
424 276
425#define DEC_BIG_LUT 1 277#define DEC_BIG_LUT 1
426 278
@@ -544,7 +396,6 @@ decode_1bpp(u8 row, u8 flip_x) {
544 } 396 }
545 return dec_byte[row]; 397 return dec_byte[row];
546} 398}
547
548#else 399#else
549static u16 dec_nibble[] = { 400static u16 dec_nibble[] = {
550 0x0000, 0x1000, 0x0100, 0x1100, 401 0x0000, 0x1000, 0x0100, 0x1100,
@@ -679,6 +530,46 @@ draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
679 } 530 }
680} 531}
681 532
533//
534// Flipping buffers/copying memory.
535//
536
537IWRAM_CODE
538void
539flip_buffer(void) {
540 // NOTE: can this be branchless? Either way is probably unnecessary other
541 // than code clarity maybe?.
542 if (backbuf == BUF_0) {
543 backbuf = BUF_1;
544 BG_H_SCROLL_0 = 0;
545 BG_H_SCROLL_1 = -240;
546 } else {
547 backbuf = BUF_0;
548 BG_H_SCROLL_0 = -240;
549 BG_H_SCROLL_1 = 0;
550 }
551 // TODO: Copying all tiles for now. Study if it's better to use dirty_tiles
552 // or dirty_lines.
553 // Copy dirty tiles from the backbuffer to the frontbuffer.
554 // Tile *dst = FRONTBUF;
555 // Tile *src = backbuf;
556 // for (size_t j = 0; j < 20; ++j) {
557 // // if (dirty_tiles[j] == 0) {
558 // // continue;
559 // // }
560 // for (size_t i = 0, k = 1; i < 30; ++i, k <<= 1) {
561 // // if (dirty_tiles[j] & k) {
562 // dst[i + j * 32] = src[i + j * 32];
563 // // }
564 // }
565 // // dirty_tiles[j] = 0;
566 // }
567}
568
569//
570// Text rendering.
571//
572
682#include "font.h" 573#include "font.h"
683 574
684// Font rendering function for the text engine. 575// Font rendering function for the text engine.
@@ -688,6 +579,10 @@ txt_drawc(char c, size_t x, size_t y, u8 clr) {
688 draw_icn(x, y, tile + 8 * c, clr, 1, 0); 579 draw_icn(x, y, tile + 8 * c, clr, 1, 0);
689} 580}
690 581
582//
583// Initialization.
584//
585
691void 586void
692renderer_init(void) { 587renderer_init(void) {
693 // Initialize display mode and bg palette. 588 // Initialize display mode and bg palette.
diff --git a/src/renderer_m4.c b/src/renderer_m4.c
index 6da7996..4e8868a 100644
--- a/src/renderer_m4.c
+++ b/src/renderer_m4.c
@@ -1,10 +1,12 @@
1#include "renderer.h" 1#include "renderer.h"
2#include "text.h" 2#include "text.h"
3 3
4static u16 *backbuf = (u16*)(MEM_VRAM ^ 0x0A000); 4// Front/back buffers for double buffering.
5#define BUF_0 ((u32*)(MEM_VRAM))
6#define BUF_1 ((u32*)(MEM_VRAM ^ 0x0A000))
5 7
6// Keep track of which tiles need to be copied to the frontbuffer. 8// Pointer to the backbuffer.
7static bool screen_updated = true; 9static u16 *backbuf = BUF_1;
8 10
9// Boundchecks can be disabled at compile time but this will not always improve 11// Boundchecks can be disabled at compile time but this will not always improve
10// the performance and can in fact make it worse. It is possible that this is 12// the performance and can in fact make it worse. It is possible that this is
@@ -21,17 +23,13 @@ static bool screen_updated = true;
21// Swap A and B values to make sure A <= B. 23// Swap A and B values to make sure A <= B.
22#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); } 24#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
23 25
26//
27// Basic primitives.
28//
29
24IWRAM_CODE 30IWRAM_CODE
25void screen_fill(u8 clr) { 31void screen_fill(u8 clr) {
26#if 0
27 u32 *dst = backbuf;
28 for(int i = 0; i < KB(75) / 8; i++) {
29 *dst++ = 0x01010101 * clr;
30 }
31#else
32 dma_fill(backbuf, 0x01010101 * clr, KB(75) / 2, 3); 32 dma_fill(backbuf, 0x01010101 * clr, KB(75) / 2, 3);
33 screen_updated = true;
34#endif
35} 33}
36 34
37IWRAM_CODE 35IWRAM_CODE
@@ -44,7 +42,6 @@ draw_pixel(size_t x, size_t y, u8 clr) {
44 } else { 42 } else {
45 *dst = (*dst & ~0xFF) | clr; 43 *dst = (*dst & ~0xFF) | clr;
46 } 44 }
47 screen_updated = true;
48} 45}
49 46
50IWRAM_CODE 47IWRAM_CODE
@@ -77,22 +74,12 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
77 size_t shift_right = (7 - end_col) * 8; 74 size_t shift_right = (7 - end_col) * 8;
78 u64 row_mask = 0xFFFFFFFFFFFFFFFF; 75 u64 row_mask = 0xFFFFFFFFFFFFFFFF;
79 u64 row = 0x0101010101010101 * clr; 76 u64 row = 0x0101010101010101 * clr;
80#if 0
81 // No DMA.
82 *dst++ = (*dst & ~(row_mask << shift_left)) | row << shift_left;
83 for (size_t i = 1; i < dx; i++) {
84 *dst++ = row;
85 }
86 *dst = (*dst & ~(row_mask >> shift_right)) | row >> shift_right;
87#else
88 // DMA.
89 dst[0] = (dst[0] & ~(row_mask << shift_left)) | row << shift_left; 77 dst[0] = (dst[0] & ~(row_mask << shift_left)) | row << shift_left;
90 if (dx != 1) { 78 if (dx != 1) {
91 dma_fill(&dst[1], 0x01010101 * clr, (dx - 1) * 8, 3); 79 dma_fill(&dst[1], 0x01010101 * clr, (dx - 1) * 8, 3);
92 } 80 }
93 dst[dx] = dst[dx] & ~(row_mask >> shift_right); 81 dst[dx] = dst[dx] & ~(row_mask >> shift_right);
94 dst[dx] |= row >> shift_right; 82 dst[dx] |= row >> shift_right;
95#endif
96 } 83 }
97} 84}
98 85
@@ -169,7 +156,6 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
169 } 156 }
170 } 157 }
171 } 158 }
172 screen_updated = true;
173} 159}
174 160
175IWRAM_CODE 161IWRAM_CODE
@@ -184,7 +170,6 @@ draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
184 draw_hline(x0, x1, y1, clr); 170 draw_hline(x0, x1, y1, clr);
185 draw_vline(x0, y0, y1, clr); 171 draw_vline(x0, y0, y1, clr);
186 draw_vline(x1, y0, y1, clr); 172 draw_vline(x1, y0, y1, clr);
187 screen_updated = true;
188} 173}
189 174
190IWRAM_CODE 175IWRAM_CODE
@@ -204,15 +189,18 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
204 189
205 // Drawline implementation. 190 // Drawline implementation.
206 for (size_t y = y0; y <= y1; y++) { 191 for (size_t y = y0; y <= y1; y++) {
207 // NOTE: Unclear why here draw_hline is faster than draw_line.
208 draw_hline(x0, x1, y, clr); 192 draw_hline(x0, x1, y, clr);
209 } 193 }
210 screen_updated = true;
211} 194}
212 195
196//
197// Sprites (chr/icn).
198//
199
213#define DEC_BIG_LUT 1 200#define DEC_BIG_LUT 1
214 201
215#if DEC_BIG_LUT 202#if DEC_BIG_LUT
203
216static u64 dec_byte_flip_x[256] = { 204static u64 dec_byte_flip_x[256] = {
217 0x0000000000000000, 0x0000000000000001, 0x0000000000000100, 0x0000000000000101, 0x0000000000010000, 205 0x0000000000000000, 0x0000000000000001, 0x0000000000000100, 0x0000000000000101, 0x0000000000010000,
218 0x0000000000010001, 0x0000000000010100, 0x0000000000010101, 0x0000000001000000, 0x0000000001000001, 206 0x0000000000010001, 0x0000000000010100, 0x0000000000010101, 0x0000000001000000, 0x0000000001000001,
@@ -322,6 +310,7 @@ static u64 dec_byte[256] = {
322 0x0001000101010101, 0x0101000101010101, 0x0000010101010101, 0x0100010101010101, 0x0001010101010101, 310 0x0001000101010101, 0x0101000101010101, 0x0000010101010101, 0x0100010101010101, 0x0001010101010101,
323 0x0101010101010101 311 0x0101010101010101
324}; 312};
313
325IWRAM_CODE 314IWRAM_CODE
326static inline 315static inline
327u64 316u64
@@ -331,6 +320,7 @@ decode_1bpp(u8 row, u8 flip_x) {
331 } 320 }
332 return dec_byte[row]; 321 return dec_byte[row];
333} 322}
323
334#else 324#else
335 325
336static u32 dec_nibble[] = { 326static u32 dec_nibble[] = {
@@ -358,6 +348,7 @@ decode_1bpp(u8 row, u8 flip_x) {
358 u32 *lut = dec_nibble; 348 u32 *lut = dec_nibble;
359 return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF]; 349 return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF];
360} 350}
351
361#endif 352#endif
362 353
363IWRAM_CODE 354IWRAM_CODE
@@ -471,17 +462,21 @@ draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
471 } 462 }
472} 463}
473 464
465//
466// Flipping buffers/copying memory.
467//
468
474IWRAM_CODE 469IWRAM_CODE
475void 470void
476flip_buffer(void) { 471flip_buffer(void) {
477 if (!screen_updated) {
478 return;
479 }
480 backbuf = (u16*)((u32)backbuf ^ 0x0A000); 472 backbuf = (u16*)((u32)backbuf ^ 0x0A000);
481 DISP_CTRL ^= DISP_PAGE; 473 DISP_CTRL ^= DISP_PAGE;
482 screen_updated = false;
483} 474}
484 475
476//
477// Text rendering.
478//
479
485#include "font.h" 480#include "font.h"
486 481
487// Font rendering function for the text engine. 482// Font rendering function for the text engine.
@@ -491,13 +486,17 @@ txt_drawc(char c, size_t x, size_t y, u8 clr) {
491 draw_icn(x, y, tile + 8 * c, clr, 1, 0); 486 draw_icn(x, y, tile + 8 * c, clr, 1, 0);
492} 487}
493 488
489//
490// Initialization.
491//
492
494void 493void
495renderer_init(void) { 494renderer_init(void) {
496 // Initialize display mode and bg palette. 495 // Initialize display mode and bg palette.
497 DISP_CTRL = DISP_MODE_4 | DISP_BG_2; 496 DISP_CTRL = DISP_MODE_4 | DISP_BG_2;
498 497
499 // Clear VRAM. 498 // Clear VRAM.
500 dma_fill((u16*)MEM_VRAM, 0x01010101 * 0, KB(96), 3); 499 dma_fill((u16*)MEM_VRAM, 0, KB(96), 3);
501 500
502 // Initialize default palette. 501 // Initialize default palette.
503 PAL_BUFFER_BG[0] = COLOR_BLACK; 502 PAL_BUFFER_BG[0] = COLOR_BLACK;