summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-22 08:55:51 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-22 08:55:51 +0200
commit e17dbbd6b6f6314d84e719fd7ef7653f76fa388f (patch)
tree 292e2fec9f4fe90d4adb0185dc47645d62352124
parent 521a0acbb99928c2c67cb1f396968510ec18721c (diff)
download gba-link-cable-tester-e17dbbd6b6f6314d84e719fd7ef7653f76fa388f.tar.gz
gba-link-cable-tester-e17dbbd6b6f6314d84e719fd7ef7653f76fa388f.zip
Add initial dirty_tiles marking for m0 and improve chr/icn perf
-rw-r--r-- src/main.c 2
-rw-r--r-- src/profiling.c 2
-rw-r--r-- src/renderer_m0.c 173
3 files changed, 114 insertions, 63 deletions
diff --git a/src/main.c b/src/main.c
index fa2384f..d97a110 100644
--- a/src/main.c
+++ b/src/main.c
@@ -335,9 +335,9 @@ int main(void) {
335 irs_set(IRQ_VBLANK, irs_stub); 335 irs_set(IRQ_VBLANK, irs_stub);
336 336
337 while (true) { 337 while (true) {
338 test_sprites_bounce();
338 test_text_rendering(); 339 test_text_rendering();
339 test_growing_rects(); 340 test_growing_rects();
340 test_sprites_bounce();
341 test_moving_line(); 341 test_moving_line();
342 test_all_static(); 342 test_all_static();
343 } 343 }
diff --git a/src/profiling.c b/src/profiling.c
index a464372..4475aa8 100644
--- a/src/profiling.c
+++ b/src/profiling.c
@@ -41,7 +41,7 @@
41#endif 41#endif
42 42
43static bool profile_show = true; 43static bool profile_show = true;
44static bool profile_bg_show = true; 44static bool profile_bg_show = false;
45 45
46#define PROF_SHOW() \ 46#define PROF_SHOW() \
47 do { \ 47 do { \
diff --git a/src/renderer_m0.c b/src/renderer_m0.c
index 10bbff5..ecb0402 100644
--- a/src/renderer_m0.c
+++ b/src/renderer_m0.c
@@ -22,6 +22,9 @@
22// Pointer to the backbuffer. 22// Pointer to the backbuffer.
23static u32 *backbuf = BUF_1; 23static u32 *backbuf = BUF_1;
24 24
25// Tracking which tiles are "dirty" and need refreshing.
26static u32 dirty_tiles[21] = {0};
27
25// Position of the tilemap. 28// Position of the tilemap.
26#define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) 29#define TILE_MAP ((u32*)(MEM_VRAM + KB(40)))
27 30
@@ -74,6 +77,7 @@ draw_pixel(size_t x, size_t y, u8 clr) {
74 u32 mask = 0xF << shift; 77 u32 mask = 0xF << shift;
75 u32 row = clr << shift; 78 u32 row = clr << shift;
76 *dst = (*dst & ~mask) | row; 79 *dst = (*dst & ~mask) | row;
80 dirty_tiles[tile_y] |= 1 << tile_x;
77} 81}
78 82
79IWRAM_CODE 83IWRAM_CODE
@@ -89,6 +93,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
89 size_t start_col = x0 % 8; 93 size_t start_col = x0 % 8;
90 size_t end_col = x1 % 8; 94 size_t end_col = x1 % 8;
91 size_t start_row = y0 % 8; 95 size_t start_row = y0 % 8;
96 u32 dirty = (1 << tile_x0) | (1 << tile_x1);
92 97
93 // Horizontal line. There are 3 cases: 98 // Horizontal line. There are 3 cases:
94 // 1. Lines fit on a single tile. 99 // 1. Lines fit on a single tile.
@@ -112,11 +117,13 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
112 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); 117 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
113 dst += 8; 118 dst += 8;
114 for (size_t i = 1; i < dtx; i++) { 119 for (size_t i = 1; i < dtx; i++) {
120 dirty |= (1 << (tile_x0 + i));
115 *dst = row; 121 *dst = row;
116 dst += 8; 122 dst += 8;
117 } 123 }
118 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); 124 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
119 } 125 }
126 dirty_tiles[tile_y] |= dirty;
120} 127}
121 128
122IWRAM_CODE 129IWRAM_CODE
@@ -135,6 +142,7 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
135 size_t start_row1 = y1 % 8; 142 size_t start_row1 = y1 % 8;
136 143
137 size_t shift_left = start_col * 4; 144 size_t shift_left = start_col * 4;
145 u32 dirty = (1 << tile_x);
138 146
139 u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; 147 u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8];
140 u32 mask = 0x0000000F << shift_left; 148 u32 mask = 0x0000000F << shift_left;
@@ -150,6 +158,7 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
150 } 158 }
151 dst += 8 * 31; 159 dst += 8 * 31;
152 for (size_t j = 1; j < dty; j++) { 160 for (size_t j = 1; j < dty; j++) {
161 dirty_tiles[tile_y0 + j] |= dirty;
153 for (size_t i = 0; i < 8; i++, dst++) { 162 for (size_t i = 0; i < 8; i++, dst++) {
154 dst[0] = (dst[0] & ~mask) | row; 163 dst[0] = (dst[0] & ~mask) | row;
155 } 164 }
@@ -159,6 +168,8 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
159 dst[0] = (dst[0] & ~mask) | row; 168 dst[0] = (dst[0] & ~mask) | row;
160 } 169 }
161 } 170 }
171 dirty_tiles[tile_y0] |= dirty;
172 dirty_tiles[tile_y1] |= dirty;
162} 173}
163 174
164IWRAM_CODE 175IWRAM_CODE
@@ -458,101 +469,141 @@ decode_1bpp(u8 row, u8 flip_x) {
458#endif 469#endif
459 470
460IWRAM_CODE 471IWRAM_CODE
461static inline 472UNROLL_LOOPS
462void 473void
463draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 clr, u8 flip_x) { 474draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
464 BOUNDCHECK_SCREEN(x, y); 475 BOUNDCHECK_SCREEN(x, y);
465 476 size_t tile_x0 = x / 8;
466 size_t tile_x = x / 8; 477 size_t tile_x1 = (x + 7) / 8;
467 size_t tile_y = y / 8; 478 size_t tile_y = y / 8;
468 size_t start_col = x % 8; 479 size_t start_col = x % 8;
469 size_t start_row = y % 8; 480 size_t start_row = y % 8;
470 size_t shift_left = start_col * 4; 481 size_t shift_left = start_col * 4;
471 size_t shift_right = (8 - start_col) * 4; 482 size_t shift_right = (8 - start_col) * 4;
472 483 u32 dirty = (1 << tile_x0) | (1 << tile_x1);
473 u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; 484 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8];
474#if DEC_BIG_LUT 485#if DEC_BIG_LUT
475 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 486 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
476 u32 clr_a = lut[a];
477 u32 clr_b = lut[b];
478#else
479 u32 clr_a = decode_1bpp(a, flip_x);
480 u32 clr_b = decode_1bpp(b, flip_x);
481#endif 487#endif
482 u32 mask_a = (clr_a * 0xF);
483 u32 mask_b = (clr_b * 0xF);
484 u32 mask = (mask_a | mask_b);
485 u32 color;
486 if (clr == 0) {
487 color = clr_a + (clr_b << 1);
488 } else if (clr == 15) {
489 color = 0;
490 } else {
491 color = (clr_a | clr_b) * clr;
492 }
493 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
494 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
495}
496
497IWRAM_CODE
498static inline
499void
500draw_1bpp_row(size_t x, size_t y, u8 a, u8 clr, u8 flip_x) {
501 BOUNDCHECK_SCREEN(x, y);
502
503 size_t tile_x = x / 8;
504 size_t tile_y = y / 8;
505 size_t start_col = x % 8;
506 size_t start_row = y % 8;
507 size_t shift_left = start_col * 4;
508 size_t shift_right = (8 - start_col) * 4;
509
510 u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8];
511 u32 color = decode_1bpp(a, flip_x);
512 u32 mask = (color * 0xF);
513 color *= clr;
514 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
515 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
516}
517
518IWRAM_CODE
519void
520draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
521 BOUNDCHECK_SCREEN(x, y);
522 if (!flip_y) { 488 if (!flip_y) {
523 for(size_t v = 0; v < 8; v++) { 489 for(size_t v = 0; v < 8; v++, dst++) {
524 if ((y + v) >= SCREEN_HEIGHT) break; 490 if ((y + v) >= SCREEN_HEIGHT) break;
525 u8 ch1 = sprite[v + 0]; 491 u8 ch1 = sprite[v + 0];
526 u8 ch2 = sprite[v + 8]; 492 u8 ch2 = sprite[v + 8];
527 draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x); 493#if DEC_BIG_LUT
494 u32 clr_a = lut[ch1];
495 u32 clr_b = lut[ch2];
496#else
497 u32 clr_a = decode_1bpp(ch1, flip_x);
498 u32 clr_b = decode_1bpp(ch2, flip_x);
499#endif
500 u32 mask_a = (clr_a * 0xF);
501 u32 mask_b = (clr_b * 0xF);
502 u32 mask = (mask_a | mask_b);
503 u32 color;
504 if (clr == 0) {
505 color = clr_a + (clr_b << 1);
506 } else if (clr == 15) {
507 color = 0;
508 } else {
509 color = (clr_a | clr_b) * clr;
510 }
511 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
512 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
513 if ((start_row + v) == 7) {
514 dirty_tiles[tile_y + 1] |= dirty;
515 dst += (32 - 1) * 8;
516 }
528 } 517 }
529 } else { 518 } else {
530 for(size_t v = 0; v < 8; v++) { 519 for(size_t v = 0; v < 8; v++, dst++) {
531 if ((y + v) >= SCREEN_HEIGHT) break; 520 if ((y + v) >= SCREEN_HEIGHT) break;
532 u8 ch1 = sprite[(7 - v) + 0]; 521 u8 ch1 = sprite[(7 - v) + 0];
533 u8 ch2 = sprite[(7 - v) + 8]; 522 u8 ch2 = sprite[(7 - v) + 8];
534 draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x); 523#if DEC_BIG_LUT
524 u32 clr_a = lut[ch1];
525 u32 clr_b = lut[ch2];
526#else
527 u32 clr_a = decode_1bpp(ch1, flip_x);
528 u32 clr_b = decode_1bpp(ch2, flip_x);
529#endif
530 u32 mask_a = (clr_a * 0xF);
531 u32 mask_b = (clr_b * 0xF);
532 u32 mask = (mask_a | mask_b);
533 u32 color;
534 if (clr == 0) {
535 color = clr_a + (clr_b << 1);
536 } else if (clr == 15) {
537 color = 0;
538 } else {
539 color = (clr_a | clr_b) * clr;
540 }
541 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
542 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
543 if ((start_row + v) == 7) {
544 dirty_tiles[tile_y + 1] |= dirty;
545 dst += (32 - 1) * 8;
546 }
535 } 547 }
536 } 548 }
549 dirty_tiles[tile_y] |= dirty;
537} 550}
538 551
539IWRAM_CODE 552IWRAM_CODE
553UNROLL_LOOPS
540void 554void
541draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { 555draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
542 BOUNDCHECK_SCREEN(x, y); 556 BOUNDCHECK_SCREEN(x, y);
557 size_t tile_x0 = x / 8;
558 size_t tile_x1 = (x + 7) / 8;
559 size_t tile_y = y / 8;
560 size_t start_col = x % 8;
561 size_t start_row = y % 8;
562 size_t shift_left = start_col * 4;
563 size_t shift_right = (8 - start_col) * 4;
564 u32 dirty = (1 << tile_x0) | (1 << tile_x1);
565 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8];
566#if DEC_BIG_LUT
567 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
568#endif
543 if (!flip_y) { 569 if (!flip_y) {
544 for(size_t v = 0; v < 8; v++) { 570 for(size_t v = 0; v < 8; v++, dst++) {
545 if ((y + v) >= SCREEN_HEIGHT) break; 571 if ((y + v) >= SCREEN_HEIGHT) break;
546 u8 ch1 = sprite[v]; 572 u8 ch1 = sprite[v + 0];
547 draw_1bpp_row(x, y + v, ch1, clr, flip_x); 573#if DEC_BIG_LUT
574 u32 color = lut[ch1];
575#else
576 u32 color = decode_1bpp(ch1, flip_x);
577#endif
578 u32 mask = (color * 0xF);
579 color *= clr;
580 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
581 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
582 if ((start_row + v) == 7) {
583 dirty_tiles[tile_y + 1] |= dirty;
584 dst += (32 - 1) * 8;
585 }
548 } 586 }
549 } else { 587 } else {
550 for(size_t v = 0; v < 8; v++) { 588 for(size_t v = 0; v < 8; v++, dst++) {
551 if ((y + v) >= SCREEN_HEIGHT) break; 589 if ((y + v) >= SCREEN_HEIGHT) break;
552 u8 ch1 = sprite[(7 - v)]; 590 u8 ch1 = sprite[v + 0];
553 draw_1bpp_row(x, y + v, ch1, clr, flip_x); 591#if DEC_BIG_LUT
592 u32 color = lut[ch1];
593#else
594 u32 color = decode_1bpp(ch1, flip_x);
595#endif
596 u32 mask = (color * 0xF);
597 color *= clr;
598 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
599 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
600 if ((start_row + v) == 7) {
601 dirty_tiles[tile_y + 1] |= dirty;
602 dst += (32 - 1) * 8;
603 }
554 } 604 }
555 } 605 }
606 dirty_tiles[tile_y] |= dirty;
556} 607}
557 608
558// 609//