diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-22 08:55:51 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-22 08:55:51 +0200 |
commit | e17dbbd6b6f6314d84e719fd7ef7653f76fa388f (patch) | |
tree | 292e2fec9f4fe90d4adb0185dc47645d62352124 | |
parent | 521a0acbb99928c2c67cb1f396968510ec18721c (diff) | |
download | gba-renderers-e17dbbd6b6f6314d84e719fd7ef7653f76fa388f.tar.gz gba-renderers-e17dbbd6b6f6314d84e719fd7ef7653f76fa388f.zip |
Add initial dirty_tiles marking for m0 and improve chr/icn perf
-rw-r--r-- | src/main.c | 2 | ||||
-rw-r--r-- | src/profiling.c | 2 | ||||
-rw-r--r-- | src/renderer_m0.c | 173 |
3 files changed, 114 insertions, 63 deletions
@@ -335,9 +335,9 @@ int main(void) { | |||
335 | irs_set(IRQ_VBLANK, irs_stub); | 335 | irs_set(IRQ_VBLANK, irs_stub); |
336 | 336 | ||
337 | while (true) { | 337 | while (true) { |
338 | test_sprites_bounce(); | ||
338 | test_text_rendering(); | 339 | test_text_rendering(); |
339 | test_growing_rects(); | 340 | test_growing_rects(); |
340 | test_sprites_bounce(); | ||
341 | test_moving_line(); | 341 | test_moving_line(); |
342 | test_all_static(); | 342 | test_all_static(); |
343 | } | 343 | } |
diff --git a/src/profiling.c b/src/profiling.c index a464372..4475aa8 100644 --- a/src/profiling.c +++ b/src/profiling.c | |||
@@ -41,7 +41,7 @@ | |||
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | static bool profile_show = true; | 43 | static bool profile_show = true; |
44 | static bool profile_bg_show = true; | 44 | static bool profile_bg_show = false; |
45 | 45 | ||
46 | #define PROF_SHOW() \ | 46 | #define PROF_SHOW() \ |
47 | do { \ | 47 | do { \ |
diff --git a/src/renderer_m0.c b/src/renderer_m0.c index 10bbff5..ecb0402 100644 --- a/src/renderer_m0.c +++ b/src/renderer_m0.c | |||
@@ -22,6 +22,9 @@ | |||
22 | // Pointer to the backbuffer. | 22 | // Pointer to the backbuffer. |
23 | static u32 *backbuf = BUF_1; | 23 | static u32 *backbuf = BUF_1; |
24 | 24 | ||
25 | // Tracking which tiles are "dirty" and need refreshing. | ||
26 | static u32 dirty_tiles[21] = {0}; | ||
27 | |||
25 | // Position of the tilemap. | 28 | // Position of the tilemap. |
26 | #define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) | 29 | #define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) |
27 | 30 | ||
@@ -74,6 +77,7 @@ draw_pixel(size_t x, size_t y, u8 clr) { | |||
74 | u32 mask = 0xF << shift; | 77 | u32 mask = 0xF << shift; |
75 | u32 row = clr << shift; | 78 | u32 row = clr << shift; |
76 | *dst = (*dst & ~mask) | row; | 79 | *dst = (*dst & ~mask) | row; |
80 | dirty_tiles[tile_y] |= 1 << tile_x; | ||
77 | } | 81 | } |
78 | 82 | ||
79 | IWRAM_CODE | 83 | IWRAM_CODE |
@@ -89,6 +93,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { | |||
89 | size_t start_col = x0 % 8; | 93 | size_t start_col = x0 % 8; |
90 | size_t end_col = x1 % 8; | 94 | size_t end_col = x1 % 8; |
91 | size_t start_row = y0 % 8; | 95 | size_t start_row = y0 % 8; |
96 | u32 dirty = (1 << tile_x0) | (1 << tile_x1); | ||
92 | 97 | ||
93 | // Horizontal line. There are 3 cases: | 98 | // Horizontal line. There are 3 cases: |
94 | // 1. Lines fit on a single tile. | 99 | // 1. Lines fit on a single tile. |
@@ -112,11 +117,13 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { | |||
112 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); | 117 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); |
113 | dst += 8; | 118 | dst += 8; |
114 | for (size_t i = 1; i < dtx; i++) { | 119 | for (size_t i = 1; i < dtx; i++) { |
120 | dirty |= (1 << (tile_x0 + i)); | ||
115 | *dst = row; | 121 | *dst = row; |
116 | dst += 8; | 122 | dst += 8; |
117 | } | 123 | } |
118 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); | 124 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); |
119 | } | 125 | } |
126 | dirty_tiles[tile_y] |= dirty; | ||
120 | } | 127 | } |
121 | 128 | ||
122 | IWRAM_CODE | 129 | IWRAM_CODE |
@@ -135,6 +142,7 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { | |||
135 | size_t start_row1 = y1 % 8; | 142 | size_t start_row1 = y1 % 8; |
136 | 143 | ||
137 | size_t shift_left = start_col * 4; | 144 | size_t shift_left = start_col * 4; |
145 | u32 dirty = (1 << tile_x); | ||
138 | 146 | ||
139 | u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; | 147 | u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; |
140 | u32 mask = 0x0000000F << shift_left; | 148 | u32 mask = 0x0000000F << shift_left; |
@@ -150,6 +158,7 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { | |||
150 | } | 158 | } |
151 | dst += 8 * 31; | 159 | dst += 8 * 31; |
152 | for (size_t j = 1; j < dty; j++) { | 160 | for (size_t j = 1; j < dty; j++) { |
161 | dirty_tiles[tile_y0 + j] |= dirty; | ||
153 | for (size_t i = 0; i < 8; i++, dst++) { | 162 | for (size_t i = 0; i < 8; i++, dst++) { |
154 | dst[0] = (dst[0] & ~mask) | row; | 163 | dst[0] = (dst[0] & ~mask) | row; |
155 | } | 164 | } |
@@ -159,6 +168,8 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { | |||
159 | dst[0] = (dst[0] & ~mask) | row; | 168 | dst[0] = (dst[0] & ~mask) | row; |
160 | } | 169 | } |
161 | } | 170 | } |
171 | dirty_tiles[tile_y0] |= dirty; | ||
172 | dirty_tiles[tile_y1] |= dirty; | ||
162 | } | 173 | } |
163 | 174 | ||
164 | IWRAM_CODE | 175 | IWRAM_CODE |
@@ -458,101 +469,141 @@ decode_1bpp(u8 row, u8 flip_x) { | |||
458 | #endif | 469 | #endif |
459 | 470 | ||
460 | IWRAM_CODE | 471 | IWRAM_CODE |
461 | static inline | 472 | UNROLL_LOOPS |
462 | void | 473 | void |
463 | draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 clr, u8 flip_x) { | 474 | draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { |
464 | BOUNDCHECK_SCREEN(x, y); | 475 | BOUNDCHECK_SCREEN(x, y); |
465 | 476 | size_t tile_x0 = x / 8; | |
466 | size_t tile_x = x / 8; | 477 | size_t tile_x1 = (x + 7) / 8; |
467 | size_t tile_y = y / 8; | 478 | size_t tile_y = y / 8; |
468 | size_t start_col = x % 8; | 479 | size_t start_col = x % 8; |
469 | size_t start_row = y % 8; | 480 | size_t start_row = y % 8; |
470 | size_t shift_left = start_col * 4; | 481 | size_t shift_left = start_col * 4; |
471 | size_t shift_right = (8 - start_col) * 4; | 482 | size_t shift_right = (8 - start_col) * 4; |
472 | 483 | u32 dirty = (1 << tile_x0) | (1 << tile_x1); | |
473 | u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; | 484 | u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; |
474 | #if DEC_BIG_LUT | 485 | #if DEC_BIG_LUT |
475 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | 486 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; |
476 | u32 clr_a = lut[a]; | ||
477 | u32 clr_b = lut[b]; | ||
478 | #else | ||
479 | u32 clr_a = decode_1bpp(a, flip_x); | ||
480 | u32 clr_b = decode_1bpp(b, flip_x); | ||
481 | #endif | 487 | #endif |
482 | u32 mask_a = (clr_a * 0xF); | ||
483 | u32 mask_b = (clr_b * 0xF); | ||
484 | u32 mask = (mask_a | mask_b); | ||
485 | u32 color; | ||
486 | if (clr == 0) { | ||
487 | color = clr_a + (clr_b << 1); | ||
488 | } else if (clr == 15) { | ||
489 | color = 0; | ||
490 | } else { | ||
491 | color = (clr_a | clr_b) * clr; | ||
492 | } | ||
493 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
494 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
495 | } | ||
496 | |||
497 | IWRAM_CODE | ||
498 | static inline | ||
499 | void | ||
500 | draw_1bpp_row(size_t x, size_t y, u8 a, u8 clr, u8 flip_x) { | ||
501 | BOUNDCHECK_SCREEN(x, y); | ||
502 | |||
503 | size_t tile_x = x / 8; | ||
504 | size_t tile_y = y / 8; | ||
505 | size_t start_col = x % 8; | ||
506 | size_t start_row = y % 8; | ||
507 | size_t shift_left = start_col * 4; | ||
508 | size_t shift_right = (8 - start_col) * 4; | ||
509 | |||
510 | u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; | ||
511 | u32 color = decode_1bpp(a, flip_x); | ||
512 | u32 mask = (color * 0xF); | ||
513 | color *= clr; | ||
514 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
515 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
516 | } | ||
517 | |||
518 | IWRAM_CODE | ||
519 | void | ||
520 | draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | ||
521 | BOUNDCHECK_SCREEN(x, y); | ||
522 | if (!flip_y) { | 488 | if (!flip_y) { |
523 | for(size_t v = 0; v < 8; v++) { | 489 | for(size_t v = 0; v < 8; v++, dst++) { |
524 | if ((y + v) >= SCREEN_HEIGHT) break; | 490 | if ((y + v) >= SCREEN_HEIGHT) break; |
525 | u8 ch1 = sprite[v + 0]; | 491 | u8 ch1 = sprite[v + 0]; |
526 | u8 ch2 = sprite[v + 8]; | 492 | u8 ch2 = sprite[v + 8]; |
527 | draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x); | 493 | #if DEC_BIG_LUT |
494 | u32 clr_a = lut[ch1]; | ||
495 | u32 clr_b = lut[ch2]; | ||
496 | #else | ||
497 | u32 clr_a = decode_1bpp(ch1, flip_x); | ||
498 | u32 clr_b = decode_1bpp(ch2, flip_x); | ||
499 | #endif | ||
500 | u32 mask_a = (clr_a * 0xF); | ||
501 | u32 mask_b = (clr_b * 0xF); | ||
502 | u32 mask = (mask_a | mask_b); | ||
503 | u32 color; | ||
504 | if (clr == 0) { | ||
505 | color = clr_a + (clr_b << 1); | ||
506 | } else if (clr == 15) { | ||
507 | color = 0; | ||
508 | } else { | ||
509 | color = (clr_a | clr_b) * clr; | ||
510 | } | ||
511 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
512 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
513 | if ((start_row + v) == 7) { | ||
514 | dirty_tiles[tile_y + 1] |= dirty; | ||
515 | dst += (32 - 1) * 8; | ||
516 | } | ||
528 | } | 517 | } |
529 | } else { | 518 | } else { |
530 | for(size_t v = 0; v < 8; v++) { | 519 | for(size_t v = 0; v < 8; v++, dst++) { |
531 | if ((y + v) >= SCREEN_HEIGHT) break; | 520 | if ((y + v) >= SCREEN_HEIGHT) break; |
532 | u8 ch1 = sprite[(7 - v) + 0]; | 521 | u8 ch1 = sprite[(7 - v) + 0]; |
533 | u8 ch2 = sprite[(7 - v) + 8]; | 522 | u8 ch2 = sprite[(7 - v) + 8]; |
534 | draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x); | 523 | #if DEC_BIG_LUT |
524 | u32 clr_a = lut[ch1]; | ||
525 | u32 clr_b = lut[ch2]; | ||
526 | #else | ||
527 | u32 clr_a = decode_1bpp(ch1, flip_x); | ||
528 | u32 clr_b = decode_1bpp(ch2, flip_x); | ||
529 | #endif | ||
530 | u32 mask_a = (clr_a * 0xF); | ||
531 | u32 mask_b = (clr_b * 0xF); | ||
532 | u32 mask = (mask_a | mask_b); | ||
533 | u32 color; | ||
534 | if (clr == 0) { | ||
535 | color = clr_a + (clr_b << 1); | ||
536 | } else if (clr == 15) { | ||
537 | color = 0; | ||
538 | } else { | ||
539 | color = (clr_a | clr_b) * clr; | ||
540 | } | ||
541 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
542 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
543 | if ((start_row + v) == 7) { | ||
544 | dirty_tiles[tile_y + 1] |= dirty; | ||
545 | dst += (32 - 1) * 8; | ||
546 | } | ||
535 | } | 547 | } |
536 | } | 548 | } |
549 | dirty_tiles[tile_y] |= dirty; | ||
537 | } | 550 | } |
538 | 551 | ||
539 | IWRAM_CODE | 552 | IWRAM_CODE |
553 | UNROLL_LOOPS | ||
540 | void | 554 | void |
541 | draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | 555 | draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { |
542 | BOUNDCHECK_SCREEN(x, y); | 556 | BOUNDCHECK_SCREEN(x, y); |
557 | size_t tile_x0 = x / 8; | ||
558 | size_t tile_x1 = (x + 7) / 8; | ||
559 | size_t tile_y = y / 8; | ||
560 | size_t start_col = x % 8; | ||
561 | size_t start_row = y % 8; | ||
562 | size_t shift_left = start_col * 4; | ||
563 | size_t shift_right = (8 - start_col) * 4; | ||
564 | u32 dirty = (1 << tile_x0) | (1 << tile_x1); | ||
565 | u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; | ||
566 | #if DEC_BIG_LUT | ||
567 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | ||
568 | #endif | ||
543 | if (!flip_y) { | 569 | if (!flip_y) { |
544 | for(size_t v = 0; v < 8; v++) { | 570 | for(size_t v = 0; v < 8; v++, dst++) { |
545 | if ((y + v) >= SCREEN_HEIGHT) break; | 571 | if ((y + v) >= SCREEN_HEIGHT) break; |
546 | u8 ch1 = sprite[v]; | 572 | u8 ch1 = sprite[v + 0]; |
547 | draw_1bpp_row(x, y + v, ch1, clr, flip_x); | 573 | #if DEC_BIG_LUT |
574 | u32 color = lut[ch1]; | ||
575 | #else | ||
576 | u32 color = decode_1bpp(ch1, flip_x); | ||
577 | #endif | ||
578 | u32 mask = (color * 0xF); | ||
579 | color *= clr; | ||
580 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
581 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
582 | if ((start_row + v) == 7) { | ||
583 | dirty_tiles[tile_y + 1] |= dirty; | ||
584 | dst += (32 - 1) * 8; | ||
585 | } | ||
548 | } | 586 | } |
549 | } else { | 587 | } else { |
550 | for(size_t v = 0; v < 8; v++) { | 588 | for(size_t v = 0; v < 8; v++, dst++) { |
551 | if ((y + v) >= SCREEN_HEIGHT) break; | 589 | if ((y + v) >= SCREEN_HEIGHT) break; |
552 | u8 ch1 = sprite[(7 - v)]; | 590 | u8 ch1 = sprite[v + 0]; |
553 | draw_1bpp_row(x, y + v, ch1, clr, flip_x); | 591 | #if DEC_BIG_LUT |
592 | u32 color = lut[ch1]; | ||
593 | #else | ||
594 | u32 color = decode_1bpp(ch1, flip_x); | ||
595 | #endif | ||
596 | u32 mask = (color * 0xF); | ||
597 | color *= clr; | ||
598 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
599 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
600 | if ((start_row + v) == 7) { | ||
601 | dirty_tiles[tile_y + 1] |= dirty; | ||
602 | dst += (32 - 1) * 8; | ||
603 | } | ||
554 | } | 604 | } |
555 | } | 605 | } |
606 | dirty_tiles[tile_y] |= dirty; | ||
556 | } | 607 | } |
557 | 608 | ||
558 | // | 609 | // |