about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bad Diode <bd@badd10de.dev>, 2023-04-19 15:18:55 +0200
committer: Bad Diode <bd@badd10de.dev>, 2023-04-19 15:28:08 +0200
commit: 2d99fe361bd795dd120f51e6bc0c37105cad491a (patch)
tree: 0bd81700d0bc6cf30d61a583f52c8ffa27c19baa
parent: 6d943ebca061683c076bc026f7e15a3a047b2027 (diff)
download: uxngba-2d99fe361bd795dd120f51e6bc0c37105cad491a.tar.gz
          uxngba-2d99fe361bd795dd120f51e6bc0c37105cad491a.zip
Add back dirty tiles optimization
-rw-r--r--  src/main.c | 19
-rw-r--r--  src/ppu.c  | 63
2 files changed, 25 insertions, 57 deletions
diff --git a/src/main.c b/src/main.c
index 450a3cb..1dc5b5f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -64,16 +64,21 @@
64// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ 64// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\
65// txt_printf("MIX: %lu ", mix_cycles);\ 65// txt_printf("MIX: %lu ", mix_cycles);\
66// } while (0) 66// } while (0)
67// #define PROF_SHOW() \
68// do { \
69// txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
70// txt_printf("PIX: %lu ", ppu_pixel_cycles);\
71// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\
72// txt_printf("1BPP: %lu ", ppu_icn_cycles);\
73// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\
74// txt_printf("2BPP: %lu ", ppu_chr_cycles);\
75// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\
76// txt_printf("FLIP: %lu ", flip_cycles);\
77// } while (0)
67#define PROF_SHOW() \ 78#define PROF_SHOW() \
68 do { \ 79 do { \
69 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ 80 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
70 txt_printf("PIX: %lu ", ppu_pixel_cycles);\ 81 txt_printf("1BPP: %lu 2BPP: %lu", ppu_icn_cycles, ppu_chr_cycles);\
71 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\
72 txt_printf("1BPP: %lu ", ppu_icn_cycles);\
73 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\
74 txt_printf("2BPP: %lu ", ppu_chr_cycles);\
75 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\
76 txt_printf("FLIP: %lu ", flip_cycles);\
77 } while (0) 82 } while (0)
78#define PROF_INIT() \ 83#define PROF_INIT() \
79 static u32 ppu_pixel_cycles = 0;\ 84 static u32 ppu_pixel_cycles = 0;\
diff --git a/src/ppu.c b/src/ppu.c
index 96a3bc0..9937d95 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -310,6 +310,8 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
310 size_t start_row = y % 8; 310 size_t start_row = y % 8;
311 size_t shift_left = start_col * 4; 311 size_t shift_left = start_col * 4;
312 size_t shift_right = (8 - start_col) * 4; 312 size_t shift_right = (8 - start_col) * 4;
313 u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3));
314
313 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; 315 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
314 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 316 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
315 if (blending[4][clr]) { 317 if (blending[4][clr]) {
@@ -374,51 +376,11 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
374 } 376 }
375 } 377 }
376 } 378 }
377 379 dirty_tiles[y >> 3] |= dirtyflag;
378 // dirty_tiles[y >> 3] |= dirtyflag; 380 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
379 // dirty_tiles[(y + 7) >> 3] |= dirtyflag;
380} 381}
381#endif 382#endif
382 383
383IWRAM_CODE
384static inline
385void
386draw_2bpp_row(void *layer, size_t x, size_t y, u8 a, u8 b, u8 flip_x) {
387 // BOUNDCHECK_SCREEN(x, y);
388
389 size_t tile_x = x / 8;
390 size_t tile_y = y / 8;
391 size_t start_col = x % 8;
392 size_t start_row = y % 8;
393 size_t shift_left = start_col * 4;
394 size_t shift_right = (8 - start_col) * 4;
395
396 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
397// #if DEC_BIG_LUT
398 u32 *lut = dec_byte;
399 if (flip_x) {
400 lut = dec_byte_flip_x;
401 }
402 u32 clr_a = lut[a];
403 u32 clr_b = lut[b];
404// #else
405// u32 clr_a = decode_1bpp(a, flip_x);
406// u32 clr_b = decode_1bpp(b, flip_x);
407// #endif
408 u32 mask_a = (clr_a * 0xF);
409 u32 mask_b = (clr_b * 0xF);
410 u32 mask = (mask_a | mask_b);
411 u32 color = clr_a + (clr_b << 1);
412 if (start_col == 0) {
413 dst[0] = (dst[0] & ~mask) | color;
414 } else {
415 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
416 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
417 }
418
419 // TODO: different blend modes?
420}
421
422#if NEW_PPU == 0 384#if NEW_PPU == 0
423IWRAM_CODE 385IWRAM_CODE
424void 386void
@@ -550,6 +512,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
550 size_t start_row = y % 8; 512 size_t start_row = y % 8;
551 size_t shift_left = start_col * 4; 513 size_t shift_left = start_col * 4;
552 size_t shift_right = (8 - start_col) * 4; 514 size_t shift_right = (8 - start_col) * 4;
515 u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3));
553 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; 516 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
554 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 517 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
555 if (clr == 1) { 518 if (clr == 1) {
@@ -689,8 +652,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
689 } 652 }
690 } 653 }
691 } 654 }
692 // dirty_tiles[y >> 3] |= dirtyflag; 655 dirty_tiles[y >> 3] |= dirtyflag;
693 // dirty_tiles[(y + 7) >> 3] |= dirtyflag; 656 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
694} 657}
695#endif 658#endif
696 659
@@ -712,20 +675,20 @@ flipbuf(Ppu *p) {
712 Tile *mem_fg = FG_FRONT; 675 Tile *mem_fg = FG_FRONT;
713 Tile *mem_bg = BG_FRONT; 676 Tile *mem_bg = BG_FRONT;
714 for (size_t j = 0; j < 20; ++j) { 677 for (size_t j = 0; j < 20; ++j) {
715 // if (dirty_tiles[j] == 0) { 678 if (dirty_tiles[j] == 0) {
716 // continue; 679 continue;
717 // } 680 }
718 681
719 size_t k = 1; 682 size_t k = 1;
720 for (size_t i = 0; i < 30; ++i, k <<= 1) { 683 for (size_t i = 0; i < 30; ++i, k <<= 1) {
721 // if (dirty_tiles[j] & k) { 684 if (dirty_tiles[j] & k) {
722 Tile *tile_fg = p->fg; 685 Tile *tile_fg = p->fg;
723 Tile *tile_bg = p->bg; 686 Tile *tile_bg = p->bg;
724 mem_fg[i + j * 32] = tile_fg[i + j * 32]; 687 mem_fg[i + j * 32] = tile_fg[i + j * 32];
725 mem_bg[i + j * 32] = tile_bg[i + j * 32]; 688 mem_bg[i + j * 32] = tile_bg[i + j * 32];
726 // } 689 }
727 } 690 }
728 // dirty_tiles[j] = 0; 691 dirty_tiles[j] = 0;
729 } 692 }
730} 693}
731 694