diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-19 15:18:55 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-19 15:28:08 +0200 |
commit | 2d99fe361bd795dd120f51e6bc0c37105cad491a (patch) | |
tree | 0bd81700d0bc6cf30d61a583f52c8ffa27c19baa | |
parent | 6d943ebca061683c076bc026f7e15a3a047b2027 (diff) | |
download | uxngba-2d99fe361bd795dd120f51e6bc0c37105cad491a.tar.gz uxngba-2d99fe361bd795dd120f51e6bc0c37105cad491a.zip |
Add back dirty tiles optimization
-rw-r--r-- | src/main.c | 19 | ||||
-rw-r--r-- | src/ppu.c | 63 |
2 files changed, 25 insertions, 57 deletions
@@ -64,16 +64,21 @@ | |||
64 | // txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ | 64 | // txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ |
65 | // txt_printf("MIX: %lu ", mix_cycles);\ | 65 | // txt_printf("MIX: %lu ", mix_cycles);\ |
66 | // } while (0) | 66 | // } while (0) |
67 | // #define PROF_SHOW() \ | ||
68 | // do { \ | ||
69 | // txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ | ||
70 | // txt_printf("PIX: %lu ", ppu_pixel_cycles);\ | ||
71 | // txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\ | ||
72 | // txt_printf("1BPP: %lu ", ppu_icn_cycles);\ | ||
73 | // txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\ | ||
74 | // txt_printf("2BPP: %lu ", ppu_chr_cycles);\ | ||
75 | // txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ | ||
76 | // txt_printf("FLIP: %lu ", flip_cycles);\ | ||
77 | // } while (0) | ||
67 | #define PROF_SHOW() \ | 78 | #define PROF_SHOW() \ |
68 | do { \ | 79 | do { \ |
69 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ | 80 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ |
70 | txt_printf("PIX: %lu ", ppu_pixel_cycles);\ | 81 | txt_printf("1BPP: %lu 2BPP: %lu", ppu_icn_cycles, ppu_chr_cycles);\ |
71 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\ | ||
72 | txt_printf("1BPP: %lu ", ppu_icn_cycles);\ | ||
73 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\ | ||
74 | txt_printf("2BPP: %lu ", ppu_chr_cycles);\ | ||
75 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ | ||
76 | txt_printf("FLIP: %lu ", flip_cycles);\ | ||
77 | } while (0) | 82 | } while (0) |
78 | #define PROF_INIT() \ | 83 | #define PROF_INIT() \ |
79 | static u32 ppu_pixel_cycles = 0;\ | 84 | static u32 ppu_pixel_cycles = 0;\ |
@@ -310,6 +310,8 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
310 | size_t start_row = y % 8; | 310 | size_t start_row = y % 8; |
311 | size_t shift_left = start_col * 4; | 311 | size_t shift_left = start_col * 4; |
312 | size_t shift_right = (8 - start_col) * 4; | 312 | size_t shift_right = (8 - start_col) * 4; |
313 | u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3)); | ||
314 | |||
313 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; | 315 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; |
314 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | 316 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; |
315 | if (blending[4][clr]) { | 317 | if (blending[4][clr]) { |
@@ -374,51 +376,11 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
374 | } | 376 | } |
375 | } | 377 | } |
376 | } | 378 | } |
377 | 379 | dirty_tiles[y >> 3] |= dirtyflag; | |
378 | // dirty_tiles[y >> 3] |= dirtyflag; | 380 | dirty_tiles[(y + 7) >> 3] |= dirtyflag; |
379 | // dirty_tiles[(y + 7) >> 3] |= dirtyflag; | ||
380 | } | 381 | } |
381 | #endif | 382 | #endif |
382 | 383 | ||
383 | IWRAM_CODE | ||
384 | static inline | ||
385 | void | ||
386 | draw_2bpp_row(void *layer, size_t x, size_t y, u8 a, u8 b, u8 flip_x) { | ||
387 | // BOUNDCHECK_SCREEN(x, y); | ||
388 | |||
389 | size_t tile_x = x / 8; | ||
390 | size_t tile_y = y / 8; | ||
391 | size_t start_col = x % 8; | ||
392 | size_t start_row = y % 8; | ||
393 | size_t shift_left = start_col * 4; | ||
394 | size_t shift_right = (8 - start_col) * 4; | ||
395 | |||
396 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; | ||
397 | // #if DEC_BIG_LUT | ||
398 | u32 *lut = dec_byte; | ||
399 | if (flip_x) { | ||
400 | lut = dec_byte_flip_x; | ||
401 | } | ||
402 | u32 clr_a = lut[a]; | ||
403 | u32 clr_b = lut[b]; | ||
404 | // #else | ||
405 | // u32 clr_a = decode_1bpp(a, flip_x); | ||
406 | // u32 clr_b = decode_1bpp(b, flip_x); | ||
407 | // #endif | ||
408 | u32 mask_a = (clr_a * 0xF); | ||
409 | u32 mask_b = (clr_b * 0xF); | ||
410 | u32 mask = (mask_a | mask_b); | ||
411 | u32 color = clr_a + (clr_b << 1); | ||
412 | if (start_col == 0) { | ||
413 | dst[0] = (dst[0] & ~mask) | color; | ||
414 | } else { | ||
415 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
416 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
417 | } | ||
418 | |||
419 | // TODO: different blend modes? | ||
420 | } | ||
421 | |||
422 | #if NEW_PPU == 0 | 384 | #if NEW_PPU == 0 |
423 | IWRAM_CODE | 385 | IWRAM_CODE |
424 | void | 386 | void |
@@ -550,6 +512,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
550 | size_t start_row = y % 8; | 512 | size_t start_row = y % 8; |
551 | size_t shift_left = start_col * 4; | 513 | size_t shift_left = start_col * 4; |
552 | size_t shift_right = (8 - start_col) * 4; | 514 | size_t shift_right = (8 - start_col) * 4; |
515 | u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3)); | ||
553 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; | 516 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; |
554 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | 517 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; |
555 | if (clr == 1) { | 518 | if (clr == 1) { |
@@ -689,8 +652,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
689 | } | 652 | } |
690 | } | 653 | } |
691 | } | 654 | } |
692 | // dirty_tiles[y >> 3] |= dirtyflag; | 655 | dirty_tiles[y >> 3] |= dirtyflag; |
693 | // dirty_tiles[(y + 7) >> 3] |= dirtyflag; | 656 | dirty_tiles[(y + 7) >> 3] |= dirtyflag; |
694 | } | 657 | } |
695 | #endif | 658 | #endif |
696 | 659 | ||
@@ -712,20 +675,20 @@ flipbuf(Ppu *p) { | |||
712 | Tile *mem_fg = FG_FRONT; | 675 | Tile *mem_fg = FG_FRONT; |
713 | Tile *mem_bg = BG_FRONT; | 676 | Tile *mem_bg = BG_FRONT; |
714 | for (size_t j = 0; j < 20; ++j) { | 677 | for (size_t j = 0; j < 20; ++j) { |
715 | // if (dirty_tiles[j] == 0) { | 678 | if (dirty_tiles[j] == 0) { |
716 | // continue; | 679 | continue; |
717 | // } | 680 | } |
718 | 681 | ||
719 | size_t k = 1; | 682 | size_t k = 1; |
720 | for (size_t i = 0; i < 30; ++i, k <<= 1) { | 683 | for (size_t i = 0; i < 30; ++i, k <<= 1) { |
721 | // if (dirty_tiles[j] & k) { | 684 | if (dirty_tiles[j] & k) { |
722 | Tile *tile_fg = p->fg; | 685 | Tile *tile_fg = p->fg; |
723 | Tile *tile_bg = p->bg; | 686 | Tile *tile_bg = p->bg; |
724 | mem_fg[i + j * 32] = tile_fg[i + j * 32]; | 687 | mem_fg[i + j * 32] = tile_fg[i + j * 32]; |
725 | mem_bg[i + j * 32] = tile_bg[i + j * 32]; | 688 | mem_bg[i + j * 32] = tile_bg[i + j * 32]; |
726 | // } | 689 | } |
727 | } | 690 | } |
728 | // dirty_tiles[j] = 0; | 691 | dirty_tiles[j] = 0; |
729 | } | 692 | } |
730 | } | 693 | } |
731 | 694 | ||