From 2d99fe361bd795dd120f51e6bc0c37105cad491a Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Apr 2023 15:18:55 +0200 Subject: Add back dirty tiles optimization --- src/main.c | 19 ++++++++++++------- src/ppu.c | 63 +++++++++++++------------------------------------------------- 2 files changed, 25 insertions(+), 57 deletions(-) diff --git a/src/main.c b/src/main.c index 450a3cb..1dc5b5f 100644 --- a/src/main.c +++ b/src/main.c @@ -64,16 +64,21 @@ // txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ // txt_printf("MIX: %lu ", mix_cycles);\ // } while (0) +// #define PROF_SHOW() \ +// do { \ +// txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ +// txt_printf("PIX: %lu ", ppu_pixel_cycles);\ +// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\ +// txt_printf("1BPP: %lu ", ppu_icn_cycles);\ +// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\ +// txt_printf("2BPP: %lu ", ppu_chr_cycles);\ +// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ +// txt_printf("FLIP: %lu ", flip_cycles);\ +// } while (0) #define PROF_SHOW() \ do { \ txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ - txt_printf("PIX: %lu ", ppu_pixel_cycles);\ - txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\ - txt_printf("1BPP: %lu ", ppu_icn_cycles);\ - txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\ - txt_printf("2BPP: %lu ", ppu_chr_cycles);\ - txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ - txt_printf("FLIP: %lu ", flip_cycles);\ + txt_printf("1BPP: %lu 2BPP: %lu", ppu_icn_cycles, ppu_chr_cycles);\ } while (0) #define PROF_INIT() \ static u32 ppu_pixel_cycles = 0;\ diff --git a/src/ppu.c b/src/ppu.c index 96a3bc0..9937d95 100644 --- a/src/ppu.c +++ b/src/ppu.c @@ -310,6 +310,8 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { size_t start_row = y % 8; size_t shift_left = start_col * 4; size_t shift_right = (8 - start_col) * 4; + u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3)); + u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; if (blending[4][clr]) { @@ -374,51 +376,11 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { } } } - - // dirty_tiles[y >> 3] |= dirtyflag; - // dirty_tiles[(y + 7) >> 3] |= dirtyflag; + dirty_tiles[y >> 3] |= dirtyflag; + dirty_tiles[(y + 7) >> 3] |= dirtyflag; } #endif -IWRAM_CODE -static inline -void -draw_2bpp_row(void *layer, size_t x, size_t y, u8 a, u8 b, u8 flip_x) { - // BOUNDCHECK_SCREEN(x, y); - - size_t tile_x = x / 8; - size_t tile_y = y / 8; - size_t start_col = x % 8; - size_t start_row = y % 8; - size_t shift_left = start_col * 4; - size_t shift_right = (8 - start_col) * 4; - - u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; -// #if DEC_BIG_LUT - u32 *lut = dec_byte; - if (flip_x) { - lut = dec_byte_flip_x; - } - u32 clr_a = lut[a]; - u32 clr_b = lut[b]; -// #else -// u32 clr_a = decode_1bpp(a, flip_x); -// u32 clr_b = decode_1bpp(b, flip_x); -// #endif - u32 mask_a = (clr_a * 0xF); - u32 mask_b = (clr_b * 0xF); - u32 mask = (mask_a | mask_b); - u32 color = clr_a + (clr_b << 1); - if (start_col == 0) { - dst[0] = (dst[0] & ~mask) | color; - } else { - dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); - dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); - } - - // TODO: different blend modes? -} - #if NEW_PPU == 0 IWRAM_CODE void @@ -550,6 +512,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { size_t start_row = y % 8; size_t shift_left = start_col * 4; size_t shift_right = (8 - start_col) * 4; + u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3)); u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; if (clr == 1) { @@ -689,8 +652,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { } } } - // dirty_tiles[y >> 3] |= dirtyflag; - // dirty_tiles[(y + 7) >> 3] |= dirtyflag; + dirty_tiles[y >> 3] |= dirtyflag; + dirty_tiles[(y + 7) >> 3] |= dirtyflag; } #endif @@ -712,20 +675,20 @@ flipbuf(Ppu *p) { Tile *mem_fg = FG_FRONT; Tile *mem_bg = BG_FRONT; for (size_t j = 0; j < 20; ++j) { - // if (dirty_tiles[j] == 0) { - // continue; - // } + if (dirty_tiles[j] == 0) { + continue; + } size_t k = 1; for (size_t i = 0; i < 30; ++i, k <<= 1) { - // if (dirty_tiles[j] & k) { + if (dirty_tiles[j] & k) { Tile *tile_fg = p->fg; Tile *tile_bg = p->bg; mem_fg[i + j * 32] = tile_fg[i + j * 32]; mem_bg[i + j * 32] = tile_bg[i + j * 32]; - // } + } } - // dirty_tiles[j] = 0; + dirty_tiles[j] = 0; } } -- cgit v1.2.1