From ef15a89a8cf161241c3c382e0e332427e46be8a9 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 4 Jun 2021 14:30:06 +0200 Subject: Add more perf improvements to draw rect --- src/renderer.c | 51 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'src/renderer.c') diff --git a/src/renderer.c b/src/renderer.c index 51647cb..c66b87b 100644 --- a/src/renderer.c +++ b/src/renderer.c @@ -26,7 +26,7 @@ static u32 dirty_tiles[21] = {0}; IWRAM_CODE void -draw_pixel(u16 x, u16 y, u8 color) { +draw_pixel(size_t x, size_t y, u8 color) { BOUNDCHECK_SCREEN(x, y); // Find row position for the given x/y coordinates. @@ -46,7 +46,7 @@ draw_pixel(u16 x, u16 y, u8 color) { IWRAM_CODE void -draw_rect(int x0, int y0, int x1, int y1, u8 clr) { +draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y1); @@ -64,8 +64,8 @@ draw_rect(int x0, int y0, int x1, int y1, u8 clr) { u32 *backbuffer0 = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; u32 *backbuffer1 = &BACKBUF[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; - u16 dx = tile_x1 - tile_x0; - u16 dy = y1 - y0; + size_t dx = tile_x1 - tile_x0; + size_t dy = tile_y1 - tile_y0; // There are 3 cases: // 1. Lines fit on a single tile. @@ -93,24 +93,49 @@ draw_rect(int x0, int y0, int x1, int y1, u8 clr) { for (size_t i = 1; i < dx; i++) { backbuffer0[i * 8] = row; backbuffer1[i * 8] = row; - dirty_tiles[tile_y0] |= 1 << tile_x0 + i; - dirty_tiles[tile_y1] |= 1 << tile_x0 + i; + dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); + dirty_tiles[tile_y1] |= 1 << (tile_x0 + i); } backbuffer0[dx * 8] = (backbuffer0[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right); backbuffer1[dx * 8] = (backbuffer1[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right); - dirty_tiles[tile_y0] |= 1 << tile_x0 + dx; - dirty_tiles[tile_y1] |= 1 << tile_x0 + dx; + dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); + dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx); } - // The vertical line cases are analogous to the horizontal cases. - for (int i = 1; i < dy; ++i) { - draw_pixel(x0, y0 + i, clr); - draw_pixel(x1, y0 + i, clr); + // The vertical line cases are analogous to the horizontal ones. + u32 row_mask_left = 0xF << start_col0 * 4; + u32 row_mask_right = 0xF << start_col1 * 4; + u32 row_left = (0x11111111 * clr) & row_mask_left; + u32 row_right = (0x11111111 * clr) & row_mask_right; + if (dy < 1) { + for (size_t i = 1; i < y1 - y0; i++, backbuffer0++) { + backbuffer0[1] = (backbuffer0[1] & ~row_mask_left) | row_left; + backbuffer0[1 + 8 * dx] = (backbuffer0[1 + 8 * dx] & ~row_mask_right) | row_right; + } + } else { + for (size_t i = 1; i < (8 - start_row0); i++, backbuffer0++) { + backbuffer0[1] = (backbuffer0[1] & ~row_mask_left) | row_left; + backbuffer0[1 + 8 * dx] = (backbuffer0[1 + 8 * dx] & ~row_mask_right) | row_right; + } + backbuffer0 += 8 * 31; + for (size_t j = 1; j < dy; j++) { + for (size_t i = 0; i < 8; i++, backbuffer0++) { + backbuffer0[1] = (backbuffer0[1] & ~row_mask_left) | row_left; + backbuffer0[1 + 8 * dx] = (backbuffer0[1 + 8 * dx] & ~row_mask_right) | row_right; + } + backbuffer0 += 8 * 31; + dirty_tiles[tile_y0 + j] |= 1 << tile_x0; + dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx); + } + for (size_t i = 0; i < start_row1; i++, backbuffer0++) { + backbuffer0[1] = (backbuffer0[1] & ~row_mask_left) | row_left; + backbuffer0[1 + 8 * dx] = (backbuffer0[1 + 8 * dx] & ~row_mask_right) | row_right; + } } } IWRAM_CODE void -draw_tile(u16 x, u16 y, Tile *tile, bool merge) { +draw_tile(size_t x, size_t y, Tile *tile, bool merge) { BOUNDCHECK_SCREEN(x, y); // Find row position for the given x/y coordinates. -- cgit v1.2.1