From f3f221524e6be30217838661b4750820a7bebecf Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 4 Jun 2021 13:38:46 +0200 Subject: Add initial performance optimization for rect draw --- src/renderer.c | 72 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 17 deletions(-) (limited to 'src/renderer.c') diff --git a/src/renderer.c b/src/renderer.c index 4aa583d..51647cb 100644 --- a/src/renderer.c +++ b/src/renderer.c @@ -47,24 +47,62 @@ draw_pixel(u16 x, u16 y, u8 color) { IWRAM_CODE void draw_rect(int x0, int y0, int x1, int y1, u8 clr) { - if (x0 > x1) { - int tmp = x0; - x0 = x1; - x1 = tmp; - } - if (y0 > y1) { - int tmp = y0; - y0 = y1; - y1 = tmp; - } - int dx = x1 - x0; - int dy = y1 - y0; - // TODO: SLOW should be vectorized. - for (int i = 0; i <= dx; ++i) { - draw_pixel(x0 + i, y0, clr); - draw_pixel(x0 + i, y1, clr); + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + + // Find row positions for the given x/y coordinates. + size_t tile_x0 = x0 / 8; + size_t tile_y0 = y0 / 8; + size_t tile_x1 = x1 / 8; + size_t tile_y1 = y1 / 8; + size_t start_col0 = x0 % 8; + size_t start_col1 = x1 % 8; + size_t start_row0 = y0 % 8; + size_t start_row1 = y1 % 8; + + // Get a pointer to the backbuffer and the tile row. + u32 *backbuffer0 = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; + u32 *backbuffer1 = &BACKBUF[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; + + u16 dx = tile_x1 - tile_x0; + u16 dy = y1 - y0; + + // There are 3 cases: + // 1. Lines fit on a single tile. + // 2. Lines go through 2 tiles, both require partial row updates. + // 3. Lines go through 3 or more tiles, first and last tiles use partial + // row updates, rows in the middle can write the full row without masking.
+ if (dx < 1) { + u32 row_mask = 0xFFFFFFFF; + row_mask >>= (7 - start_col1 - dx) * 4; + row_mask &= 0xFFFFFFFF << start_col0 * 4; + u32 row = (0x11111111 * clr) & row_mask; + backbuffer0[0] = (backbuffer0[0] & ~row_mask) | row; + backbuffer1[0] = (backbuffer1[0] & ~row_mask) | row; + dirty_tiles[tile_y0] |= 1 << tile_x0; + dirty_tiles[tile_y1] |= 1 << tile_x0; + } else { + size_t shift_left = start_col0 * 4; + size_t shift_right = (7 - start_col1) * 4; + u32 row_mask = 0xFFFFFFFF; + u32 row = 0x11111111 * clr; + backbuffer0[0] = (backbuffer0[0] & ~(row_mask << shift_left)) | (row << shift_left); + backbuffer1[0] = (backbuffer1[0] & ~(row_mask << shift_left)) | (row << shift_left); + dirty_tiles[tile_y0] |= 1 << tile_x0; + dirty_tiles[tile_y1] |= 1 << tile_x0; + for (size_t i = 1; i < dx; i++) { + backbuffer0[i * 8] = row; + backbuffer1[i * 8] = row; + dirty_tiles[tile_y0] |= 1 << tile_x0 + i; + dirty_tiles[tile_y1] |= 1 << tile_x0 + i; + } + backbuffer0[dx * 8] = (backbuffer0[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right); + backbuffer1[dx * 8] = (backbuffer1[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right); + dirty_tiles[tile_y0] |= 1 << tile_x0 + dx; + dirty_tiles[tile_y1] |= 1 << tile_x0 + dx; } - for (int i = 0; i <= dy; ++i) { + // The vertical line cases are analogous to the horizontal cases. + for (int i = 1; i < dy; ++i) { draw_pixel(x0, y0 + i, clr); draw_pixel(x1, y0 + i, clr); } -- cgit v1.2.1