From 402a74bf60e6e00e625364628e2d1ffe28d225ca Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Thu, 20 Apr 2023 09:10:50 +0200 Subject: Add initial screen fill implementation Still need to add dirty tiles to it but thus far should be fine. --- src/ppu.c | 160 +++++++++++++++++++++++--------------------------------------- 1 file changed, 59 insertions(+), 101 deletions(-) (limited to 'src/ppu.c') diff --git a/src/ppu.c b/src/ppu.c index 3b159af..8e1710c 100644 --- a/src/ppu.c +++ b/src/ppu.c @@ -248,115 +248,73 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { } IWRAM_CODE +void clear_screen(u32 *layer, u8 clr) { + // We have to make sure we leave the last tile blank to use as alpha channel + // when moving the BG during double buffering in case we are using that. + dma_fill(layer, 0x11111111 * clr, KB(20) - 32, 3); +} + +IWRAM_CODE +static inline void -ppu_rect(u32 *layer, size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { +draw_hline(u32 *layer, size_t x0, size_t x1, size_t y0, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); - BOUNDCHECK_SCREEN(x1, y1); - + BOUNDCHECK_SCREEN(x1, y0); // Find row positions for the given x/y coordinates. size_t tile_x0 = x0 / 8; - size_t tile_y0 = y0 / 8; size_t tile_x1 = x1 / 8; - size_t tile_y1 = y1 / 8; - size_t start_col0 = x0 % 8; - size_t start_col1 = x1 % 8; - size_t start_row0 = y0 % 8; - size_t start_row1 = y1 % 8; - - // Get a pointer to the backbuffer and the tile row. - u32 *buf_top = &layer[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; - u32 *buf_bot = &layer[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; - - size_t dx = tile_x1 - tile_x0; - size_t dy = tile_y1 - tile_y0; - - // We can update two lines at a time, which is faster than calling draw_line - // four times. - if (dx < 1) { - u32 row_mask = 0xFFFFFFFF; - row_mask >>= (7 - start_col1 - dx) * 4; - row_mask &= 0xFFFFFFFF << start_col0 * 4; - u32 row = (0x11111111 * clr) & row_mask; - buf_top[0] = (buf_top[0] & ~row_mask) | row; - buf_bot[0] = (buf_bot[0] & ~row_mask) | row; - dirty_tiles[tile_y0] |= 1 << tile_x0; - dirty_tiles[tile_y1] |= 1 << tile_x0; + size_t tile_y = y0 / 8; + size_t start_col = x0 % 8; + size_t end_col = x1 % 8; + size_t start_row = y0 % 8; + + // Horizontal line. There are 3 cases: + // 1. Lines fit on a single tile. + // 2. Lines go through 2 tiles, both require partial row updates. + // 3. Lines go through 3 or more tiles, first and last tiles use + // partial row updates, rows in the middle can write the entire + // row. + size_t dtx = tile_x1 - tile_x0; + u32 *dst = &layer[start_row + (tile_x0 + tile_y * 32) * 8]; + if (dtx < 1) { + size_t shift_left = start_col * 4; + size_t shift_right = (7 - end_col) * 4; + u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); + u32 row = (0x11111111 * clr) & mask; + *dst = (*dst & ~mask) | row; } else { - size_t shift_left = start_col0 * 4; - size_t shift_right = (7 - start_col1) * 4; - u32 row_mask = 0xFFFFFFFF; + size_t shift_left = start_col * 4; + size_t shift_right = (7 - end_col) * 4; + u32 mask = 0xFFFFFFFF; u32 row = 0x11111111 * clr; - buf_top[0] = buf_top[0] & ~(row_mask << shift_left); - buf_top[0] |= row << shift_left; - buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); - buf_bot[0] |= row << shift_left; - dirty_tiles[tile_y0] |= 1 << tile_x0; - dirty_tiles[tile_y1] |= 1 << tile_x0; - for (size_t i = 1; i < dx; i++) { - buf_top[i * 8] = row; - buf_bot[i * 8] = row; - dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); - dirty_tiles[tile_y1] |= 1 << (tile_x0 + i); - } - buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right); - buf_top[dx * 8] |= row >> shift_right; - buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right); - buf_bot[dx * 8] |= row >> shift_right; - dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); - dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx); - } - u32 row_mask_left = 0xF << start_col0 * 4; - u32 row_mask_right = 0xF << start_col1 * 4; - u32 row_left = (0x11111111 * clr) & row_mask_left; - u32 row_right = (0x11111111 * clr) & row_mask_right; - if (dy < 1) { - for (size_t i = 1; i < y1 - y0; i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; - } - } else { - for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; - } - buf_top += 8 * 31; - for (size_t j = 1; j < dy; j++) { - for (size_t i = 0; i < 8; i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; - } - buf_top += 8 * 31; - dirty_tiles[tile_y0 + j] |= 1 << tile_x0; - dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx); - } - for (size_t i = 0; i < start_row1; i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; + *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); + dst += 8; + for (size_t i = 1; i < dtx; i++) { + *dst = row; + dst += 8; } + *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); } } IWRAM_CODE void screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { - BOUNDCHECK_SCREEN(x0, y0); - BOUNDCHECK_SCREEN(x1, y1); - - size_t dx = x1 - x0; - size_t dy = y1 - y0; - size_t n_rect = MIN(dx, dy); - n_rect = n_rect / 2 + 1; - for (size_t i = 0; i < n_rect; i++) { - ppu_rect(layer, x0 + i, y0 + i, x1 - i, y1 - i, clr); + MAYBE_SWAP(x0, x1); + MAYBE_SWAP(y0, y1); + + // Special condition. If the screen is to be completely filled, use the DMA + // instead. + if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { + clear_screen(layer, clr); + return; + } + + // Drawline implementation. + for (size_t y = y0; y <= y1; y++) { + draw_hline(layer, x0, x1, y, clr); } + // TODO: dirty? } #if NEW_PPU == 0 @@ -616,15 +574,15 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; if (clr == 1) { - u64 mask = ~((u64)0xFFFFFFFF << shift_left); + u32 mask = 0xFFFFFFFF; if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u8 ch2 = sprite[v | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { @@ -633,8 +591,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u8 ch1 = sprite[(7 - v)]; u8 ch2 = sprite[(7 - v) | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } @@ -707,7 +665,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { color = (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); - dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } @@ -727,7 +685,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { color = (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); - dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } -- cgit v1.2.1