From d68d16405b013967e0faa12f9dfc608e3bb3d0d4 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Apr 2023 17:58:04 +0200 Subject: Update uxn core and fix some new ppu bugs --- src/ppu.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 134 insertions(+), 28 deletions(-) (limited to 'src/ppu.c') diff --git a/src/ppu.c b/src/ppu.c index c258fe2..afde963 100644 --- a/src/ppu.c +++ b/src/ppu.c @@ -247,6 +247,118 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { dirty_tiles[tile_y] |= 1 << tile_x; } +IWRAM_CODE +void +ppu_rect(u32 *layer, size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + + // Find row positions for the given x/y coordinates. + size_t tile_x0 = x0 / 8; + size_t tile_y0 = y0 / 8; + size_t tile_x1 = x1 / 8; + size_t tile_y1 = y1 / 8; + size_t start_col0 = x0 % 8; + size_t start_col1 = x1 % 8; + size_t start_row0 = y0 % 8; + size_t start_row1 = y1 % 8; + + // Get a pointer to the backbuffer and the tile row. + u32 *buf_top = &layer[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; + u32 *buf_bot = &layer[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; + + size_t dx = tile_x1 - tile_x0; + size_t dy = tile_y1 - tile_y0; + + // We can update two lines at a time, which is faster than calling draw_line + // four times. + if (dx < 1) { + u32 row_mask = 0xFFFFFFFF; + row_mask >>= (7 - start_col1 - dx) * 4; + row_mask &= 0xFFFFFFFF << start_col0 * 4; + u32 row = (0x11111111 * clr) & row_mask; + buf_top[0] = (buf_top[0] & ~row_mask) | row; + buf_bot[0] = (buf_bot[0] & ~row_mask) | row; + dirty_tiles[tile_y0] |= 1 << tile_x0; + dirty_tiles[tile_y1] |= 1 << tile_x0; + } else { + size_t shift_left = start_col0 * 4; + size_t shift_right = (7 - start_col1) * 4; + u32 row_mask = 0xFFFFFFFF; + u32 row = 0x11111111 * clr; + buf_top[0] = buf_top[0] & ~(row_mask << shift_left); + buf_top[0] |= row << shift_left; + buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); + buf_bot[0] |= row << shift_left; + dirty_tiles[tile_y0] |= 1 << tile_x0; + dirty_tiles[tile_y1] |= 1 << tile_x0; + for (size_t i = 1; i < dx; i++) { + buf_top[i * 8] = row; + buf_bot[i * 8] = row; + dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); + dirty_tiles[tile_y1] |= 1 << (tile_x0 + i); + } + buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right); + buf_top[dx * 8] |= row >> shift_right; + buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right); + buf_bot[dx * 8] |= row >> shift_right; + dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); + dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx); + } + u32 row_mask_left = 0xF << start_col0 * 4; + u32 row_mask_right = 0xF << start_col1 * 4; + u32 row_left = (0x11111111 * clr) & row_mask_left; + u32 row_right = (0x11111111 * clr) & row_mask_right; + if (dy < 1) { + for (size_t i = 1; i < y1 - y0; i++, buf_top++) { + buf_top[1] = buf_top[1] & ~row_mask_left; + buf_top[1] |= row_left; + buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; + buf_top[1 + 8 * dx] |= row_right; + } + } else { + for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) { + buf_top[1] = buf_top[1] & ~row_mask_left; + buf_top[1] |= row_left; + buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; + buf_top[1 + 8 * dx] |= row_right; + } + buf_top += 8 * 31; + for (size_t j = 1; j < dy; j++) { + for (size_t i = 0; i < 8; i++, buf_top++) { + buf_top[1] = buf_top[1] & ~row_mask_left; + buf_top[1] |= row_left; + buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; + buf_top[1 + 8 * dx] |= row_right; + } + buf_top += 8 * 31; + dirty_tiles[tile_y0 + j] |= 1 << tile_x0; + dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx); + } + for (size_t i = 0; i < start_row1; i++, buf_top++) { + buf_top[1] = buf_top[1] & ~row_mask_left; + buf_top[1] |= row_left; + buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; + buf_top[1 + 8 * dx] |= row_right; + } + } +} + +IWRAM_CODE +void +screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + + size_t dx = x1 - x0; + size_t dy = y1 - y0; + size_t n_rect = MIN(dx, dy); + n_rect = n_rect / 2 + 1; + for (size_t i = 0; i < n_rect; i++) { + ppu_rect(layer, x0 + i, y0 + i, x1 - i, y1 - i, clr); + } +} + #if NEW_PPU == 0 IWRAM_CODE void @@ -315,34 +427,28 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; if (blending[4][clr]) { - u64 mask = ~((u64)0xFFFFFFFF); + u32 mask = 0xFFFFFFFF; if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u32 color_1 = lut[ch1]; - u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; + u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111; u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); - if (start_col == 0) { - dst[0] = (dst[0] & mask) | color; - } else { - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); - } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; u32 color_1 = lut[ch1]; - u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; + u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111; u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); - if (start_col == 0) { - dst[0] = (dst[0] & mask) | color; - } else { - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); - } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } else { @@ -359,6 +465,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); } + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { @@ -373,6 +480,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); } + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } @@ -529,6 +637,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { dst[0] = (dst[0] & (mask << shift_left)) | color; dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); } + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { @@ -542,10 +651,11 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { dst[0] = (dst[0] & (mask << shift_left)) | color; dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); } + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } else if (blending[4][clr]) { - u64 mask = ~((u64)0xFFFFFFFF << shift_left); + u32 mask = 0xFFFFFFFF; u8 clr0 = blending[0][clr]; u8 clr1 = blending[1][clr]; u8 clr2 = blending[2][clr]; @@ -567,12 +677,9 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); - if (start_col == 0) { - dst[0] = (dst[0] & mask) | color; - } else { - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); - } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { @@ -591,12 +698,9 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); - if (start_col == 0) { - dst[0] = (dst[0] & mask) | color; - } else { - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); - } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } else { @@ -625,6 +729,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { dst[0] = (dst[0] & (mask << shift_left)) | color; dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); } + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { @@ -648,6 +753,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { dst[0] = (dst[0] & (mask << shift_left)) | color; dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); } + if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } -- cgit v1.2.1