From e08a6dc4f278d2df2525cdf189c9447c372f1e98 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Apr 2023 10:20:14 +0200 Subject: Minor cleanup and fix small color bug --- src/ppu.c | 254 ++++++++++++-------------------------------------------------- 1 file changed, 46 insertions(+), 208 deletions(-) diff --git a/src/ppu.c b/src/ppu.c index a841b97..bafa520 100644 --- a/src/ppu.c +++ b/src/ppu.c @@ -39,116 +39,6 @@ WITH REGARD TO THIS SOFTWARE. // Keyboard. #define SPRITE_START_IDX 640 -static u32 lut_2bpp[256] = { - 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, - 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, - 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, - 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011, - 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000, - 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, - 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010, - 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, - 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, - 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001, - 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, - 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011, - 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000, - 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, - 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010, - 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, - 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, - 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001, - 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, - 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011, - 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000, - 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, - 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010, - 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, - 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, - 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001, - 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, - 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011, - 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000, - 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, - 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010, - 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, - 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, - 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001, - 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, - 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011, - 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000, - 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, - 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010, - 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, - 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, - 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001, - 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, - 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011, - 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000, - 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, - 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010, - 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, - 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, - 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001, - 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, - 0x11111111 -}; - -static u32 lut2bpp_flipx[256] = { - 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, - 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000, - 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, - 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000, - 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000, - 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000, - 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100, - 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100, - 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100, - 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100, - 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100, - 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100, - 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010, - 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010, - 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010, - 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010, - 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010, - 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010, - 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010, - 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110, - 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110, - 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110, - 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110, - 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110, - 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110, - 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001, - 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001, - 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001, - 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001, - 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001, - 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001, - 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001, - 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101, - 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101, - 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101, - 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101, - 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101, - 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101, - 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011, - 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011, - 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011, - 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011, - 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011, - 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011, - 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111, - 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111, - 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111, - 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111, - 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111, - 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111, - 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111, - 0x11111111 -}; - static u32 dec_byte_flip_x[256] = { 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, @@ -304,60 +194,58 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) { dirty_tiles[tile_y] |= 1 << tile_x; } +#if NEW_PPU == 0 IWRAM_CODE -static inline -u32 -decode_1bpp(u8 row, u8 flip_x) { - return flip_x ? dec_byte_flip_x[row] : dec_byte[row]; -} - -IWRAM_CODE -static inline void -draw_1bpp_row(u32 *layer, size_t x, size_t y, u8 sprite, u8 clr, u8 flip_x) { - BOUNDCHECK_SCREEN(x, y); - - size_t tile_x = x / 8; - size_t tile_y = y / 8; - size_t start_col = x % 8; - size_t start_row = y % 8; - size_t shift_left = start_col * 4; - size_t shift_right = (8 - start_col) * 4; +ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { + u8 sprline; + u16 v; + u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); - u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; - u32 color = decode_1bpp(sprite, flip_x); - u32 mask = ~color; - color *= clr; - if (start_col == 0) { - dst[0] = (dst[0] & ~mask) | color; - } else { - dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); - dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); - } + u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); + u32 *layerptr = &layer[layerpos]; + u32 shift = (x & 7) << 2; + u32 *lut_expand = flipx ? dec_byte_flip_x : dec_byte; - // TODO: different blend modes? -} + if (flipy) flipy = 7; -IWRAM_CODE -void -draw_icn(u32 * layer, size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { BOUNDCHECK_SCREEN(x, y); - if (!flip_y) { - for(size_t v = 0; v < 8; v++) { + + if (blending[4][color]) { + u64 mask = ~((u64)0xFFFFFFFF << shift); + + for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= SCREEN_HEIGHT) break; - u8 ch1 = sprite[v]; - draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x); + + sprline = sprite[v ^ flipy]; + u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; + data |= (u64)(lut_expand[sprline ^ 0xFF] * (color >> 2)) << shift; + + layerptr[0] = (layerptr[0] & mask) | data; + layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + + if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; } } else { - for(size_t v = 0; v < 8; v++) { + for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= SCREEN_HEIGHT) break; - u8 ch1 = sprite[(7 - v)]; - draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x); + + sprline = sprite[v ^ flipy]; + u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift); + u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; + + layerptr[0] = (layerptr[0] & mask) | data; + layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + + if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; } } + + dirty_tiles[y >> 3] |= dirtyflag; + dirty_tiles[(y + 7) >> 3] |= dirtyflag; } -#if NEW_PPU == 1 +#else IWRAM_CODE UNROLL_LOOPS void @@ -371,13 +259,14 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { size_t shift_left = start_col * 4; size_t shift_right = (8 - start_col) * 4; u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; + u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; if (blending[4][clr]) { u64 mask = ~((u64)0xFFFFFFFF); if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; - u32 color_1 = decode_1bpp(ch1, flip_x); + u32 color_1 = lut[ch1]; u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); if (start_col == 0) { @@ -391,7 +280,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; - u32 color_1 = decode_1bpp(ch1, flip_x); + u32 color_1 = lut[ch1]; u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); if (start_col == 0) { @@ -407,9 +296,9 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; - u32 color = decode_1bpp(ch1, flip_x); + u32 color= lut[ch1]; u32 mask = ~color; - color *= clr; + color *= clr & 3; if (start_col == 0) { dst[0] = (dst[0] & ~mask) | color; } else { @@ -421,9 +310,9 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; - u32 color = decode_1bpp(ch1, flip_x); + u32 color= lut[ch1]; u32 mask = ~color; - color *= clr; + color *= clr & 3; if (start_col == 0) { dst[0] = (dst[0] & ~mask) | color; } else { @@ -437,57 +326,6 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { // dirty_tiles[y >> 3] |= dirtyflag; // dirty_tiles[(y + 7) >> 3] |= dirtyflag; } - -#else -IWRAM_CODE -void -ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { - u8 sprline; - u16 v; - u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); - - u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); - u32 *layerptr = &layer[layerpos]; - u32 shift = (x & 7) << 2; - u32 *lut_expand = flipx ? lut_2bpp : lut2bpp_flipx; - - if (flipy) flipy = 7; - - BOUNDCHECK_SCREEN(x, y); - - if (blending[4][color]) { - u64 mask = ~((u64)0xFFFFFFFF << shift); - - for (v = 0; v < 8; v++, layerptr++) { - if ((y + v) >= SCREEN_HEIGHT) break; - - sprline = sprite[v ^ flipy]; - u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; - data |= (u64)(lut_expand[sprline ^ 0xFF] * (color >> 2)) << shift; - - layerptr[0] = (layerptr[0] & mask) | data; - layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); - - if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - } - } else { - for (v = 0; v < 8; v++, layerptr++) { - if ((y + v) >= SCREEN_HEIGHT) break; - - sprline = sprite[v ^ flipy]; - u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift); - u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; - - layerptr[0] = (layerptr[0] & mask) | data; - layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); - - if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - } - } - - dirty_tiles[y >> 3] |= dirtyflag; - dirty_tiles[(y + 7) >> 3] |= dirtyflag; -} #endif IWRAM_CODE @@ -564,7 +402,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, // if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; // if (color == 1) { - // u32 *lut_expand = flip_x ? lut_2bpp : lut2bpp_flipx; + // u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte; // u64 mask = ~((u64)0xFFFFFFFF << shift); // for (v = 0; v < 8; v++, layerptr++) { -- cgit v1.2.1