From 5f47e14f6ab4e3b346de1d62c65452e674edbebe Mon Sep 17 00:00:00 2001
From: Bad Diode
Date: Wed, 19 Apr 2023 10:05:03 +0200
Subject: Add optimized 1bpp drawing function

---
 src/ppu.c | 546 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 431 insertions(+), 115 deletions(-)

(limited to 'src/ppu.c')

diff --git a/src/ppu.c b/src/ppu.c
index 1a13ba3..a841b97 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -15,6 +15,8 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 WITH REGARD TO THIS SOFTWARE.
 */
 
+#define NEW_PPU 1
+
 #define FG_FRONT ((u32*)(MEM_VRAM))
 #define BG_FRONT ((u32*)(MEM_VRAM + KB(20)))
 #define FG_BACK ((u32*)(MEM_VRAM + KB(44)))
@@ -22,6 +24,18 @@ WITH REGARD TO THIS SOFTWARE.
 #define TILE_MAP ((u32*)(MEM_VRAM + KB(40)))
 #define FONT_DATA ((u32*)(MEM_VRAM + KB(84)))
 
+#ifdef DISABLE_BOUNDCHECK_SCREEN
+#define BOUNDCHECK_SCREEN(X,Y)
+#else
+#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
+#endif
+
+// Swap A and B values without a tmp variable.
+#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))
+
+// Swap A and B values to make sure A <= B.
+#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
+
 // Keyboard.
#define SPRITE_START_IDX 640 @@ -135,6 +149,116 @@ static u32 lut2bpp_flipx[256] = { 0x11111111 }; +static u32 dec_byte_flip_x[256] = { + 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, + 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, + 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, + 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011, + 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000, + 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, + 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010, + 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, + 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, + 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001, + 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, + 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011, + 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000, + 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, + 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010, + 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, + 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, + 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001, + 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, + 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011, + 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000, + 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, + 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010, + 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, + 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, + 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001, + 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, + 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011, + 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000, + 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, + 
0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010, + 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, + 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, + 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001, + 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, + 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011, + 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000, + 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, + 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010, + 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, + 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, + 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001, + 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, + 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011, + 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000, + 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, + 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010, + 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, + 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, + 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001, + 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, + 0x11111111 +}; + +static u32 dec_byte[256] = { + 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, + 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000, + 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, + 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000, + 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000, + 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000, + 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100, + 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100, + 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100, + 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100, + 0x01001100, 0x11001100, 
0x00101100, 0x10101100, 0x01101100, + 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100, + 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010, + 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010, + 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010, + 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010, + 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010, + 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010, + 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010, + 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110, + 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110, + 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110, + 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110, + 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110, + 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110, + 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001, + 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001, + 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001, + 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001, + 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001, + 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001, + 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001, + 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101, + 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101, + 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101, + 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101, + 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101, + 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101, + 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011, + 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011, + 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011, + 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011, + 0x01001011, 0x11001011, 0x00101011, 
0x10101011, 0x01101011, + 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011, + 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111, + 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111, + 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111, + 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111, + 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111, + 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111, + 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111, + 0x11111111 +}; + static u8 blending[5][16] = { {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, @@ -180,6 +304,141 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) { dirty_tiles[tile_y] |= 1 << tile_x; } +IWRAM_CODE +static inline +u32 +decode_1bpp(u8 row, u8 flip_x) { + return flip_x ? dec_byte_flip_x[row] : dec_byte[row]; +} + +IWRAM_CODE +static inline +void +draw_1bpp_row(u32 *layer, size_t x, size_t y, u8 sprite, u8 clr, u8 flip_x) { + BOUNDCHECK_SCREEN(x, y); + + size_t tile_x = x / 8; + size_t tile_y = y / 8; + size_t start_col = x % 8; + size_t start_row = y % 8; + size_t shift_left = start_col * 4; + size_t shift_right = (8 - start_col) * 4; + + u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; + u32 color = decode_1bpp(sprite, flip_x); + u32 mask = ~color; + color *= clr; + if (start_col == 0) { + dst[0] = (dst[0] & ~mask) | color; + } else { + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + } + + // TODO: different blend modes? 
+} + +IWRAM_CODE +void +draw_icn(u32 * layer, size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { + BOUNDCHECK_SCREEN(x, y); + if (!flip_y) { + for(size_t v = 0; v < 8; v++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v]; + draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x); + } + } else { + for(size_t v = 0; v < 8; v++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v)]; + draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x); + } + } +} + +#if NEW_PPU == 1 +IWRAM_CODE +UNROLL_LOOPS +void +ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { + BOUNDCHECK_SCREEN(x, y); + + size_t tile_x = x / 8; + size_t tile_y = y / 8; + size_t start_col = x % 8; + size_t start_row = y % 8; + size_t shift_left = start_col * 4; + size_t shift_right = (8 - start_col) * 4; + u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; + if (blending[4][clr]) { + u64 mask = ~((u64)0xFFFFFFFF); + if (!flip_y) { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v]; + u32 color_1 = decode_1bpp(ch1, flip_x); + u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; + u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + } else { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v)]; + u32 color_1 = decode_1bpp(ch1, flip_x); + u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; + u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + } + } else { + if (!flip_y) { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= 
SCREEN_HEIGHT) break; + u8 ch1 = sprite[v]; + u32 color = decode_1bpp(ch1, flip_x); + u32 mask = ~color; + color *= clr; + if (start_col == 0) { + dst[0] = (dst[0] & ~mask) | color; + } else { + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + } + } + } else { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v)]; + u32 color = decode_1bpp(ch1, flip_x); + u32 mask = ~color; + color *= clr; + if (start_col == 0) { + dst[0] = (dst[0] & ~mask) | color; + } else { + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + } + } + } + } + + // dirty_tiles[y >> 3] |= dirtyflag; + // dirty_tiles[(y + 7) >> 3] |= dirtyflag; +} + +#else IWRAM_CODE void ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { @@ -194,7 +453,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { if (flipy) flipy = 7; - if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; + BOUNDCHECK_SCREEN(x, y); if (blending[4][color]) { u64 mask = ~((u64)0xFFFFFFFF << shift); @@ -229,126 +488,183 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { dirty_tiles[y >> 3] |= dirtyflag; dirty_tiles[(y + 7) >> 3] |= dirtyflag; } +#endif IWRAM_CODE +static inline void -ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, - u8 flipx, u8 flipy) { - u8 sprline1, sprline2; - u8 xrightedge = x < ((32 - 1) * 8); - u16 v, h; - u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); - - u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); - u32 *layerptr = &layer[layerpos]; - u32 shift = (x & 7) << 2; - - if (flipy) flipy = 7; - - if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; - - if (color == 1) { - u32 *lut_expand = flipx ? 
lut_2bpp : lut2bpp_flipx; - u64 mask = ~((u64)0xFFFFFFFF << shift); - - for (v = 0; v < 8; v++, layerptr++) { - if ((y + v) >= (24 * 8)) break; - - sprline1 = sprite[v ^ flipy]; - sprline2 = sprite[(v ^ flipy) | 8]; - - u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); - u64 data = ((u64) (data32 & 0x33333333)) << shift; - - layerptr[0] = (layerptr[0] & mask) | data; - if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); - - if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - } - } else if (blending[4][color]) { - u64 mask = ~((u64)0xFFFFFFFF << shift); - - for (v = 0; v < 8; v++, layerptr++) { - if ((y + v) >= (24 * 8)) break; - - u8 ch1 = sprite[v ^ flipy]; - u8 ch2 = sprite[(v ^ flipy) | 8]; - u32 data32 = 0; - - if (!flipx) { - for (h = 0; h < 8; h++) { - data32 <<= 4; - - u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); - data32 |= blending[ch][color]; - - ch1 >>= 1; ch2 >>= 1; - } - } else { - for (h = 0; h < 8; h++) { - data32 <<= 4; - - u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); - data32 |= blending[ch][color]; - - ch1 <<= 1; ch2 <<= 1; - } - } - - u64 data = ((u64) (data32 & 0x33333333)) << shift; - - layerptr[0] = (layerptr[0] & mask) | data; - if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); +draw_2bpp_row(void *layer, size_t x, size_t y, u8 a, u8 b, u8 flip_x) { + // BOUNDCHECK_SCREEN(x, y); - if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - } + size_t tile_x = x / 8; + size_t tile_y = y / 8; + size_t start_col = x % 8; + size_t start_row = y % 8; + size_t shift_left = start_col * 4; + size_t shift_right = (8 - start_col) * 4; + + u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; +// #if DEC_BIG_LUT + u32 *lut = dec_byte; + if (flip_x) { + lut = dec_byte_flip_x; + } + u32 clr_a = lut[a]; + u32 clr_b = lut[b]; +// #else +// u32 clr_a = decode_1bpp(a, flip_x); +// u32 clr_b = decode_1bpp(b, flip_x); +// #endif + u32 mask_a = (clr_a * 0xF); + u32 mask_b = (clr_b * 0xF); + u32 mask = (mask_a | 
mask_b); + u32 color = clr_a + (clr_b << 1); + if (start_col == 0) { + dst[0] = (dst[0] & ~mask) | color; } else { - for (v = 0; v < 8; v++, layerptr++) { - if ((y + v) >= (24 * 8)) break; - - u8 ch1 = sprite[v ^ flipy]; - u8 ch2 = sprite[(v ^ flipy) | 8]; - u32 data32 = 0; - u32 mask32 = 0; - - if (!flipx) { - for (h = 0; h < 8; h++) { - data32 <<= 4; mask32 <<= 4; - - if ((ch1 | ch2) & 1) { - u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); - data32 |= blending[ch][color]; - mask32 |= 0xF; - } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + } - ch1 >>= 1; ch2 >>= 1; - } - } else { - for (h = 0; h < 8; h++) { - data32 <<= 4; mask32 <<= 4; + // TODO: different blend modes? +} - if ((ch1 | ch2) & 128) { - u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); - data32 |= blending[ch][color]; - mask32 |= 0xF; - } +IWRAM_CODE +void +ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, + u8 flip_x, u8 flip_y) { + // u32 *dst = &layer[0]; + // *dst = 0x111111111; + // if (!flip_y) { + // for(size_t v = 0; v < 8; v++) { + // // if ((y + v) >= SCREEN_HEIGHT) break; + // u8 ch1 = sprite[v + 0]; + // u8 ch2 = sprite[v + 8]; + // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x); + // } + // } else { + // for(size_t v = 0; v < 8; v++) { + // // if ((y + v) >= SCREEN_HEIGHT) break; + // u8 ch1 = sprite[(7 - v) + 0]; + // u8 ch2 = sprite[(7 - v) + 8]; + // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x); + // } + // } + // u8 sprline1, sprline2; + // u8 xrightedge = x < ((32 - 1) * 8); + // u16 v, h; + // u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); + + // u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); + // u32 *layerptr = &layer[layerpos]; + // u32 shift = (x & 7) << 2; + + // if (flip_y) flip_y = 7; + + // if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; + + // if (color == 1) { + // u32 *lut_expand = flip_x ? 
lut_2bpp : lut2bpp_flipx; + // u64 mask = ~((u64)0xFFFFFFFF << shift); + + // for (v = 0; v < 8; v++, layerptr++) { + // if ((y + v) >= (24 * 8)) break; + + // sprline1 = sprite[v ^ flip_y]; + // sprline2 = sprite[(v ^ flip_y) | 8]; + + // u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); + // u64 data = ((u64) (data32 & 0x33333333)) << shift; + + // layerptr[0] = (layerptr[0] & mask) | data; + // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + + // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; + // } + // } else if (blending[4][color]) { + // u64 mask = ~((u64)0xFFFFFFFF << shift); + + // for (v = 0; v < 8; v++, layerptr++) { + // if ((y + v) >= (24 * 8)) break; + + // u8 ch1 = sprite[v ^ flip_y]; + // u8 ch2 = sprite[(v ^ flip_y) | 8]; + // u32 data32 = 0; + + // if (!flip_x) { + // for (h = 0; h < 8; h++) { + // data32 <<= 4; + + // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); + // data32 |= blending[ch][color]; + + // ch1 >>= 1; ch2 >>= 1; + // } + // } else { + // for (h = 0; h < 8; h++) { + // data32 <<= 4; + + // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); + // data32 |= blending[ch][color]; + + // ch1 <<= 1; ch2 <<= 1; + // } + // } + + // u64 data = ((u64) (data32 & 0x33333333)) << shift; + + // layerptr[0] = (layerptr[0] & mask) | data; + // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + + // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; + // } + // } else { + // for (v = 0; v < 8; v++, layerptr++) { + // if ((y + v) >= (24 * 8)) break; + + // u8 ch1 = sprite[v ^ flip_y]; + // u8 ch2 = sprite[(v ^ flip_y) | 8]; + // u32 data32 = 0; + // u32 mask32 = 0; + + // if (!flip_x) { + // for (h = 0; h < 8; h++) { + // data32 <<= 4; mask32 <<= 4; + + // if ((ch1 | ch2) & 1) { + // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); + // data32 |= blending[ch][color]; + // mask32 |= 0xF; + // } + + // ch1 >>= 1; ch2 >>= 1; + // } + // } else { + // for (h = 0; h < 8; h++) { + // data32 <<= 4; mask32 <<= 4; + 
+ // if ((ch1 | ch2) & 128) { + // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); + // data32 |= blending[ch][color]; + // mask32 |= 0xF; + // } - ch1 <<= 1; ch2 <<= 1; - } - } + // ch1 <<= 1; ch2 <<= 1; + // } + // } - u64 data = ((u64) (data32 & 0x33333333)) << shift; - u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); + // u64 data = ((u64) (data32 & 0x33333333)) << shift; + // u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); - layerptr[0] = (layerptr[0] & mask) | data; - if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + // layerptr[0] = (layerptr[0] & mask) | data; + // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); - if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - } - } + // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; + // } + // } - dirty_tiles[y >> 3] |= dirtyflag; - dirty_tiles[(y + 7) >> 3] |= dirtyflag; + // dirty_tiles[y >> 3] |= dirtyflag; + // dirty_tiles[(y + 7) >> 3] |= dirtyflag; } IWRAM_CODE @@ -369,20 +685,20 @@ flipbuf(Ppu *p) { Tile *mem_fg = FG_FRONT; Tile *mem_bg = BG_FRONT; for (size_t j = 0; j < 20; ++j) { - if (dirty_tiles[j] == 0) { - continue; - } + // if (dirty_tiles[j] == 0) { + // continue; + // } size_t k = 1; for (size_t i = 0; i < 30; ++i, k <<= 1) { - if (dirty_tiles[j] & k) { + // if (dirty_tiles[j] & k) { Tile *tile_fg = p->fg; Tile *tile_bg = p->bg; mem_fg[i + j * 32] = tile_fg[i + j * 32]; mem_bg[i + j * 32] = tile_bg[i + j * 32]; - } + // } } - dirty_tiles[j] = 0; + // dirty_tiles[j] = 0; } } -- cgit v1.2.1