From eee968f893d7878e675a43a3f89a15d8ceaadc44 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Apr 2023 13:30:23 +0200 Subject: Add blending table and start new implementation of chr drawing --- src/ppu.c | 426 +++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 281 insertions(+), 145 deletions(-) (limited to 'src/ppu.c') diff --git a/src/ppu.c b/src/ppu.c index bafa520..de33145 100644 --- a/src/ppu.c +++ b/src/ppu.c @@ -15,7 +15,7 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. */ -#define NEW_PPU 1 +#define NEW_PPU 0 #define FG_FRONT ((u32*)(MEM_VRAM)) #define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) @@ -149,12 +149,55 @@ static u32 dec_byte[256] = { 0x11111111 }; +// Blending table +// +// | BLEND BITS | COLOR +// CLR | 0 0 0 0 | 0 1 2 3 T +// ----+-------------+---------- +// 0x0 | 0 0 0 0 | 0 0 1 2 1 +// 0x1 | 0 0 0 1 | 0 1 2 3 1 +// 0x2 | 0 0 1 0 | 0 2 3 1 1 +// 0x3 | 0 0 1 1 | 0 3 1 2 1 +// 0x4 | 0 1 0 0 | 1 0 1 2 1 +// 0x5 | 0 1 0 1 | * 1 2 3 0 +// 0x6 | 0 1 1 0 | 1 2 3 1 1 +// 0x7 | 0 1 1 1 | 1 3 1 2 1 +// 0x8 | 1 0 0 0 | 2 0 1 2 1 +// 0x9 | 1 0 0 1 | 2 1 2 3 1 +// 0xA | 1 0 1 0 | * 2 3 1 0 +// 0xB | 1 0 1 1 | 2 3 1 2 1 +// 0xC | 1 1 0 0 | 3 0 1 2 1 +// 0xD | 1 1 0 1 | 3 1 2 3 1 +// 0xE | 1 1 1 0 | 3 2 3 1 1 +// 0xF | 1 1 1 1 | * 3 1 2 0 +// ----+-------------+---------- +// +// Colors 0x5, 0xA and 0xF have transparent background and must be dealt +// with separately, blending color 0 with existing data in that pixel. +// +// We need to do the following: +// +// 1. Extract the color row as u32 (4bpp). +// u32 color = lut[ch1] | (lut[ch2] << 1); // color == 0x00112233 +// 2. Split the row into each of its colors: +// u32 col3 = color & (color >> 1) & 0x11111111; // 0x00000011 +// u32 col2 = ((color >> 1) & 0x11111111) & ~col3; // 0x00001100 +// u32 col1 = (color & 0x11111111) & ~col3; // 0x00110000 +// u32 col0 = 0x11111111 & ~(col3 | col2 | col1); // 0x11000000 +// 3. 
Multiply based on the table, for example for color 0x2: 0123 -> 0231 +// a *= 0 +// b *= 2 +// c *= 3 +// d *= 1 +// 4. Obtain final color by ORing the individual ones. +// color = a | b | c | d; +// static u8 blending[5][16] = { - {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, - {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, - {1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1}, - {2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2}, - {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0} + {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map. + {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, // Color 1 map. + {1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1}, // Color 2 map. + {2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2}, // Color 3 map. + {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0}, // Transparency marker. }; static u32 dirty_tiles[21] = {0}; @@ -182,7 +225,7 @@ putcolors(u8 *addr) { IWRAM_CODE void -ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) { +ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { if (x > SCREEN_WIDTH || y > SCREEN_HEIGHT) return; size_t tile_x = x / 8; size_t tile_y = y / 8; @@ -190,14 +233,14 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) { size_t start_row = y % 8; size_t pos = (start_row + ((tile_x + tile_y * 32) * 8)); size_t shift = start_col * 4; - layer[pos] = (layer[pos] & (~(0xF << shift))) | (color << shift); + layer[pos] = (layer[pos] & (~(0xF << shift))) | (clr << shift); dirty_tiles[tile_y] |= 1 << tile_x; } #if NEW_PPU == 0 IWRAM_CODE void -ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { +ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u8 sprline; u16 v; u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); @@ -205,21 +248,21 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); u32 *layerptr = &layer[layerpos]; u32 shift = (x & 7) << 2; - u32 *lut_expand = flipx ? 
dec_byte_flip_x : dec_byte; + u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte; - if (flipy) flipy = 7; + if (flip_y) flip_y = 7; BOUNDCHECK_SCREEN(x, y); - if (blending[4][color]) { + if (blending[4][clr]) { u64 mask = ~((u64)0xFFFFFFFF << shift); for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= SCREEN_HEIGHT) break; - sprline = sprite[v ^ flipy]; - u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; - data |= (u64)(lut_expand[sprline ^ 0xFF] * (color >> 2)) << shift; + sprline = sprite[v ^ flip_y]; + u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift; + data |= (u64)(lut_expand[sprline ^ 0xFF] * (clr >> 2)) << shift; layerptr[0] = (layerptr[0] & mask) | data; layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); @@ -230,9 +273,9 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= SCREEN_HEIGHT) break; - sprline = sprite[v ^ flipy]; + sprline = sprite[v ^ flip_y]; u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift); - u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; + u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift; layerptr[0] = (layerptr[0] & mask) | data; layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); @@ -251,7 +294,6 @@ UNROLL_LOOPS void ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { BOUNDCHECK_SCREEN(x, y); - size_t tile_x = x / 8; size_t tile_y = y / 8; size_t start_col = x % 8; @@ -367,143 +409,237 @@ draw_2bpp_row(void *layer, size_t x, size_t y, u8 a, u8 b, u8 flip_x) { // TODO: different blend modes? 
} +#if NEW_PPU == 0 IWRAM_CODE void -ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, - u8 flip_x, u8 flip_y) { - // u32 *dst = &layer[0]; - // *dst = 0x111111111; - // if (!flip_y) { - // for(size_t v = 0; v < 8; v++) { - // // if ((y + v) >= SCREEN_HEIGHT) break; - // u8 ch1 = sprite[v + 0]; - // u8 ch2 = sprite[v + 8]; - // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x); - // } - // } else { - // for(size_t v = 0; v < 8; v++) { - // // if ((y + v) >= SCREEN_HEIGHT) break; - // u8 ch1 = sprite[(7 - v) + 0]; - // u8 ch2 = sprite[(7 - v) + 8]; - // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x); - // } - // } - // u8 sprline1, sprline2; - // u8 xrightedge = x < ((32 - 1) * 8); - // u16 v, h; - // u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); - - // u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); - // u32 *layerptr = &layer[layerpos]; - // u32 shift = (x & 7) << 2; - - // if (flip_y) flip_y = 7; - - // if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; - - // if (color == 1) { - // u32 *lut_expand = flip_x ? 
dec_byte_flip_x : dec_byte; - // u64 mask = ~((u64)0xFFFFFFFF << shift); - - // for (v = 0; v < 8; v++, layerptr++) { - // if ((y + v) >= (24 * 8)) break; - - // sprline1 = sprite[v ^ flip_y]; - // sprline2 = sprite[(v ^ flip_y) | 8]; - - // u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); - // u64 data = ((u64) (data32 & 0x33333333)) << shift; - - // layerptr[0] = (layerptr[0] & mask) | data; - // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); - - // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - // } - // } else if (blending[4][color]) { - // u64 mask = ~((u64)0xFFFFFFFF << shift); - - // for (v = 0; v < 8; v++, layerptr++) { - // if ((y + v) >= (24 * 8)) break; - - // u8 ch1 = sprite[v ^ flip_y]; - // u8 ch2 = sprite[(v ^ flip_y) | 8]; - // u32 data32 = 0; - - // if (!flip_x) { - // for (h = 0; h < 8; h++) { - // data32 <<= 4; - - // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); - // data32 |= blending[ch][color]; - - // ch1 >>= 1; ch2 >>= 1; - // } - // } else { - // for (h = 0; h < 8; h++) { - // data32 <<= 4; - - // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); - // data32 |= blending[ch][color]; - - // ch1 <<= 1; ch2 <<= 1; - // } - // } - - // u64 data = ((u64) (data32 & 0x33333333)) << shift; - - // layerptr[0] = (layerptr[0] & mask) | data; - // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); - - // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - // } - // } else { - // for (v = 0; v < 8; v++, layerptr++) { - // if ((y + v) >= (24 * 8)) break; - - // u8 ch1 = sprite[v ^ flip_y]; - // u8 ch2 = sprite[(v ^ flip_y) | 8]; - // u32 data32 = 0; - // u32 mask32 = 0; - - // if (!flip_x) { - // for (h = 0; h < 8; h++) { - // data32 <<= 4; mask32 <<= 4; - - // if ((ch1 | ch2) & 1) { - // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); - // data32 |= blending[ch][color]; - // mask32 |= 0xF; - // } - - // ch1 >>= 1; ch2 >>= 1; - // } - // } else { - // for (h = 0; h < 8; h++) { - // data32 <<= 4; mask32 <<= 4; 
- - // if ((ch1 | ch2) & 128) { - // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); - // data32 |= blending[ch][color]; - // mask32 |= 0xF; - // } +ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flip_x, u8 flip_y) { + u8 sprline1, sprline2; + u8 xrightedge = x < ((32 - 1) * 8); + u16 v, h; + u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); + + u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); + u32 *layerptr = &layer[layerpos]; + u32 shift = (x & 7) << 2; + + if (flip_y) flip_y = 7; + + BOUNDCHECK_SCREEN(x, y); + + if (color == 1) { + u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte; + u64 mask = ~((u64)0xFFFFFFFF << shift); + + for (v = 0; v < 8; v++, layerptr++) { + if ((y + v) >= (24 * 8)) break; + + sprline1 = sprite[v ^ flip_y]; + sprline2 = sprite[(v ^ flip_y) | 8]; + u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); + u64 data = ((u64) (data32 & 0x33333333)) << shift; + + layerptr[0] = (layerptr[0] & mask) | data; + if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + + if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; + } + } else if (blending[4][color]) { + u64 mask = ~((u64)0xFFFFFFFF << shift); + + for (v = 0; v < 8; v++, layerptr++) { + if ((y + v) >= (24 * 8)) break; + + u8 ch1 = sprite[v ^ flip_y]; + u8 ch2 = sprite[(v ^ flip_y) | 8]; + u32 data32 = 0; + + if (!flip_x) { + for (h = 0; h < 8; h++) { + data32 <<= 4; + + u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); + data32 |= blending[ch][color]; + + ch1 >>= 1; ch2 >>= 1; + } + } else { + for (h = 0; h < 8; h++) { + data32 <<= 4; - // ch1 <<= 1; ch2 <<= 1; - // } - // } + u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); + data32 |= blending[ch][color]; - // u64 data = ((u64) (data32 & 0x33333333)) << shift; - // u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); + ch1 <<= 1; ch2 <<= 1; + } + } + + u64 data = ((u64) (data32 & 0x33333333)) << shift; + + layerptr[0] = (layerptr[0] & mask) | data; + if (xrightedge) layerptr[8] = (layerptr[8] & (mask 
>> 32)) | (data >> 32); + + if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; + } + } else { + for (v = 0; v < 8; v++, layerptr++) { + if ((y + v) >= (24 * 8)) break; - // layerptr[0] = (layerptr[0] & mask) | data; - // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + u8 ch1 = sprite[v ^ flip_y]; + u8 ch2 = sprite[(v ^ flip_y) | 8]; + u32 data32 = 0; + u32 mask32 = 0; - // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; - // } - // } + if (!flip_x) { + for (h = 0; h < 8; h++) { + data32 <<= 4; mask32 <<= 4; + if ((ch1 | ch2) & 1) { + u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); + data32 |= blending[ch][color]; + mask32 |= 0xF; + } + + ch1 >>= 1; ch2 >>= 1; + } + } else { + for (h = 0; h < 8; h++) { + data32 <<= 4; mask32 <<= 4; + + if ((ch1 | ch2) & 128) { + u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); + data32 |= blending[ch][color]; + mask32 |= 0xF; + } + + ch1 <<= 1; ch2 <<= 1; + } + } + + u64 data = ((u64) (data32 & 0x33333333)) << shift; + u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); + + layerptr[0] = (layerptr[0] & mask) | data; + if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); + + if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; + } + } + + dirty_tiles[y >> 3] |= dirtyflag; + dirty_tiles[(y + 7) >> 3] |= dirtyflag; +} +#else +IWRAM_CODE +// UNROLL_LOOPS +void +ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { + BOUNDCHECK_SCREEN(x, y); + size_t tile_x = x / 8; + size_t tile_y = y / 8; + size_t start_col = x % 8; + size_t start_row = y % 8; + size_t shift_left = start_col * 4; + size_t shift_right = (8 - start_col) * 4; + u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; + u32 *lut = flip_x ? 
dec_byte_flip_x : dec_byte; + if (clr == 1) { + // u64 mask = ~((u64)0xFFFFFFFF << shift_left); + // if (!flip_y) { + // for(size_t v = 0; v < 8; v++, dst++) { + // if ((y + v) >= SCREEN_HEIGHT) break; + // u8 ch1 = sprite[v]; + // u8 ch2 = sprite[v | 8]; + // u32 color = lut[ch1] | (lut[ch2] << 1); + // if (start_col == 0) { + // dst[0] = (dst[0] & mask) | color; + // } else { + // dst[0] = (dst[0] & (mask << shift_left)) | color; + // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + // } + // } + // } else { + // for(size_t v = 0; v < 8; v++, dst++) { + // if ((y + v) >= SCREEN_HEIGHT) break; + // u8 ch1 = sprite[(7 - v)]; + // u8 ch2 = sprite[(7 - v) | 8]; + // u32 color = lut[ch1] | (lut[ch2] << 1); + // if (start_col == 0) { + // dst[0] = (dst[0] & mask) | color; + // } else { + // dst[0] = (dst[0] & (mask << shift_left)) | color; + // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + // } + // } + // } + } else if (blending[4][clr]) { + // ICN + u64 mask = ~((u64)0xFFFFFFFF << shift_left); + // DEBUG: remove flip_y from sprite fetching + // if (!flip_y) { + if (flip_y) flip_y = 7; + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v ^ flip_y]; + u8 ch2 = sprite[(v ^ flip_y) | 8]; + u32 color = lut[ch1] | (lut[ch2] << 1); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + // } else { + // for(size_t v = 0; v < 8; v++, dst++) { + // if ((y + v) >= SCREEN_HEIGHT) break; + // u8 ch1 = sprite[(7 - v)]; + // u32 color_1 = lut[ch1]; + // u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; + // u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); + // if (start_col == 0) { + // dst[0] = (dst[0] & mask) | color; + // } else { + // dst[0] = (dst[0] & (mask << shift_left)) | color; + // dst[8] = (dst[8] & (mask >> shift_right)) 
| (color >> shift_right); + // } + // } + // } + } else { + // ICN + // if (!flip_y) { + // for(size_t v = 0; v < 8; v++, dst++) { + // if ((y + v) >= SCREEN_HEIGHT) break; + // u8 ch1 = sprite[v]; + // u32 color= lut[ch1]; + // u32 mask = ~color; + // color *= clr & 3; + // if (start_col == 0) { + // dst[0] = (dst[0] & ~mask) | color; + // } else { + // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + // } + // } + // } else { + // for(size_t v = 0; v < 8; v++, dst++) { + // if ((y + v) >= SCREEN_HEIGHT) break; + // u8 ch1 = sprite[(7 - v)]; + // u32 color= lut[ch1]; + // u32 mask = ~color; + // color *= clr & 3; + // if (start_col == 0) { + // dst[0] = (dst[0] & ~mask) | color; + // } else { + // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + // } + // } + // } + } // dirty_tiles[y >> 3] |= dirtyflag; // dirty_tiles[(y + 7) >> 3] |= dirtyflag; } +#endif IWRAM_CODE void -- cgit v1.2.1