From 6d943ebca061683c076bc026f7e15a3a047b2027 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Apr 2023 15:08:20 +0200 Subject: Add working implementation of ppu_2bpp --- src/ppu.c | 257 +++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 155 insertions(+), 102 deletions(-) (limited to 'src/ppu.c') diff --git a/src/ppu.c b/src/ppu.c index de33145..96a3bc0 100644 --- a/src/ppu.c +++ b/src/ppu.c @@ -15,7 +15,7 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. */ -#define NEW_PPU 0 +#define NEW_PPU 1 #define FG_FRONT ((u32*)(MEM_VRAM)) #define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) @@ -178,19 +178,29 @@ static u32 dec_byte[256] = { // We need to do the following: // // 1. Extract the color row as u32 (4bpp). -// u32 color = lut[ch1] | (lut[ch2] << 1); // color == 0x00112233 -// 2. Split the row into each of its colors: -// u32 col3 = (color & 0x33333333); // 0x00000011 -// u32 col2 = (color & 0x22222222) & ~(col3 * 0xF); // 0x00001100 -// u32 col1 = (color & 0x11111111) & ~(col3 * 0xF); // 0x00110000 -// u32 col0 = color & ~((col3 | col2 | col1) * 0xF); // 0x11000000 -// 3. Multiply based on the table, for example for color 0x2: 0123 -> 0231 -// a *= 0 -// b *= 2 -// c *= 3 -// d *= 1 -// 4. Obtain final color by ORing the individual ones. -// color = a | b | c | d; +// 2. Split the row into each of its colors. +// 3. Multiply based on the table, for example for color blend 2: 0123 -> 0231 +// 4. Obtain final color by ORing the colors from each channel. 
+// // clr0 = blending[0][clr]; // clr1 = blending[1][clr]; // clr2 = blending[2][clr]; // clr3 = blending[3][clr]; // color = 0x00112233; 0b 0000 0000 0001 0001 0010 0010 0011 0011 0x00112233 // col1mask = (color & 0x11111111); 0b 0000 0000 0001 0001 0000 0000 0001 0001 0x00110011 // col2mask = (color & 0x22222222) >> 1; 0b 0000 0000 0000 0000 0001 0001 0001 0001 0x00001111 // col3mask = (col1mask & col2mask) * 0xF; 0b 0000 0000 0000 0000 0000 0000 1111 1111 0x000000FF // col1mask &= ~col3mask; 0b 0000 0000 0001 0001 0000 0000 0000 0000 0x00110000 // col2mask &= ~col3mask; 0b 0000 0000 0000 0000 0001 0001 0000 0000 0x00001100 // col3mask = (color & col3mask) & 0x11111111; 0b 0000 0000 0000 0000 0000 0000 0001 0001 0x00000011 // col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; 0b 0001 0001 0000 0000 0000 0000 0000 0000 0x11000000 // color = (clr0 * col0mask) | // (clr1 * col1mask) | // (clr2 * col2mask) | // (clr3 * col3mask); // // Note that in case of transparent modes col0mask can be used to mask off the // bits we want to pull from the existing framebuffer. // static u8 blending[5][16] = { {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map. @@ -530,7 +540,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flip_x, u8 flip_y) { } #else IWRAM_CODE -// UNROLL_LOOPS +UNROLL_LOOPS void ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { BOUNDCHECK_SCREEN(x, y); @@ -543,98 +553,141 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ?
dec_byte_flip_x : dec_byte; if (clr == 1) { - // u64 mask = ~((u64)0xFFFFFFFF << shift_left); - // if (!flip_y) { - // for(size_t v = 0; v < 8; v++, dst++) { - // if ((y + v) >= SCREEN_HEIGHT) break; - // u8 ch1 = sprite[v]; - // u8 ch2 = sprite[v | 8]; - // u32 color = lut[ch1] | (lut[ch2] << 1); - // if (start_col == 0) { - // dst[0] = (dst[0] & mask) | color; - // } else { - // dst[0] = (dst[0] & (mask << shift_left)) | color; - // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); - // } - // } - // } else { - // for(size_t v = 0; v < 8; v++, dst++) { - // if ((y + v) >= SCREEN_HEIGHT) break; - // u8 ch1 = sprite[(7 - v)]; - // u8 ch2 = sprite[(7 - v) | 8]; - // u32 color = lut[ch1] | (lut[ch2] << 1); - // if (start_col == 0) { - // dst[0] = (dst[0] & mask) | color; - // } else { - // dst[0] = (dst[0] & (mask << shift_left)) | color; - // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); - // } - // } - // } + u64 mask = ~((u64)0xFFFFFFFF << shift_left); + if (!flip_y) { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v]; + u8 ch2 = sprite[v | 8]; + u32 color = lut[ch1] | (lut[ch2] << 1); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + } else { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v)]; + u8 ch2 = sprite[(7 - v) | 8]; + u32 color = lut[ch1] | (lut[ch2] << 1); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + } } else if (blending[4][clr]) { - // ICN u64 mask = ~((u64)0xFFFFFFFF << shift_left); - // DEBUG: remove flip_y from sprite fetching - // if (!flip_y) { - if (flip_y) flip_y = 7; - for(size_t v = 0; v < 8; 
v++, dst++) { - if ((y + v) >= SCREEN_HEIGHT) break; - u8 ch1 = sprite[v ^ flip_y]; - u8 ch2 = sprite[(v ^ flip_y) | 8]; - u32 color = lut[ch1] | (lut[ch2] << 1); - if (start_col == 0) { - dst[0] = (dst[0] & mask) | color; - } else { - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + u8 clr0 = blending[0][clr]; + u8 clr1 = blending[1][clr]; + u8 clr2 = blending[2][clr]; + u8 clr3 = blending[3][clr]; + if (!flip_y) { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v]; + u8 ch2 = sprite[v | 8]; + u32 color = lut[ch1] | (lut[ch2] << 1); + u32 col1mask = (color & 0x11111111); + u32 col2mask = (color & 0x22222222) >> 1; + u32 col3mask = (col1mask & col2mask) * 0xF; + col1mask &= ~col3mask; + col2mask &= ~col3mask; + col3mask = (color & col3mask) & 0x11111111; + u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; + color = (clr0 * col0mask) | + (clr1 * col1mask) | + (clr2 * col2mask) | + (clr3 * col3mask); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + } else { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v)]; + u8 ch2 = sprite[(7 - v) | 8]; + u32 color = lut[ch1] | (lut[ch2] << 1); + u32 col1mask = (color & 0x11111111); + u32 col2mask = (color & 0x22222222) >> 1; + u32 col3mask = (col1mask & col2mask) * 0xF; + col1mask &= ~col3mask; + col2mask &= ~col3mask; + col3mask = (color & col3mask) & 0x11111111; + u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; + color = (clr0 * col0mask) | + (clr1 * col1mask) | + (clr2 * col2mask) | + (clr3 * col3mask); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | 
(color >> shift_right); + } } } - // } else { - // for(size_t v = 0; v < 8; v++, dst++) { - // if ((y + v) >= SCREEN_HEIGHT) break; - // u8 ch1 = sprite[(7 - v)]; - // u32 color_1 = lut[ch1]; - // u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; - // u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); - // if (start_col == 0) { - // dst[0] = (dst[0] & mask) | color; - // } else { - // dst[0] = (dst[0] & (mask << shift_left)) | color; - // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); - // } - // } - // } } else { - // ICN - // if (!flip_y) { - // for(size_t v = 0; v < 8; v++, dst++) { - // if ((y + v) >= SCREEN_HEIGHT) break; - // u8 ch1 = sprite[v]; - // u32 color= lut[ch1]; - // u32 mask = ~color; - // color *= clr & 3; - // if (start_col == 0) { - // dst[0] = (dst[0] & ~mask) | color; - // } else { - // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); - // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); - // } - // } - // } else { - // for(size_t v = 0; v < 8; v++, dst++) { - // if ((y + v) >= SCREEN_HEIGHT) break; - // u8 ch1 = sprite[(7 - v)]; - // u32 color= lut[ch1]; - // u32 mask = ~color; - // color *= clr & 3; - // if (start_col == 0) { - // dst[0] = (dst[0] & ~mask) | color; - // } else { - // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); - // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); - // } - // } - // } + u8 clr0 = blending[0][clr]; + u8 clr1 = blending[1][clr]; + u8 clr2 = blending[2][clr]; + u8 clr3 = blending[3][clr]; + if (!flip_y) { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v]; + u8 ch2 = sprite[v | 8]; + u32 color = lut[ch1] | (lut[ch2] << 1); + u32 col1mask = (color & 0x11111111); + u32 col2mask = (color & 0x22222222) >> 1; + u32 col3mask = (col1mask & col2mask) * 0xF; + col1mask &= ~col3mask; + col2mask &= ~col3mask; + col3mask = (color & col3mask) & 0x11111111; + u32 mask 
= ~(col1mask | col2mask | col3mask) & 0x11111111; + color = (clr1 * col1mask) | + (clr2 * col2mask) | + (clr3 * col3mask); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + } else { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v)]; + u8 ch2 = sprite[(7 - v) | 8]; + u32 color = lut[ch1] | (lut[ch2] << 1); + u32 col1mask = (color & 0x11111111); + u32 col2mask = (color & 0x22222222) >> 1; + u32 col3mask = (col1mask & col2mask) * 0xF; + col1mask &= ~col3mask; + col2mask &= ~col3mask; + col3mask = (color & col3mask) & 0x11111111; + u32 mask = ~(col1mask | col2mask | col3mask) & 0x11111111; + color = (clr1 * col1mask) | + (clr2 * col2mask) | + (clr3 * col3mask); + if (start_col == 0) { + dst[0] = (dst[0] & mask) | color; + } else { + dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + } + } + } } // dirty_tiles[y >> 3] |= dirtyflag; // dirty_tiles[(y + 7) >> 3] |= dirtyflag; -- cgit v1.2.1