about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-19 10:20:14 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-19 15:28:08 +0200
commit e08a6dc4f278d2df2525cdf189c9447c372f1e98 (patch)
tree a343866a3135bb793f5c1cccfa3fff19509ee8ae
parent 5f47e14f6ab4e3b346de1d62c65452e674edbebe (diff)
download uxngba-e08a6dc4f278d2df2525cdf189c9447c372f1e98.tar.gz
uxngba-e08a6dc4f278d2df2525cdf189c9447c372f1e98.zip
Minor cleanup and fix small color bug
-rw-r--r-- src/ppu.c 254
1 files changed, 46 insertions, 208 deletions
diff --git a/src/ppu.c b/src/ppu.c
index a841b97..bafa520 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -39,116 +39,6 @@ WITH REGARD TO THIS SOFTWARE.
39// Keyboard. 39// Keyboard.
40#define SPRITE_START_IDX 640 40#define SPRITE_START_IDX 640
41 41
42static u32 lut_2bpp[256] = {
43 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100,
44 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001,
45 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110,
46 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011,
47 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000,
48 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101,
49 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010,
50 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111,
51 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100,
52 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001,
53 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110,
54 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011,
55 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000,
56 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101,
57 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010,
58 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111,
59 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100,
60 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001,
61 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110,
62 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011,
63 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000,
64 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101,
65 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010,
66 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111,
67 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100,
68 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001,
69 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110,
70 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011,
71 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000,
72 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101,
73 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010,
74 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111,
75 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100,
76 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001,
77 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110,
78 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011,
79 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000,
80 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101,
81 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010,
82 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111,
83 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100,
84 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001,
85 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110,
86 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011,
87 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000,
88 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101,
89 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010,
90 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111,
91 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100,
92 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001,
93 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110,
94 0x11111111
95};
96
97static u32 lut2bpp_flipx[256] = {
98 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000,
99 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000,
100 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000,
101 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000,
102 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000,
103 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000,
104 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100,
105 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100,
106 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100,
107 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100,
108 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100,
109 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100,
110 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010,
111 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010,
112 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010,
113 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010,
114 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010,
115 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010,
116 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010,
117 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110,
118 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110,
119 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110,
120 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110,
121 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110,
122 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110,
123 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001,
124 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001,
125 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001,
126 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001,
127 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001,
128 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001,
129 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001,
130 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101,
131 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101,
132 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101,
133 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101,
134 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101,
135 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101,
136 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011,
137 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011,
138 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011,
139 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011,
140 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011,
141 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011,
142 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111,
143 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111,
144 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111,
145 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111,
146 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111,
147 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111,
148 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111,
149 0x11111111
150};
151
152static u32 dec_byte_flip_x[256] = { 42static u32 dec_byte_flip_x[256] = {
153 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 43 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100,
154 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, 44 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001,
@@ -304,60 +194,58 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) {
304 dirty_tiles[tile_y] |= 1 << tile_x; 194 dirty_tiles[tile_y] |= 1 << tile_x;
305} 195}
306 196
197#if NEW_PPU == 0
307IWRAM_CODE 198IWRAM_CODE
308static inline
309u32
310decode_1bpp(u8 row, u8 flip_x) {
311 return flip_x ? dec_byte_flip_x[row] : dec_byte[row];
312}
313
314IWRAM_CODE
315static inline
316void 199void
317draw_1bpp_row(u32 *layer, size_t x, size_t y, u8 sprite, u8 clr, u8 flip_x) { 200ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
318 BOUNDCHECK_SCREEN(x, y); 201 u8 sprline;
319 202 u16 v;
320 size_t tile_x = x / 8; 203 u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3));
321 size_t tile_y = y / 8;
322 size_t start_col = x % 8;
323 size_t start_row = y % 8;
324 size_t shift_left = start_col * 4;
325 size_t shift_right = (8 - start_col) * 4;
326 204
327 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; 205 u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8));
328 u32 color = decode_1bpp(sprite, flip_x); 206 u32 *layerptr = &layer[layerpos];
329 u32 mask = ~color; 207 u32 shift = (x & 7) << 2;
330 color *= clr; 208 u32 *lut_expand = flipx ? dec_byte_flip_x : dec_byte;
331 if (start_col == 0) {
332 dst[0] = (dst[0] & ~mask) | color;
333 } else {
334 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
335 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
336 }
337 209
338 // TODO: different blend modes? 210 if (flipy) flipy = 7;
339}
340 211
341IWRAM_CODE
342void
343draw_icn(u32 * layer, size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
344 BOUNDCHECK_SCREEN(x, y); 212 BOUNDCHECK_SCREEN(x, y);
345 if (!flip_y) { 213
346 for(size_t v = 0; v < 8; v++) { 214 if (blending[4][color]) {
215 u64 mask = ~((u64)0xFFFFFFFF << shift);
216
217 for (v = 0; v < 8; v++, layerptr++) {
347 if ((y + v) >= SCREEN_HEIGHT) break; 218 if ((y + v) >= SCREEN_HEIGHT) break;
348 u8 ch1 = sprite[v]; 219
349 draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x); 220 sprline = sprite[v ^ flipy];
221 u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift;
222 data |= (u64)(lut_expand[sprline ^ 0xFF] * (color >> 2)) << shift;
223
224 layerptr[0] = (layerptr[0] & mask) | data;
225 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
226
227 if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
350 } 228 }
351 } else { 229 } else {
352 for(size_t v = 0; v < 8; v++) { 230 for (v = 0; v < 8; v++, layerptr++) {
353 if ((y + v) >= SCREEN_HEIGHT) break; 231 if ((y + v) >= SCREEN_HEIGHT) break;
354 u8 ch1 = sprite[(7 - v)]; 232
355 draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x); 233 sprline = sprite[v ^ flipy];
234 u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift);
235 u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift;
236
237 layerptr[0] = (layerptr[0] & mask) | data;
238 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
239
240 if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
356 } 241 }
357 } 242 }
243
244 dirty_tiles[y >> 3] |= dirtyflag;
245 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
358} 246}
359 247
360#if NEW_PPU == 1 248#else
361IWRAM_CODE 249IWRAM_CODE
362UNROLL_LOOPS 250UNROLL_LOOPS
363void 251void
@@ -371,13 +259,14 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
371 size_t shift_left = start_col * 4; 259 size_t shift_left = start_col * 4;
372 size_t shift_right = (8 - start_col) * 4; 260 size_t shift_right = (8 - start_col) * 4;
373 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; 261 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
262 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
374 if (blending[4][clr]) { 263 if (blending[4][clr]) {
375 u64 mask = ~((u64)0xFFFFFFFF); 264 u64 mask = ~((u64)0xFFFFFFFF);
376 if (!flip_y) { 265 if (!flip_y) {
377 for(size_t v = 0; v < 8; v++, dst++) { 266 for(size_t v = 0; v < 8; v++, dst++) {
378 if ((y + v) >= SCREEN_HEIGHT) break; 267 if ((y + v) >= SCREEN_HEIGHT) break;
379 u8 ch1 = sprite[v]; 268 u8 ch1 = sprite[v];
380 u32 color_1 = decode_1bpp(ch1, flip_x); 269 u32 color_1 = lut[ch1];
381 u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; 270 u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111;
382 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); 271 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2));
383 if (start_col == 0) { 272 if (start_col == 0) {
@@ -391,7 +280,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
391 for(size_t v = 0; v < 8; v++, dst++) { 280 for(size_t v = 0; v < 8; v++, dst++) {
392 if ((y + v) >= SCREEN_HEIGHT) break; 281 if ((y + v) >= SCREEN_HEIGHT) break;
393 u8 ch1 = sprite[(7 - v)]; 282 u8 ch1 = sprite[(7 - v)];
394 u32 color_1 = decode_1bpp(ch1, flip_x); 283 u32 color_1 = lut[ch1];
395 u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; 284 u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111;
396 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); 285 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2));
397 if (start_col == 0) { 286 if (start_col == 0) {
@@ -407,9 +296,9 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
407 for(size_t v = 0; v < 8; v++, dst++) { 296 for(size_t v = 0; v < 8; v++, dst++) {
408 if ((y + v) >= SCREEN_HEIGHT) break; 297 if ((y + v) >= SCREEN_HEIGHT) break;
409 u8 ch1 = sprite[v]; 298 u8 ch1 = sprite[v];
410 u32 color = decode_1bpp(ch1, flip_x); 299 u32 color= lut[ch1];
411 u32 mask = ~color; 300 u32 mask = ~color;
412 color *= clr; 301 color *= clr & 3;
413 if (start_col == 0) { 302 if (start_col == 0) {
414 dst[0] = (dst[0] & ~mask) | color; 303 dst[0] = (dst[0] & ~mask) | color;
415 } else { 304 } else {
@@ -421,9 +310,9 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
421 for(size_t v = 0; v < 8; v++, dst++) { 310 for(size_t v = 0; v < 8; v++, dst++) {
422 if ((y + v) >= SCREEN_HEIGHT) break; 311 if ((y + v) >= SCREEN_HEIGHT) break;
423 u8 ch1 = sprite[(7 - v)]; 312 u8 ch1 = sprite[(7 - v)];
424 u32 color = decode_1bpp(ch1, flip_x); 313 u32 color= lut[ch1];
425 u32 mask = ~color; 314 u32 mask = ~color;
426 color *= clr; 315 color *= clr & 3;
427 if (start_col == 0) { 316 if (start_col == 0) {
428 dst[0] = (dst[0] & ~mask) | color; 317 dst[0] = (dst[0] & ~mask) | color;
429 } else { 318 } else {
@@ -437,57 +326,6 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
437 // dirty_tiles[y >> 3] |= dirtyflag; 326 // dirty_tiles[y >> 3] |= dirtyflag;
438 // dirty_tiles[(y + 7) >> 3] |= dirtyflag; 327 // dirty_tiles[(y + 7) >> 3] |= dirtyflag;
439} 328}
440
441#else
442IWRAM_CODE
443void
444ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
445 u8 sprline;
446 u16 v;
447 u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3));
448
449 u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8));
450 u32 *layerptr = &layer[layerpos];
451 u32 shift = (x & 7) << 2;
452 u32 *lut_expand = flipx ? lut_2bpp : lut2bpp_flipx;
453
454 if (flipy) flipy = 7;
455
456 BOUNDCHECK_SCREEN(x, y);
457
458 if (blending[4][color]) {
459 u64 mask = ~((u64)0xFFFFFFFF << shift);
460
461 for (v = 0; v < 8; v++, layerptr++) {
462 if ((y + v) >= SCREEN_HEIGHT) break;
463
464 sprline = sprite[v ^ flipy];
465 u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift;
466 data |= (u64)(lut_expand[sprline ^ 0xFF] * (color >> 2)) << shift;
467
468 layerptr[0] = (layerptr[0] & mask) | data;
469 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
470
471 if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
472 }
473 } else {
474 for (v = 0; v < 8; v++, layerptr++) {
475 if ((y + v) >= SCREEN_HEIGHT) break;
476
477 sprline = sprite[v ^ flipy];
478 u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift);
479 u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift;
480
481 layerptr[0] = (layerptr[0] & mask) | data;
482 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
483
484 if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
485 }
486 }
487
488 dirty_tiles[y >> 3] |= dirtyflag;
489 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
490}
491#endif 329#endif
492 330
493IWRAM_CODE 331IWRAM_CODE
@@ -564,7 +402,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color,
564 // if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; 402 // if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return;
565 403
566 // if (color == 1) { 404 // if (color == 1) {
567 // u32 *lut_expand = flip_x ? lut_2bpp : lut2bpp_flipx; 405 // u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte;
568 // u64 mask = ~((u64)0xFFFFFFFF << shift); 406 // u64 mask = ~((u64)0xFFFFFFFF << shift);
569 407
570 // for (v = 0; v < 8; v++, layerptr++) { 408 // for (v = 0; v < 8; v++, layerptr++) {