diff options
Diffstat (limited to 'src/ppu.c')
-rw-r--r-- | src/ppu.c | 426 |
1 files changed, 281 insertions, 145 deletions
@@ -15,7 +15,7 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
15 | WITH REGARD TO THIS SOFTWARE. | 15 | WITH REGARD TO THIS SOFTWARE. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #define NEW_PPU 1 | 18 | #define NEW_PPU 0 |
19 | 19 | ||
20 | #define FG_FRONT ((u32*)(MEM_VRAM)) | 20 | #define FG_FRONT ((u32*)(MEM_VRAM)) |
21 | #define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) | 21 | #define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) |
@@ -149,12 +149,55 @@ static u32 dec_byte[256] = { | |||
149 | 0x11111111 | 149 | 0x11111111 |
150 | }; | 150 | }; |
151 | 151 | ||
152 | // Blending table | ||
153 | // | ||
154 | // | BLEND BITS | COLOR | ||
155 | // CLR | 0 0 0 0 | 0 1 2 3 T | ||
156 | // ----+-------------+---------- | ||
157 | // 0x0 | 0 0 0 0 | 0 0 1 2 1 | ||
158 | // 0x1 | 0 0 0 1 | 0 1 2 3 1 | ||
159 | // 0x2 | 0 0 1 0 | 0 2 3 1 1 | ||
160 | // 0x3 | 0 0 1 1 | 0 3 1 2 1 | ||
161 | // 0x4 | 0 1 0 0 | 1 0 1 2 1 | ||
162 | // 0x5 | 0 1 0 1 | * 1 2 3 0 | ||
163 | // 0x6 | 0 1 1 0 | 1 2 3 1 1 | ||
164 | // 0x7 | 0 1 1 1 | 1 3 1 2 1 | ||
165 | // 0x8 | 1 0 0 0 | 2 0 1 2 1 | ||
166 | // 0x9 | 1 0 0 1 | 2 1 2 3 1 | ||
167 | // 0xA | 1 0 1 0 | * 2 3 1 0 | ||
168 | // 0xB | 1 0 1 1 | 2 3 1 2 1 | ||
169 | // 0xC | 1 1 0 0 | 3 0 1 2 1 | ||
170 | // 0xD | 1 1 0 1 | 3 1 2 3 1 | ||
171 | // 0xE | 1 1 1 0 | 3 2 3 1 1 | ||
172 | // 0xF | 1 1 1 1 | * 3 1 2 0 | ||
173 | // ----+-------------+---------- | ||
174 | // | ||
175 | // Colors 0x5, 0xA and 0xF have transparent background and must be dealt | ||
176 | // with separately, blending color 0 with existing data in that pixel. | ||
177 | // | ||
178 | // We need to do the following: | ||
179 | // | ||
180 | // 1. Extract the color row as u32 (4bpp). | ||
181 | // u32 color = lut[ch1] | (lut[ch2] << 1); // color == 0x00112233 | ||
182 | // 2. Split the row into each of its colors: | ||
183 | // u32 col3 = (color & (color >> 1)) & 0x11111111; // 0x00000011 | ||
184 | // u32 col2 = ((color >> 1) & 0x11111111) & ~col3; // 0x00001100 | ||
185 | // u32 col1 = (color & 0x11111111) & ~col3; // 0x00110000 | ||
186 | // u32 col0 = ~(col3 | col2 | col1) & 0x11111111; // 0x11000000 | ||
187 | // 3. Multiply based on the table, for example for color 0x2: 0123 -> 0231 | ||
188 | // col0 *= 0 | ||
189 | // col1 *= 2 | ||
190 | // col2 *= 3 | ||
191 | // col3 *= 1 | ||
192 | // 4. Obtain final color by ORing the individual ones. | ||
193 | // color = col0 | col1 | col2 | col3; | ||
194 | // | ||
152 | static u8 blending[5][16] = { | 195 | static u8 blending[5][16] = { |
153 | {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, | 196 | {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map. |
154 | {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, | 197 | {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, // Color 1 map. |
155 | {1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1}, | 198 | {1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1}, // Color 2 map. |
156 | {2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2}, | 199 | {2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2}, // Color 3 map. |
157 | {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0} | 200 | {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0}, // Transparency marker. |
158 | }; | 201 | }; |
159 | 202 | ||
160 | static u32 dirty_tiles[21] = {0}; | 203 | static u32 dirty_tiles[21] = {0}; |
@@ -182,7 +225,7 @@ putcolors(u8 *addr) { | |||
182 | 225 | ||
183 | IWRAM_CODE | 226 | IWRAM_CODE |
184 | void | 227 | void |
185 | ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) { | 228 | ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { |
186 | if (x > SCREEN_WIDTH || y > SCREEN_HEIGHT) return; | 229 | if (x > SCREEN_WIDTH || y > SCREEN_HEIGHT) return; |
187 | size_t tile_x = x / 8; | 230 | size_t tile_x = x / 8; |
188 | size_t tile_y = y / 8; | 231 | size_t tile_y = y / 8; |
@@ -190,14 +233,14 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) { | |||
190 | size_t start_row = y % 8; | 233 | size_t start_row = y % 8; |
191 | size_t pos = (start_row + ((tile_x + tile_y * 32) * 8)); | 234 | size_t pos = (start_row + ((tile_x + tile_y * 32) * 8)); |
192 | size_t shift = start_col * 4; | 235 | size_t shift = start_col * 4; |
193 | layer[pos] = (layer[pos] & (~(0xF << shift))) | (color << shift); | 236 | layer[pos] = (layer[pos] & (~(0xF << shift))) | (clr << shift); |
194 | dirty_tiles[tile_y] |= 1 << tile_x; | 237 | dirty_tiles[tile_y] |= 1 << tile_x; |
195 | } | 238 | } |
196 | 239 | ||
197 | #if NEW_PPU == 0 | 240 | #if NEW_PPU == 0 |
198 | IWRAM_CODE | 241 | IWRAM_CODE |
199 | void | 242 | void |
200 | ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { | 243 | ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { |
201 | u8 sprline; | 244 | u8 sprline; |
202 | u16 v; | 245 | u16 v; |
203 | u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); | 246 | u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); |
@@ -205,21 +248,21 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { | |||
205 | u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); | 248 | u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); |
206 | u32 *layerptr = &layer[layerpos]; | 249 | u32 *layerptr = &layer[layerpos]; |
207 | u32 shift = (x & 7) << 2; | 250 | u32 shift = (x & 7) << 2; |
208 | u32 *lut_expand = flipx ? dec_byte_flip_x : dec_byte; | 251 | u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte; |
209 | 252 | ||
210 | if (flipy) flipy = 7; | 253 | if (flip_y) flip_y = 7; |
211 | 254 | ||
212 | BOUNDCHECK_SCREEN(x, y); | 255 | BOUNDCHECK_SCREEN(x, y); |
213 | 256 | ||
214 | if (blending[4][color]) { | 257 | if (blending[4][clr]) { |
215 | u64 mask = ~((u64)0xFFFFFFFF << shift); | 258 | u64 mask = ~((u64)0xFFFFFFFF << shift); |
216 | 259 | ||
217 | for (v = 0; v < 8; v++, layerptr++) { | 260 | for (v = 0; v < 8; v++, layerptr++) { |
218 | if ((y + v) >= SCREEN_HEIGHT) break; | 261 | if ((y + v) >= SCREEN_HEIGHT) break; |
219 | 262 | ||
220 | sprline = sprite[v ^ flipy]; | 263 | sprline = sprite[v ^ flip_y]; |
221 | u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; | 264 | u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift; |
222 | data |= (u64)(lut_expand[sprline ^ 0xFF] * (color >> 2)) << shift; | 265 | data |= (u64)(lut_expand[sprline ^ 0xFF] * (clr >> 2)) << shift; |
223 | 266 | ||
224 | layerptr[0] = (layerptr[0] & mask) | data; | 267 | layerptr[0] = (layerptr[0] & mask) | data; |
225 | layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | 268 | layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); |
@@ -230,9 +273,9 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { | |||
230 | for (v = 0; v < 8; v++, layerptr++) { | 273 | for (v = 0; v < 8; v++, layerptr++) { |
231 | if ((y + v) >= SCREEN_HEIGHT) break; | 274 | if ((y + v) >= SCREEN_HEIGHT) break; |
232 | 275 | ||
233 | sprline = sprite[v ^ flipy]; | 276 | sprline = sprite[v ^ flip_y]; |
234 | u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift); | 277 | u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift); |
235 | u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; | 278 | u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift; |
236 | 279 | ||
237 | layerptr[0] = (layerptr[0] & mask) | data; | 280 | layerptr[0] = (layerptr[0] & mask) | data; |
238 | layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | 281 | layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); |
@@ -251,7 +294,6 @@ UNROLL_LOOPS | |||
251 | void | 294 | void |
252 | ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | 295 | ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { |
253 | BOUNDCHECK_SCREEN(x, y); | 296 | BOUNDCHECK_SCREEN(x, y); |
254 | |||
255 | size_t tile_x = x / 8; | 297 | size_t tile_x = x / 8; |
256 | size_t tile_y = y / 8; | 298 | size_t tile_y = y / 8; |
257 | size_t start_col = x % 8; | 299 | size_t start_col = x % 8; |
@@ -367,143 +409,237 @@ draw_2bpp_row(void *layer, size_t x, size_t y, u8 a, u8 b, u8 flip_x) { | |||
367 | // TODO: different blend modes? | 409 | // TODO: different blend modes? |
368 | } | 410 | } |
369 | 411 | ||
412 | #if NEW_PPU == 0 | ||
370 | IWRAM_CODE | 413 | IWRAM_CODE |
371 | void | 414 | void |
372 | ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, | 415 | ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flip_x, u8 flip_y) { |
373 | u8 flip_x, u8 flip_y) { | 416 | u8 sprline1, sprline2; |
374 | // u32 *dst = &layer[0]; | 417 | u8 xrightedge = x < ((32 - 1) * 8); |
375 | // *dst = 0x111111111; | 418 | u16 v, h; |
376 | // if (!flip_y) { | 419 | u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); |
377 | // for(size_t v = 0; v < 8; v++) { | 420 | |
378 | // // if ((y + v) >= SCREEN_HEIGHT) break; | 421 | u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); |
379 | // u8 ch1 = sprite[v + 0]; | 422 | u32 *layerptr = &layer[layerpos]; |
380 | // u8 ch2 = sprite[v + 8]; | 423 | u32 shift = (x & 7) << 2; |
381 | // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x); | 424 | |
382 | // } | 425 | if (flip_y) flip_y = 7; |
383 | // } else { | 426 | |
384 | // for(size_t v = 0; v < 8; v++) { | 427 | BOUNDCHECK_SCREEN(x, y); |
385 | // // if ((y + v) >= SCREEN_HEIGHT) break; | 428 | |
386 | // u8 ch1 = sprite[(7 - v) + 0]; | 429 | if (color == 1) { |
387 | // u8 ch2 = sprite[(7 - v) + 8]; | 430 | u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte; |
388 | // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x); | 431 | u64 mask = ~((u64)0xFFFFFFFF << shift); |
389 | // } | 432 | |
390 | // } | 433 | for (v = 0; v < 8; v++, layerptr++) { |
391 | // u8 sprline1, sprline2; | 434 | if ((y + v) >= (24 * 8)) break; |
392 | // u8 xrightedge = x < ((32 - 1) * 8); | 435 | |
393 | // u16 v, h; | 436 | sprline1 = sprite[v ^ flip_y]; |
394 | // u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); | 437 | sprline2 = sprite[(v ^ flip_y) | 8]; |
395 | 438 | u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); | |
396 | // u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); | 439 | u64 data = ((u64) (data32 & 0x33333333)) << shift; |
397 | // u32 *layerptr = &layer[layerpos]; | 440 | |
398 | // u32 shift = (x & 7) << 2; | 441 | layerptr[0] = (layerptr[0] & mask) | data; |
399 | 442 | if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | |
400 | // if (flip_y) flip_y = 7; | 443 | |
401 | 444 | if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; | |
402 | // if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; | 445 | } |
403 | 446 | } else if (blending[4][color]) { | |
404 | // if (color == 1) { | 447 | u64 mask = ~((u64)0xFFFFFFFF << shift); |
405 | // u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte; | 448 | |
406 | // u64 mask = ~((u64)0xFFFFFFFF << shift); | 449 | for (v = 0; v < 8; v++, layerptr++) { |
407 | 450 | if ((y + v) >= (24 * 8)) break; | |
408 | // for (v = 0; v < 8; v++, layerptr++) { | 451 | |
409 | // if ((y + v) >= (24 * 8)) break; | 452 | u8 ch1 = sprite[v ^ flip_y]; |
410 | 453 | u8 ch2 = sprite[(v ^ flip_y) | 8]; | |
411 | // sprline1 = sprite[v ^ flip_y]; | 454 | u32 data32 = 0; |
412 | // sprline2 = sprite[(v ^ flip_y) | 8]; | 455 | |
413 | 456 | if (!flip_x) { | |
414 | // u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); | 457 | for (h = 0; h < 8; h++) { |
415 | // u64 data = ((u64) (data32 & 0x33333333)) << shift; | 458 | data32 <<= 4; |
416 | 459 | ||
417 | // layerptr[0] = (layerptr[0] & mask) | data; | 460 | u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); |
418 | // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | 461 | data32 |= blending[ch][color]; |
419 | 462 | ||
420 | // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; | 463 | ch1 >>= 1; ch2 >>= 1; |
421 | // } | 464 | } |
422 | // } else if (blending[4][color]) { | 465 | } else { |
423 | // u64 mask = ~((u64)0xFFFFFFFF << shift); | 466 | for (h = 0; h < 8; h++) { |
424 | 467 | data32 <<= 4; | |
425 | // for (v = 0; v < 8; v++, layerptr++) { | ||
426 | // if ((y + v) >= (24 * 8)) break; | ||
427 | |||
428 | // u8 ch1 = sprite[v ^ flip_y]; | ||
429 | // u8 ch2 = sprite[(v ^ flip_y) | 8]; | ||
430 | // u32 data32 = 0; | ||
431 | |||
432 | // if (!flip_x) { | ||
433 | // for (h = 0; h < 8; h++) { | ||
434 | // data32 <<= 4; | ||
435 | |||
436 | // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); | ||
437 | // data32 |= blending[ch][color]; | ||
438 | |||
439 | // ch1 >>= 1; ch2 >>= 1; | ||
440 | // } | ||
441 | // } else { | ||
442 | // for (h = 0; h < 8; h++) { | ||
443 | // data32 <<= 4; | ||
444 | |||
445 | // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); | ||
446 | // data32 |= blending[ch][color]; | ||
447 | |||
448 | // ch1 <<= 1; ch2 <<= 1; | ||
449 | // } | ||
450 | // } | ||
451 | |||
452 | // u64 data = ((u64) (data32 & 0x33333333)) << shift; | ||
453 | |||
454 | // layerptr[0] = (layerptr[0] & mask) | data; | ||
455 | // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | ||
456 | |||
457 | // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; | ||
458 | // } | ||
459 | // } else { | ||
460 | // for (v = 0; v < 8; v++, layerptr++) { | ||
461 | // if ((y + v) >= (24 * 8)) break; | ||
462 | |||
463 | // u8 ch1 = sprite[v ^ flip_y]; | ||
464 | // u8 ch2 = sprite[(v ^ flip_y) | 8]; | ||
465 | // u32 data32 = 0; | ||
466 | // u32 mask32 = 0; | ||
467 | |||
468 | // if (!flip_x) { | ||
469 | // for (h = 0; h < 8; h++) { | ||
470 | // data32 <<= 4; mask32 <<= 4; | ||
471 | |||
472 | // if ((ch1 | ch2) & 1) { | ||
473 | // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); | ||
474 | // data32 |= blending[ch][color]; | ||
475 | // mask32 |= 0xF; | ||
476 | // } | ||
477 | |||
478 | // ch1 >>= 1; ch2 >>= 1; | ||
479 | // } | ||
480 | // } else { | ||
481 | // for (h = 0; h < 8; h++) { | ||
482 | // data32 <<= 4; mask32 <<= 4; | ||
483 | |||
484 | // if ((ch1 | ch2) & 128) { | ||
485 | // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); | ||
486 | // data32 |= blending[ch][color]; | ||
487 | // mask32 |= 0xF; | ||
488 | // } | ||
489 | 468 | ||
490 | // ch1 <<= 1; ch2 <<= 1; | 469 | u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); |
491 | // } | 470 | data32 |= blending[ch][color]; |
492 | // } | ||
493 | 471 | ||
494 | // u64 data = ((u64) (data32 & 0x33333333)) << shift; | 472 | ch1 <<= 1; ch2 <<= 1; |
495 | // u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); | 473 | } |
474 | } | ||
475 | |||
476 | u64 data = ((u64) (data32 & 0x33333333)) << shift; | ||
477 | |||
478 | layerptr[0] = (layerptr[0] & mask) | data; | ||
479 | if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | ||
480 | |||
481 | if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; | ||
482 | } | ||
483 | } else { | ||
484 | for (v = 0; v < 8; v++, layerptr++) { | ||
485 | if ((y + v) >= (24 * 8)) break; | ||
496 | 486 | ||
497 | // layerptr[0] = (layerptr[0] & mask) | data; | 487 | u8 ch1 = sprite[v ^ flip_y]; |
498 | // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | 488 | u8 ch2 = sprite[(v ^ flip_y) | 8]; |
489 | u32 data32 = 0; | ||
490 | u32 mask32 = 0; | ||
499 | 491 | ||
500 | // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; | 492 | if (!flip_x) { |
501 | // } | 493 | for (h = 0; h < 8; h++) { |
502 | // } | 494 | data32 <<= 4; mask32 <<= 4; |
503 | 495 | ||
496 | if ((ch1 | ch2) & 1) { | ||
497 | u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); | ||
498 | data32 |= blending[ch][color]; | ||
499 | mask32 |= 0xF; | ||
500 | } | ||
501 | |||
502 | ch1 >>= 1; ch2 >>= 1; | ||
503 | } | ||
504 | } else { | ||
505 | for (h = 0; h < 8; h++) { | ||
506 | data32 <<= 4; mask32 <<= 4; | ||
507 | |||
508 | if ((ch1 | ch2) & 128) { | ||
509 | u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); | ||
510 | data32 |= blending[ch][color]; | ||
511 | mask32 |= 0xF; | ||
512 | } | ||
513 | |||
514 | ch1 <<= 1; ch2 <<= 1; | ||
515 | } | ||
516 | } | ||
517 | |||
518 | u64 data = ((u64) (data32 & 0x33333333)) << shift; | ||
519 | u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); | ||
520 | |||
521 | layerptr[0] = (layerptr[0] & mask) | data; | ||
522 | if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); | ||
523 | |||
524 | if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; | ||
525 | } | ||
526 | } | ||
527 | |||
528 | dirty_tiles[y >> 3] |= dirtyflag; | ||
529 | dirty_tiles[(y + 7) >> 3] |= dirtyflag; | ||
530 | } | ||
531 | #else | ||
532 | IWRAM_CODE | ||
533 | // UNROLL_LOOPS | ||
534 | void | ||
535 | ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | ||
536 | BOUNDCHECK_SCREEN(x, y); | ||
537 | size_t tile_x = x / 8; | ||
538 | size_t tile_y = y / 8; | ||
539 | size_t start_col = x % 8; | ||
540 | size_t start_row = y % 8; | ||
541 | size_t shift_left = start_col * 4; | ||
542 | size_t shift_right = (8 - start_col) * 4; | ||
543 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; | ||
544 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | ||
545 | if (clr == 1) { | ||
546 | // u64 mask = ~((u64)0xFFFFFFFF << shift_left); | ||
547 | // if (!flip_y) { | ||
548 | // for(size_t v = 0; v < 8; v++, dst++) { | ||
549 | // if ((y + v) >= SCREEN_HEIGHT) break; | ||
550 | // u8 ch1 = sprite[v]; | ||
551 | // u8 ch2 = sprite[v | 8]; | ||
552 | // u32 color = lut[ch1] | (lut[ch2] << 1); | ||
553 | // if (start_col == 0) { | ||
554 | // dst[0] = (dst[0] & mask) | color; | ||
555 | // } else { | ||
556 | // dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
557 | // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
558 | // } | ||
559 | // } | ||
560 | // } else { | ||
561 | // for(size_t v = 0; v < 8; v++, dst++) { | ||
562 | // if ((y + v) >= SCREEN_HEIGHT) break; | ||
563 | // u8 ch1 = sprite[(7 - v)]; | ||
564 | // u8 ch2 = sprite[(7 - v) | 8]; | ||
565 | // u32 color = lut[ch1] | (lut[ch2] << 1); | ||
566 | // if (start_col == 0) { | ||
567 | // dst[0] = (dst[0] & mask) | color; | ||
568 | // } else { | ||
569 | // dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
570 | // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
571 | // } | ||
572 | // } | ||
573 | // } | ||
574 | } else if (blending[4][clr]) { | ||
575 | // ICN | ||
576 | u64 mask = ~((u64)0xFFFFFFFF << shift_left); | ||
577 | // DEBUG: remove flip_y from sprite fetching | ||
578 | // if (!flip_y) { | ||
579 | if (flip_y) flip_y = 7; | ||
580 | for(size_t v = 0; v < 8; v++, dst++) { | ||
581 | if ((y + v) >= SCREEN_HEIGHT) break; | ||
582 | u8 ch1 = sprite[v ^ flip_y]; | ||
583 | u8 ch2 = sprite[(v ^ flip_y) | 8]; | ||
584 | u32 color = lut[ch1] | (lut[ch2] << 1); | ||
585 | if (start_col == 0) { | ||
586 | dst[0] = (dst[0] & mask) | color; | ||
587 | } else { | ||
588 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
589 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
590 | } | ||
591 | } | ||
592 | // } else { | ||
593 | // for(size_t v = 0; v < 8; v++, dst++) { | ||
594 | // if ((y + v) >= SCREEN_HEIGHT) break; | ||
595 | // u8 ch1 = sprite[(7 - v)]; | ||
596 | // u32 color_1 = lut[ch1]; | ||
597 | // u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; | ||
598 | // u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); | ||
599 | // if (start_col == 0) { | ||
600 | // dst[0] = (dst[0] & mask) | color; | ||
601 | // } else { | ||
602 | // dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
603 | // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
604 | // } | ||
605 | // } | ||
606 | // } | ||
607 | } else { | ||
608 | // ICN | ||
609 | // if (!flip_y) { | ||
610 | // for(size_t v = 0; v < 8; v++, dst++) { | ||
611 | // if ((y + v) >= SCREEN_HEIGHT) break; | ||
612 | // u8 ch1 = sprite[v]; | ||
613 | // u32 color= lut[ch1]; | ||
614 | // u32 mask = ~color; | ||
615 | // color *= clr & 3; | ||
616 | // if (start_col == 0) { | ||
617 | // dst[0] = (dst[0] & ~mask) | color; | ||
618 | // } else { | ||
619 | // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
620 | // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
621 | // } | ||
622 | // } | ||
623 | // } else { | ||
624 | // for(size_t v = 0; v < 8; v++, dst++) { | ||
625 | // if ((y + v) >= SCREEN_HEIGHT) break; | ||
626 | // u8 ch1 = sprite[(7 - v)]; | ||
627 | // u32 color= lut[ch1]; | ||
628 | // u32 mask = ~color; | ||
629 | // color *= clr & 3; | ||
630 | // if (start_col == 0) { | ||
631 | // dst[0] = (dst[0] & ~mask) | color; | ||
632 | // } else { | ||
633 | // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
634 | // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
635 | // } | ||
636 | // } | ||
637 | // } | ||
638 | } | ||
504 | // dirty_tiles[y >> 3] |= dirtyflag; | 639 | // dirty_tiles[y >> 3] |= dirtyflag; |
505 | // dirty_tiles[(y + 7) >> 3] |= dirtyflag; | 640 | // dirty_tiles[(y + 7) >> 3] |= dirtyflag; |
506 | } | 641 | } |
642 | #endif | ||
507 | 643 | ||
508 | IWRAM_CODE | 644 | IWRAM_CODE |
509 | void | 645 | void |