diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-19 17:58:04 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-19 17:58:04 +0200 |
commit | d68d16405b013967e0faa12f9dfc608e3bb3d0d4 (patch) | |
tree | f8436d01c6a9aa112cdb6ac6bd042fe2a7c0b046 /src/ppu.c | |
parent | 2e8ab813a9e4c2d908d5450e346a81d2a91a75e8 (diff) | |
download | uxngba-d68d16405b013967e0faa12f9dfc608e3bb3d0d4.tar.gz uxngba-d68d16405b013967e0faa12f9dfc608e3bb3d0d4.zip |
Update uxn core and fix some new ppu bugs
Diffstat (limited to 'src/ppu.c')
-rw-r--r-- | src/ppu.c | 162 |
1 files changed, 134 insertions, 28 deletions
@@ -247,6 +247,118 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { | |||
247 | dirty_tiles[tile_y] |= 1 << tile_x; | 247 | dirty_tiles[tile_y] |= 1 << tile_x; |
248 | } | 248 | } |
249 | 249 | ||
250 | IWRAM_CODE | ||
251 | void | ||
252 | ppu_rect(u32 *layer, size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | ||
253 | BOUNDCHECK_SCREEN(x0, y0); | ||
254 | BOUNDCHECK_SCREEN(x1, y1); | ||
255 | |||
256 | // Find row positions for the given x/y coordinates. | ||
257 | size_t tile_x0 = x0 / 8; | ||
258 | size_t tile_y0 = y0 / 8; | ||
259 | size_t tile_x1 = x1 / 8; | ||
260 | size_t tile_y1 = y1 / 8; | ||
261 | size_t start_col0 = x0 % 8; | ||
262 | size_t start_col1 = x1 % 8; | ||
263 | size_t start_row0 = y0 % 8; | ||
264 | size_t start_row1 = y1 % 8; | ||
265 | |||
266 | // Get a pointer to the backbuffer and the tile row. | ||
267 | u32 *buf_top = &layer[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; | ||
268 | u32 *buf_bot = &layer[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; | ||
269 | |||
270 | size_t dx = tile_x1 - tile_x0; | ||
271 | size_t dy = tile_y1 - tile_y0; | ||
272 | |||
273 | // We can update two lines at a time, which is faster than calling draw_line | ||
274 | // four times. | ||
275 | if (dx < 1) { | ||
276 | u32 row_mask = 0xFFFFFFFF; | ||
277 | row_mask >>= (7 - start_col1 - dx) * 4; | ||
278 | row_mask &= 0xFFFFFFFF << start_col0 * 4; | ||
279 | u32 row = (0x11111111 * clr) & row_mask; | ||
280 | buf_top[0] = (buf_top[0] & ~row_mask) | row; | ||
281 | buf_bot[0] = (buf_bot[0] & ~row_mask) | row; | ||
282 | dirty_tiles[tile_y0] |= 1 << tile_x0; | ||
283 | dirty_tiles[tile_y1] |= 1 << tile_x0; | ||
284 | } else { | ||
285 | size_t shift_left = start_col0 * 4; | ||
286 | size_t shift_right = (7 - start_col1) * 4; | ||
287 | u32 row_mask = 0xFFFFFFFF; | ||
288 | u32 row = 0x11111111 * clr; | ||
289 | buf_top[0] = buf_top[0] & ~(row_mask << shift_left); | ||
290 | buf_top[0] |= row << shift_left; | ||
291 | buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); | ||
292 | buf_bot[0] |= row << shift_left; | ||
293 | dirty_tiles[tile_y0] |= 1 << tile_x0; | ||
294 | dirty_tiles[tile_y1] |= 1 << tile_x0; | ||
295 | for (size_t i = 1; i < dx; i++) { | ||
296 | buf_top[i * 8] = row; | ||
297 | buf_bot[i * 8] = row; | ||
298 | dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); | ||
299 | dirty_tiles[tile_y1] |= 1 << (tile_x0 + i); | ||
300 | } | ||
301 | buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right); | ||
302 | buf_top[dx * 8] |= row >> shift_right; | ||
303 | buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right); | ||
304 | buf_bot[dx * 8] |= row >> shift_right; | ||
305 | dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); | ||
306 | dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx); | ||
307 | } | ||
308 | u32 row_mask_left = 0xF << start_col0 * 4; | ||
309 | u32 row_mask_right = 0xF << start_col1 * 4; | ||
310 | u32 row_left = (0x11111111 * clr) & row_mask_left; | ||
311 | u32 row_right = (0x11111111 * clr) & row_mask_right; | ||
312 | if (dy < 1) { | ||
313 | for (size_t i = 1; i < y1 - y0; i++, buf_top++) { | ||
314 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
315 | buf_top[1] |= row_left; | ||
316 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
317 | buf_top[1 + 8 * dx] |= row_right; | ||
318 | } | ||
319 | } else { | ||
320 | for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) { | ||
321 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
322 | buf_top[1] |= row_left; | ||
323 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
324 | buf_top[1 + 8 * dx] |= row_right; | ||
325 | } | ||
326 | buf_top += 8 * 31; | ||
327 | for (size_t j = 1; j < dy; j++) { | ||
328 | for (size_t i = 0; i < 8; i++, buf_top++) { | ||
329 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
330 | buf_top[1] |= row_left; | ||
331 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
332 | buf_top[1 + 8 * dx] |= row_right; | ||
333 | } | ||
334 | buf_top += 8 * 31; | ||
335 | dirty_tiles[tile_y0 + j] |= 1 << tile_x0; | ||
336 | dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx); | ||
337 | } | ||
338 | for (size_t i = 0; i < start_row1; i++, buf_top++) { | ||
339 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
340 | buf_top[1] |= row_left; | ||
341 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
342 | buf_top[1 + 8 * dx] |= row_right; | ||
343 | } | ||
344 | } | ||
345 | } | ||
346 | |||
347 | IWRAM_CODE | ||
348 | void | ||
349 | screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { | ||
350 | BOUNDCHECK_SCREEN(x0, y0); | ||
351 | BOUNDCHECK_SCREEN(x1, y1); | ||
352 | |||
353 | size_t dx = x1 - x0; | ||
354 | size_t dy = y1 - y0; | ||
355 | size_t n_rect = MIN(dx, dy); | ||
356 | n_rect = n_rect / 2 + 1; | ||
357 | for (size_t i = 0; i < n_rect; i++) { | ||
358 | ppu_rect(layer, x0 + i, y0 + i, x1 - i, y1 - i, clr); | ||
359 | } | ||
360 | } | ||
361 | |||
250 | #if NEW_PPU == 0 | 362 | #if NEW_PPU == 0 |
251 | IWRAM_CODE | 363 | IWRAM_CODE |
252 | void | 364 | void |
@@ -315,34 +427,28 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
315 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; | 427 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; |
316 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | 428 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; |
317 | if (blending[4][clr]) { | 429 | if (blending[4][clr]) { |
318 | u64 mask = ~((u64)0xFFFFFFFF); | 430 | u32 mask = 0xFFFFFFFF; |
319 | if (!flip_y) { | 431 | if (!flip_y) { |
320 | for(size_t v = 0; v < 8; v++, dst++) { | 432 | for(size_t v = 0; v < 8; v++, dst++) { |
321 | if ((y + v) >= SCREEN_HEIGHT) break; | 433 | if ((y + v) >= SCREEN_HEIGHT) break; |
322 | u8 ch1 = sprite[v]; | 434 | u8 ch1 = sprite[v]; |
323 | u32 color_1 = lut[ch1]; | 435 | u32 color_1 = lut[ch1]; |
324 | u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; | 436 | u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111; |
325 | u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); | 437 | u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); |
326 | if (start_col == 0) { | 438 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
327 | dst[0] = (dst[0] & mask) | color; | 439 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
328 | } else { | 440 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
329 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
330 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
331 | } | ||
332 | } | 441 | } |
333 | } else { | 442 | } else { |
334 | for(size_t v = 0; v < 8; v++, dst++) { | 443 | for(size_t v = 0; v < 8; v++, dst++) { |
335 | if ((y + v) >= SCREEN_HEIGHT) break; | 444 | if ((y + v) >= SCREEN_HEIGHT) break; |
336 | u8 ch1 = sprite[(7 - v)]; | 445 | u8 ch1 = sprite[(7 - v)]; |
337 | u32 color_1 = lut[ch1]; | 446 | u32 color_1 = lut[ch1]; |
338 | u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; | 447 | u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111; |
339 | u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); | 448 | u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); |
340 | if (start_col == 0) { | 449 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
341 | dst[0] = (dst[0] & mask) | color; | 450 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
342 | } else { | 451 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
343 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
344 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
345 | } | ||
346 | } | 452 | } |
347 | } | 453 | } |
348 | } else { | 454 | } else { |
@@ -359,6 +465,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
359 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | 465 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
360 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | 466 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
361 | } | 467 | } |
468 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | ||
362 | } | 469 | } |
363 | } else { | 470 | } else { |
364 | for(size_t v = 0; v < 8; v++, dst++) { | 471 | for(size_t v = 0; v < 8; v++, dst++) { |
@@ -373,6 +480,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
373 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | 480 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
374 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | 481 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
375 | } | 482 | } |
483 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | ||
376 | } | 484 | } |
377 | } | 485 | } |
378 | } | 486 | } |
@@ -529,6 +637,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
529 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 637 | dst[0] = (dst[0] & (mask << shift_left)) | color; |
530 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 638 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
531 | } | 639 | } |
640 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | ||
532 | } | 641 | } |
533 | } else { | 642 | } else { |
534 | for(size_t v = 0; v < 8; v++, dst++) { | 643 | for(size_t v = 0; v < 8; v++, dst++) { |
@@ -542,10 +651,11 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
542 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 651 | dst[0] = (dst[0] & (mask << shift_left)) | color; |
543 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 652 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
544 | } | 653 | } |
654 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | ||
545 | } | 655 | } |
546 | } | 656 | } |
547 | } else if (blending[4][clr]) { | 657 | } else if (blending[4][clr]) { |
548 | u64 mask = ~((u64)0xFFFFFFFF << shift_left); | 658 | u32 mask = 0xFFFFFFFF; |
549 | u8 clr0 = blending[0][clr]; | 659 | u8 clr0 = blending[0][clr]; |
550 | u8 clr1 = blending[1][clr]; | 660 | u8 clr1 = blending[1][clr]; |
551 | u8 clr2 = blending[2][clr]; | 661 | u8 clr2 = blending[2][clr]; |
@@ -567,12 +677,9 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
567 | (clr1 * col1mask) | | 677 | (clr1 * col1mask) | |
568 | (clr2 * col2mask) | | 678 | (clr2 * col2mask) | |
569 | (clr3 * col3mask); | 679 | (clr3 * col3mask); |
570 | if (start_col == 0) { | 680 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
571 | dst[0] = (dst[0] & mask) | color; | 681 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
572 | } else { | 682 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
573 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
574 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
575 | } | ||
576 | } | 683 | } |
577 | } else { | 684 | } else { |
578 | for(size_t v = 0; v < 8; v++, dst++) { | 685 | for(size_t v = 0; v < 8; v++, dst++) { |
@@ -591,12 +698,9 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
591 | (clr1 * col1mask) | | 698 | (clr1 * col1mask) | |
592 | (clr2 * col2mask) | | 699 | (clr2 * col2mask) | |
593 | (clr3 * col3mask); | 700 | (clr3 * col3mask); |
594 | if (start_col == 0) { | 701 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
595 | dst[0] = (dst[0] & mask) | color; | 702 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
596 | } else { | 703 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
597 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
598 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
599 | } | ||
600 | } | 704 | } |
601 | } | 705 | } |
602 | } else { | 706 | } else { |
@@ -625,6 +729,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
625 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 729 | dst[0] = (dst[0] & (mask << shift_left)) | color; |
626 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 730 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
627 | } | 731 | } |
732 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | ||
628 | } | 733 | } |
629 | } else { | 734 | } else { |
630 | for(size_t v = 0; v < 8; v++, dst++) { | 735 | for(size_t v = 0; v < 8; v++, dst++) { |
@@ -648,6 +753,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
648 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 753 | dst[0] = (dst[0] & (mask << shift_left)) | color; |
649 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 754 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
650 | } | 755 | } |
756 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | ||
651 | } | 757 | } |
652 | } | 758 | } |
653 | } | 759 | } |