diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-19 15:08:20 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-19 15:28:08 +0200 |
commit | 6d943ebca061683c076bc026f7e15a3a047b2027 (patch) | |
tree | 070f50bf5541be63054caac802ff4085b293ca12 | |
parent | eee968f893d7878e675a43a3f89a15d8ceaadc44 (diff) | |
download | uxngba-6d943ebca061683c076bc026f7e15a3a047b2027.tar.gz uxngba-6d943ebca061683c076bc026f7e15a3a047b2027.zip |
Add working implementation of ppu_2bpp
-rw-r--r-- | src/ppu.c | 257 |
1 files changed, 155 insertions, 102 deletions
@@ -15,7 +15,7 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
15 | WITH REGARD TO THIS SOFTWARE. | 15 | WITH REGARD TO THIS SOFTWARE. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #define NEW_PPU 0 | 18 | #define NEW_PPU 1 |
19 | 19 | ||
20 | #define FG_FRONT ((u32*)(MEM_VRAM)) | 20 | #define FG_FRONT ((u32*)(MEM_VRAM)) |
21 | #define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) | 21 | #define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) |
@@ -178,19 +178,29 @@ static u32 dec_byte[256] = { | |||
178 | // We need to do the following: | 178 | // We need to do the following: |
179 | // | 179 | // |
180 | // 1. Extract the color row as u32 (4bpp). | 180 | // 1. Extract the color row as u32 (4bpp). |
181 | // u32 color = lut[ch1] | (lut[ch2] << 1); // color == 0x00112233 | 181 | // 2. Split the row into each of its colors. |
182 | // 2. Split the row into each of its colors: | 182 | // 3. Multiply based on the table, for example for color blend 2: 0123 -> 0231 |
183 | // u32 col3 = (color & 0x33333333); // 0x00000011 | 183 | // 4. Obtain final color by ORing the colors from each channel. |
184 | // u32 col2 = (color & 0x22222222) & ~(col3 * 0xF); // 0x00001100 | 184 | // |
185 | // u32 col1 = (color & 0x11111111) & ~(col3 * 0xF); // 0x00110000 | 185 | // clr0 = blending[0][clr]; |
186 | // u32 col0 = color & ~((col3 | col2 | col1) * 0xF); // 0x11000000 | 186 | // clr1 = blending[1][clr]; |
187 | // 3. Multiply based on the table, for example for color 0x2: 0123 -> 0231 | 187 | // clr2 = blending[2][clr]; |
188 | // a *= 0 | 188 | // clr3 = blending[3][clr]; |
189 | // b *= 2 | 189 | // color = 0x00112233; 0b 0000 0000 0001 0001 0010 0010 0011 0011 0x00112233 |
190 | // c *= 3 | 190 | // col1mask = (color & 0x11111111); 0b 0000 0000 0001 0001 0000 0000 0001 0001 0x00110011 |
191 | // d *= 1 | 191 | // col2mask = (color & 0x22222222) >> 1; 0b 0000 0000 0000 0000 0001 0001 0001 0001 0x00001111 |
192 | // 4. Obtain final color by ORing the individual ones. | 192 | // col3mask = (col1mask & col2mask) * 0xF; 0b 0000 0000 0000 0000 0000 0000 1111 1111 0x000000FF |
193 | // color = a | b | c | d; | 193 | // col1mask &= ~col3mask; 0b 0000 0000 0001 0001 0000 0000 0000 0000 0x00110000 |
194 | // col2mask &= ~col3mask; 0b 0000 0000 0000 0000 0001 0001 0000 0000 0x00001100 | ||
195 | // col3mask = (color & col3mask) & 0x11111111; 0b 0000 0000 0000 0000 0000 0000 0001 0001 0x00000011 | ||
196 | // col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; 0b 0001 0001 0000 0000 0000 0000 0000 0000 0x11000000 | ||
197 | // color = (clr0 * col0mask) | | ||
198 | // (clr1 * col1mask) | | ||
199 | // (clr2 * col2mask) | | ||
200 | // (clr3 * col3mask); | ||
201 | // | ||
202 | // Note that in case of transparent modes col0mask can be used to mask off the | ||
203 | // bits we want to pull from the existing framebuffer. | ||
194 | // | 204 | // |
195 | static u8 blending[5][16] = { | 205 | static u8 blending[5][16] = { |
196 | {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map. | 206 | {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map. |
@@ -530,7 +540,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flip_x, u8 flip_y) { | |||
530 | } | 540 | } |
531 | #else | 541 | #else |
532 | IWRAM_CODE | 542 | IWRAM_CODE |
533 | // UNROLL_LOOPS | 543 | UNROLL_LOOPS |
534 | void | 544 | void |
535 | ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | 545 | ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { |
536 | BOUNDCHECK_SCREEN(x, y); | 546 | BOUNDCHECK_SCREEN(x, y); |
@@ -543,98 +553,141 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
543 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; | 553 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; |
544 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | 554 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; |
545 | if (clr == 1) { | 555 | if (clr == 1) { |
546 | // u64 mask = ~((u64)0xFFFFFFFF << shift_left); | 556 | u64 mask = ~((u64)0xFFFFFFFF << shift_left); |
547 | // if (!flip_y) { | 557 | if (!flip_y) { |
548 | // for(size_t v = 0; v < 8; v++, dst++) { | 558 | for(size_t v = 0; v < 8; v++, dst++) { |
549 | // if ((y + v) >= SCREEN_HEIGHT) break; | 559 | if ((y + v) >= SCREEN_HEIGHT) break; |
550 | // u8 ch1 = sprite[v]; | 560 | u8 ch1 = sprite[v]; |
551 | // u8 ch2 = sprite[v | 8]; | 561 | u8 ch2 = sprite[v | 8]; |
552 | // u32 color = lut[ch1] | (lut[ch2] << 1); | 562 | u32 color = lut[ch1] | (lut[ch2] << 1); |
553 | // if (start_col == 0) { | 563 | if (start_col == 0) { |
554 | // dst[0] = (dst[0] & mask) | color; | 564 | dst[0] = (dst[0] & mask) | color; |
555 | // } else { | 565 | } else { |
556 | // dst[0] = (dst[0] & (mask << shift_left)) | color; | 566 | dst[0] = (dst[0] & (mask << shift_left)) | color; |
557 | // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 567 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
558 | // } | 568 | } |
559 | // } | 569 | } |
560 | // } else { | 570 | } else { |
561 | // for(size_t v = 0; v < 8; v++, dst++) { | 571 | for(size_t v = 0; v < 8; v++, dst++) { |
562 | // if ((y + v) >= SCREEN_HEIGHT) break; | 572 | if ((y + v) >= SCREEN_HEIGHT) break; |
563 | // u8 ch1 = sprite[(7 - v)]; | 573 | u8 ch1 = sprite[(7 - v)]; |
564 | // u8 ch2 = sprite[(7 - v) | 8]; | 574 | u8 ch2 = sprite[(7 - v) | 8]; |
565 | // u32 color = lut[ch1] | (lut[ch2] << 1); | 575 | u32 color = lut[ch1] | (lut[ch2] << 1); |
566 | // if (start_col == 0) { | 576 | if (start_col == 0) { |
567 | // dst[0] = (dst[0] & mask) | color; | 577 | dst[0] = (dst[0] & mask) | color; |
568 | // } else { | 578 | } else { |
569 | // dst[0] = (dst[0] & (mask << shift_left)) | color; | 579 | dst[0] = (dst[0] & (mask << shift_left)) | color; |
570 | // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 580 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
571 | // } | 581 | } |
572 | // } | 582 | } |
573 | // } | 583 | } |
574 | } else if (blending[4][clr]) { | 584 | } else if (blending[4][clr]) { |
575 | // ICN | ||
576 | u64 mask = ~((u64)0xFFFFFFFF << shift_left); | 585 | u64 mask = ~((u64)0xFFFFFFFF << shift_left); |
577 | // DEBUG: remove flip_y from sprite fetching | 586 | u8 clr0 = blending[0][clr]; |
578 | // if (!flip_y) { | 587 | u8 clr1 = blending[1][clr]; |
579 | if (flip_y) flip_y = 7; | 588 | u8 clr2 = blending[2][clr]; |
580 | for(size_t v = 0; v < 8; v++, dst++) { | 589 | u8 clr3 = blending[3][clr]; |
581 | if ((y + v) >= SCREEN_HEIGHT) break; | 590 | if (!flip_y) { |
582 | u8 ch1 = sprite[v ^ flip_y]; | 591 | for(size_t v = 0; v < 8; v++, dst++) { |
583 | u8 ch2 = sprite[(v ^ flip_y) | 8]; | 592 | if ((y + v) >= SCREEN_HEIGHT) break; |
584 | u32 color = lut[ch1] | (lut[ch2] << 1); | 593 | u8 ch1 = sprite[v]; |
585 | if (start_col == 0) { | 594 | u8 ch2 = sprite[v | 8]; |
586 | dst[0] = (dst[0] & mask) | color; | 595 | u32 color = lut[ch1] | (lut[ch2] << 1); |
587 | } else { | 596 | u32 col1mask = (color & 0x11111111); |
588 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 597 | u32 col2mask = (color & 0x22222222) >> 1; |
589 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 598 | u32 col3mask = (col1mask & col2mask) * 0xF; |
599 | col1mask &= ~col3mask; | ||
600 | col2mask &= ~col3mask; | ||
601 | col3mask = (color & col3mask) & 0x11111111; | ||
602 | u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; | ||
603 | color = (clr0 * col0mask) | | ||
604 | (clr1 * col1mask) | | ||
605 | (clr2 * col2mask) | | ||
606 | (clr3 * col3mask); | ||
607 | if (start_col == 0) { | ||
608 | dst[0] = (dst[0] & mask) | color; | ||
609 | } else { | ||
610 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
611 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
612 | } | ||
613 | } | ||
614 | } else { | ||
615 | for(size_t v = 0; v < 8; v++, dst++) { | ||
616 | if ((y + v) >= SCREEN_HEIGHT) break; | ||
617 | u8 ch1 = sprite[(7 - v)]; | ||
618 | u8 ch2 = sprite[(7 - v) | 8]; | ||
619 | u32 color = lut[ch1] | (lut[ch2] << 1); | ||
620 | u32 col1mask = (color & 0x11111111); | ||
621 | u32 col2mask = (color & 0x22222222) >> 1; | ||
622 | u32 col3mask = (col1mask & col2mask) * 0xF; | ||
623 | col1mask &= ~col3mask; | ||
624 | col2mask &= ~col3mask; | ||
625 | col3mask = (color & col3mask) & 0x11111111; | ||
626 | u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; | ||
627 | color = (clr0 * col0mask) | | ||
628 | (clr1 * col1mask) | | ||
629 | (clr2 * col2mask) | | ||
630 | (clr3 * col3mask); | ||
631 | if (start_col == 0) { | ||
632 | dst[0] = (dst[0] & mask) | color; | ||
633 | } else { | ||
634 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
635 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
636 | } | ||
590 | } | 637 | } |
591 | } | 638 | } |
592 | // } else { | ||
593 | // for(size_t v = 0; v < 8; v++, dst++) { | ||
594 | // if ((y + v) >= SCREEN_HEIGHT) break; | ||
595 | // u8 ch1 = sprite[(7 - v)]; | ||
596 | // u32 color_1 = lut[ch1]; | ||
597 | // u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111; | ||
598 | // u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); | ||
599 | // if (start_col == 0) { | ||
600 | // dst[0] = (dst[0] & mask) | color; | ||
601 | // } else { | ||
602 | // dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
603 | // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
604 | // } | ||
605 | // } | ||
606 | // } | ||
607 | } else { | 639 | } else { |
608 | // ICN | 640 | u8 clr0 = blending[0][clr]; |
609 | // if (!flip_y) { | 641 | u8 clr1 = blending[1][clr]; |
610 | // for(size_t v = 0; v < 8; v++, dst++) { | 642 | u8 clr2 = blending[2][clr]; |
611 | // if ((y + v) >= SCREEN_HEIGHT) break; | 643 | u8 clr3 = blending[3][clr]; |
612 | // u8 ch1 = sprite[v]; | 644 | if (!flip_y) { |
613 | // u32 color= lut[ch1]; | 645 | for(size_t v = 0; v < 8; v++, dst++) { |
614 | // u32 mask = ~color; | 646 | if ((y + v) >= SCREEN_HEIGHT) break; |
615 | // color *= clr & 3; | 647 | u8 ch1 = sprite[v]; |
616 | // if (start_col == 0) { | 648 | u8 ch2 = sprite[v | 8]; |
617 | // dst[0] = (dst[0] & ~mask) | color; | 649 | u32 color = lut[ch1] | (lut[ch2] << 1); |
618 | // } else { | 650 | u32 col1mask = (color & 0x11111111); |
619 | // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | 651 | u32 col2mask = (color & 0x22222222) >> 1; |
620 | // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | 652 | u32 col3mask = (col1mask & col2mask) * 0xF; |
621 | // } | 653 | col1mask &= ~col3mask; |
622 | // } | 654 | col2mask &= ~col3mask; |
623 | // } else { | 655 | col3mask = (color & col3mask) & 0x11111111; |
624 | // for(size_t v = 0; v < 8; v++, dst++) { | 656 | u32 mask = ~(col1mask | col2mask | col3mask) & 0x11111111; |
625 | // if ((y + v) >= SCREEN_HEIGHT) break; | 657 | color = (clr1 * col1mask) | |
626 | // u8 ch1 = sprite[(7 - v)]; | 658 | (clr2 * col2mask) | |
627 | // u32 color= lut[ch1]; | 659 | (clr3 * col3mask); |
628 | // u32 mask = ~color; | 660 | if (start_col == 0) { |
629 | // color *= clr & 3; | 661 | dst[0] = (dst[0] & mask) | color; |
630 | // if (start_col == 0) { | 662 | } else { |
631 | // dst[0] = (dst[0] & ~mask) | color; | 663 | dst[0] = (dst[0] & (mask << shift_left)) | color; |
632 | // } else { | 664 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
633 | // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | 665 | } |
634 | // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | 666 | } |
635 | // } | 667 | } else { |
636 | // } | 668 | for(size_t v = 0; v < 8; v++, dst++) { |
637 | // } | 669 | if ((y + v) >= SCREEN_HEIGHT) break; |
670 | u8 ch1 = sprite[(7 - v)]; | ||
671 | u8 ch2 = sprite[(7 - v) | 8]; | ||
672 | u32 color = lut[ch1] | (lut[ch2] << 1); | ||
673 | u32 col1mask = (color & 0x11111111); | ||
674 | u32 col2mask = (color & 0x22222222) >> 1; | ||
675 | u32 col3mask = (col1mask & col2mask) * 0xF; | ||
676 | col1mask &= ~col3mask; | ||
677 | col2mask &= ~col3mask; | ||
678 | col3mask = (color & col3mask) & 0x11111111; | ||
679 | u32 mask = ~(col1mask | col2mask | col3mask) & 0x11111111; | ||
680 | color = (clr1 * col1mask) | | ||
681 | (clr2 * col2mask) | | ||
682 | (clr3 * col3mask); | ||
683 | if (start_col == 0) { | ||
684 | dst[0] = (dst[0] & mask) | color; | ||
685 | } else { | ||
686 | dst[0] = (dst[0] & (mask << shift_left)) | color; | ||
687 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | ||
688 | } | ||
689 | } | ||
690 | } | ||
638 | } | 691 | } |
639 | // dirty_tiles[y >> 3] |= dirtyflag; | 692 | // dirty_tiles[y >> 3] |= dirtyflag; |
640 | // dirty_tiles[(y + 7) >> 3] |= dirtyflag; | 693 | // dirty_tiles[(y + 7) >> 3] |= dirtyflag; |