aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2023-04-19 15:08:20 +0200
committerBad Diode <bd@badd10de.dev>2023-04-19 15:28:08 +0200
commit6d943ebca061683c076bc026f7e15a3a047b2027 (patch)
tree070f50bf5541be63054caac802ff4085b293ca12
parenteee968f893d7878e675a43a3f89a15d8ceaadc44 (diff)
downloaduxngba-6d943ebca061683c076bc026f7e15a3a047b2027.tar.gz
uxngba-6d943ebca061683c076bc026f7e15a3a047b2027.zip
Add working implementation of ppu_2bpp
-rw-r--r--src/ppu.c257
1 files changed, 155 insertions, 102 deletions
diff --git a/src/ppu.c b/src/ppu.c
index de33145..96a3bc0 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -15,7 +15,7 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15WITH REGARD TO THIS SOFTWARE. 15WITH REGARD TO THIS SOFTWARE.
16*/ 16*/
17 17
18#define NEW_PPU 0 18#define NEW_PPU 1
19 19
20#define FG_FRONT ((u32*)(MEM_VRAM)) 20#define FG_FRONT ((u32*)(MEM_VRAM))
21#define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) 21#define BG_FRONT ((u32*)(MEM_VRAM + KB(20)))
@@ -178,19 +178,29 @@ static u32 dec_byte[256] = {
178// We need to do the following: 178// We need to do the following:
179// 179//
180// 1. Extract the color row as u32 (4bpp). 180// 1. Extract the color row as u32 (4bpp).
181// u32 color = lut[ch1] | (lut[ch2] << 1); // color == 0x00112233 181// 2. Split the row into each of its colors.
182// 2. Split the row into each of its colors: 182// 3. Multiply based on the table, for example for color blend 2: 0123 -> 0231
183// u32 col3 = (color & 0x33333333); // 0x00000011 183// 4. Obtain final color by ORing the colors from each channel.
184// u32 col2 = (color & 0x22222222) & ~(col3 * 0xF); // 0x00001100 184//
185// u32 col1 = (color & 0x11111111) & ~(col3 * 0xF); // 0x00110000 185// clr0 = blending[0][clr];
186// u32 col0 = color & ~((col3 | col2 | col1) * 0xF); // 0x11000000 186// clr1 = blending[1][clr];
187// 3. Multiply based on the table, for example for color 0x2: 0123 -> 0231 187// clr2 = blending[2][clr];
188// a *= 0 188// clr3 = blending[3][clr];
189// b *= 2 189// color = 0x00112233; 0b 0000 0000 0001 0001 0010 0010 0011 0011 0x00112233
190// c *= 3 190// col1mask = (color & 0x11111111); 0b 0000 0000 0001 0001 0000 0000 0001 0001 0x00110011
191// d *= 1 191// col2mask = (color & 0x22222222) >> 1; 0b 0000 0000 0000 0000 0001 0001 0001 0001 0x00001111
192// 4. Obtain final color by ORing the individual ones. 192// col3mask = (col1mask & col2mask) * 0xF; 0b 0000 0000 0000 0000 0000 0000 1111 1111 0x000000FF
193// color = a | b | c | d; 193// col1mask &= ~col3mask; 0b 0000 0000 0001 0001 0000 0000 0000 0000 0x00110000
194// col2mask &= ~col3mask; 0b 0000 0000 0000 0000 0001 0001 0000 0000 0x00001100
195// col3mask = (color & col3mask) & 0x11111111; 0b 0000 0000 0000 0000 0000 0000 0001 0001 0x00000011
196// col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; 0b 0001 0001 0000 0000 0000 0000 0000 0000 0x11000000
197// color = (clr0 * col0mask) |
198// (clr1 * col1mask) |
199// (clr2 * col2mask) |
200// (clr3 * col3mask);
201//
202// Note that in case of transparent modes col0mask can be used to mask off the
203// bits we want to pull from the existing framebuffer.
194// 204//
195static u8 blending[5][16] = { 205static u8 blending[5][16] = {
196 {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map. 206 {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map.
@@ -530,7 +540,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flip_x, u8 flip_y) {
530} 540}
531#else 541#else
532IWRAM_CODE 542IWRAM_CODE
533// UNROLL_LOOPS 543UNROLL_LOOPS
534void 544void
535ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { 545ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
536 BOUNDCHECK_SCREEN(x, y); 546 BOUNDCHECK_SCREEN(x, y);
@@ -543,98 +553,141 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
543 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; 553 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
544 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 554 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
545 if (clr == 1) { 555 if (clr == 1) {
546 // u64 mask = ~((u64)0xFFFFFFFF << shift_left); 556 u64 mask = ~((u64)0xFFFFFFFF << shift_left);
547 // if (!flip_y) { 557 if (!flip_y) {
548 // for(size_t v = 0; v < 8; v++, dst++) { 558 for(size_t v = 0; v < 8; v++, dst++) {
549 // if ((y + v) >= SCREEN_HEIGHT) break; 559 if ((y + v) >= SCREEN_HEIGHT) break;
550 // u8 ch1 = sprite[v]; 560 u8 ch1 = sprite[v];
551 // u8 ch2 = sprite[v | 8]; 561 u8 ch2 = sprite[v | 8];
552 // u32 color = lut[ch1] | (lut[ch2] << 1); 562 u32 color = lut[ch1] | (lut[ch2] << 1);
553 // if (start_col == 0) { 563 if (start_col == 0) {
554 // dst[0] = (dst[0] & mask) | color; 564 dst[0] = (dst[0] & mask) | color;
555 // } else { 565 } else {
556 // dst[0] = (dst[0] & (mask << shift_left)) | color; 566 dst[0] = (dst[0] & (mask << shift_left)) | color;
557 // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 567 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
558 // } 568 }
559 // } 569 }
560 // } else { 570 } else {
561 // for(size_t v = 0; v < 8; v++, dst++) { 571 for(size_t v = 0; v < 8; v++, dst++) {
562 // if ((y + v) >= SCREEN_HEIGHT) break; 572 if ((y + v) >= SCREEN_HEIGHT) break;
563 // u8 ch1 = sprite[(7 - v)]; 573 u8 ch1 = sprite[(7 - v)];
564 // u8 ch2 = sprite[(7 - v) | 8]; 574 u8 ch2 = sprite[(7 - v) | 8];
565 // u32 color = lut[ch1] | (lut[ch2] << 1); 575 u32 color = lut[ch1] | (lut[ch2] << 1);
566 // if (start_col == 0) { 576 if (start_col == 0) {
567 // dst[0] = (dst[0] & mask) | color; 577 dst[0] = (dst[0] & mask) | color;
568 // } else { 578 } else {
569 // dst[0] = (dst[0] & (mask << shift_left)) | color; 579 dst[0] = (dst[0] & (mask << shift_left)) | color;
570 // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 580 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
571 // } 581 }
572 // } 582 }
573 // } 583 }
574 } else if (blending[4][clr]) { 584 } else if (blending[4][clr]) {
575 // ICN
576 u64 mask = ~((u64)0xFFFFFFFF << shift_left); 585 u64 mask = ~((u64)0xFFFFFFFF << shift_left);
577 // DEBUG: remove flip_y from sprite fetching 586 u8 clr0 = blending[0][clr];
578 // if (!flip_y) { 587 u8 clr1 = blending[1][clr];
579 if (flip_y) flip_y = 7; 588 u8 clr2 = blending[2][clr];
580 for(size_t v = 0; v < 8; v++, dst++) { 589 u8 clr3 = blending[3][clr];
581 if ((y + v) >= SCREEN_HEIGHT) break; 590 if (!flip_y) {
582 u8 ch1 = sprite[v ^ flip_y]; 591 for(size_t v = 0; v < 8; v++, dst++) {
583 u8 ch2 = sprite[(v ^ flip_y) | 8]; 592 if ((y + v) >= SCREEN_HEIGHT) break;
584 u32 color = lut[ch1] | (lut[ch2] << 1); 593 u8 ch1 = sprite[v];
585 if (start_col == 0) { 594 u8 ch2 = sprite[v | 8];
586 dst[0] = (dst[0] & mask) | color; 595 u32 color = lut[ch1] | (lut[ch2] << 1);
587 } else { 596 u32 col1mask = (color & 0x11111111);
588 dst[0] = (dst[0] & (mask << shift_left)) | color; 597 u32 col2mask = (color & 0x22222222) >> 1;
589 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 598 u32 col3mask = (col1mask & col2mask) * 0xF;
599 col1mask &= ~col3mask;
600 col2mask &= ~col3mask;
601 col3mask = (color & col3mask) & 0x11111111;
602 u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111;
603 color = (clr0 * col0mask) |
604 (clr1 * col1mask) |
605 (clr2 * col2mask) |
606 (clr3 * col3mask);
607 if (start_col == 0) {
608 dst[0] = (dst[0] & mask) | color;
609 } else {
610 dst[0] = (dst[0] & (mask << shift_left)) | color;
611 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
612 }
613 }
614 } else {
615 for(size_t v = 0; v < 8; v++, dst++) {
616 if ((y + v) >= SCREEN_HEIGHT) break;
617 u8 ch1 = sprite[(7 - v)];
618 u8 ch2 = sprite[(7 - v) | 8];
619 u32 color = lut[ch1] | (lut[ch2] << 1);
620 u32 col1mask = (color & 0x11111111);
621 u32 col2mask = (color & 0x22222222) >> 1;
622 u32 col3mask = (col1mask & col2mask) * 0xF;
623 col1mask &= ~col3mask;
624 col2mask &= ~col3mask;
625 col3mask = (color & col3mask) & 0x11111111;
626 u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111;
627 color = (clr0 * col0mask) |
628 (clr1 * col1mask) |
629 (clr2 * col2mask) |
630 (clr3 * col3mask);
631 if (start_col == 0) {
632 dst[0] = (dst[0] & mask) | color;
633 } else {
634 dst[0] = (dst[0] & (mask << shift_left)) | color;
635 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
636 }
590 } 637 }
591 } 638 }
592 // } else {
593 // for(size_t v = 0; v < 8; v++, dst++) {
594 // if ((y + v) >= SCREEN_HEIGHT) break;
595 // u8 ch1 = sprite[(7 - v)];
596 // u32 color_1 = lut[ch1];
597 // u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111;
598 // u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2));
599 // if (start_col == 0) {
600 // dst[0] = (dst[0] & mask) | color;
601 // } else {
602 // dst[0] = (dst[0] & (mask << shift_left)) | color;
603 // dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
604 // }
605 // }
606 // }
607 } else { 639 } else {
608 // ICN 640 u8 clr0 = blending[0][clr];
609 // if (!flip_y) { 641 u8 clr1 = blending[1][clr];
610 // for(size_t v = 0; v < 8; v++, dst++) { 642 u8 clr2 = blending[2][clr];
611 // if ((y + v) >= SCREEN_HEIGHT) break; 643 u8 clr3 = blending[3][clr];
612 // u8 ch1 = sprite[v]; 644 if (!flip_y) {
613 // u32 color= lut[ch1]; 645 for(size_t v = 0; v < 8; v++, dst++) {
614 // u32 mask = ~color; 646 if ((y + v) >= SCREEN_HEIGHT) break;
615 // color *= clr & 3; 647 u8 ch1 = sprite[v];
616 // if (start_col == 0) { 648 u8 ch2 = sprite[v | 8];
617 // dst[0] = (dst[0] & ~mask) | color; 649 u32 color = lut[ch1] | (lut[ch2] << 1);
618 // } else { 650 u32 col1mask = (color & 0x11111111);
619 // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 651 u32 col2mask = (color & 0x22222222) >> 1;
620 // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 652 u32 col3mask = (col1mask & col2mask) * 0xF;
621 // } 653 col1mask &= ~col3mask;
622 // } 654 col2mask &= ~col3mask;
623 // } else { 655 col3mask = (color & col3mask) & 0x11111111;
624 // for(size_t v = 0; v < 8; v++, dst++) { 656 u32 mask = ~(col1mask | col2mask | col3mask) & 0x11111111;
625 // if ((y + v) >= SCREEN_HEIGHT) break; 657 color = (clr1 * col1mask) |
626 // u8 ch1 = sprite[(7 - v)]; 658 (clr2 * col2mask) |
627 // u32 color= lut[ch1]; 659 (clr3 * col3mask);
628 // u32 mask = ~color; 660 if (start_col == 0) {
629 // color *= clr & 3; 661 dst[0] = (dst[0] & mask) | color;
630 // if (start_col == 0) { 662 } else {
631 // dst[0] = (dst[0] & ~mask) | color; 663 dst[0] = (dst[0] & (mask << shift_left)) | color;
632 // } else { 664 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
633 // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 665 }
634 // dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 666 }
635 // } 667 } else {
636 // } 668 for(size_t v = 0; v < 8; v++, dst++) {
637 // } 669 if ((y + v) >= SCREEN_HEIGHT) break;
670 u8 ch1 = sprite[(7 - v)];
671 u8 ch2 = sprite[(7 - v) | 8];
672 u32 color = lut[ch1] | (lut[ch2] << 1);
673 u32 col1mask = (color & 0x11111111);
674 u32 col2mask = (color & 0x22222222) >> 1;
675 u32 col3mask = (col1mask & col2mask) * 0xF;
676 col1mask &= ~col3mask;
677 col2mask &= ~col3mask;
678 col3mask = (color & col3mask) & 0x11111111;
679 u32 mask = ~(col1mask | col2mask | col3mask) & 0x11111111;
680 color = (clr1 * col1mask) |
681 (clr2 * col2mask) |
682 (clr3 * col3mask);
683 if (start_col == 0) {
684 dst[0] = (dst[0] & mask) | color;
685 } else {
686 dst[0] = (dst[0] & (mask << shift_left)) | color;
687 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
688 }
689 }
690 }
638 } 691 }
639 // dirty_tiles[y >> 3] |= dirtyflag; 692 // dirty_tiles[y >> 3] |= dirtyflag;
640 // dirty_tiles[(y + 7) >> 3] |= dirtyflag; 693 // dirty_tiles[(y + 7) >> 3] |= dirtyflag;