about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bad Diode <bd@badd10de.dev> 2023-04-19 10:05:03 +0200
committer: Bad Diode <bd@badd10de.dev> 2023-04-19 15:28:08 +0200
commit: 5f47e14f6ab4e3b346de1d62c65452e674edbebe (patch)
tree: 7b4ff41a4667c61b3a2a40c764aaf222f267b909
parent: 1b1765d3a27b97dc282b6295aa206d92c32f33d9 (diff)
download: uxngba-5f47e14f6ab4e3b346de1d62c65452e674edbebe.tar.gz
          uxngba-5f47e14f6ab4e3b346de1d62c65452e674edbebe.zip
Add optimized 1bpp drawing function
-rw-r--r-- src/common.h 6
-rw-r--r-- src/main.c 41
-rw-r--r-- src/ppu.c 546
3 files changed, 466 insertions, 127 deletions
diff --git a/src/common.h b/src/common.h
index 0d6c89f..e62c78c 100644
--- a/src/common.h
+++ b/src/common.h
@@ -745,4 +745,10 @@ wait_vsync(void) {
745#define EWRAM_CODE __attribute__((section(".ewram"), long_call)) 745#define EWRAM_CODE __attribute__((section(".ewram"), long_call))
746#define EWRAM_BSS __attribute__((section(".sbss"))) 746#define EWRAM_BSS __attribute__((section(".sbss")))
747 747
748//
749// Compiler hints.
750//
751
752#define UNROLL_LOOPS __attribute__((optimize("unroll-loops")))
753
748#endif // GBAEXP_COMMON_H 754#endif // GBAEXP_COMMON_H
diff --git a/src/main.c b/src/main.c
index 8d0eba8..ecfa57b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -37,6 +37,8 @@
37#define CONTROL_METHODS CONTROL_CONTROLLER,CONTROL_MOUSE,CONTROL_KEYBOARD 37#define CONTROL_METHODS CONTROL_CONTROLLER,CONTROL_MOUSE,CONTROL_KEYBOARD
38#endif 38#endif
39 39
40#define PROF_ENABLE 1
41
40#ifdef PROF_ENABLE 42#ifdef PROF_ENABLE
41#if PROF_ENABLE == 0 43#if PROF_ENABLE == 0
42#define TEXT_ENABLE 1 44#define TEXT_ENABLE 1
@@ -51,22 +53,38 @@
51#ifndef PROF_SHOW_Y 53#ifndef PROF_SHOW_Y
52#define PROF_SHOW_Y 0 54#define PROF_SHOW_Y 0
53#endif 55#endif
56// #define PROF_SHOW() \
57// do { \
58// txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
59// txt_printf("INPUT: %lu ", input_cycles);\
60// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\
61// txt_printf("EVAL: %lu ", eval_cycles);\
62// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\
63// txt_printf("FLIP: %lu ", flip_cycles);\
64// txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\
65// txt_printf("MIX: %lu ", mix_cycles);\
66// } while (0)
54#define PROF_SHOW() \ 67#define PROF_SHOW() \
55 do { \ 68 do { \
56 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ 69 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
57 txt_printf("INPUT: %lu ", input_cycles);\ 70 txt_printf("PIX: %lu ", ppu_pixel_cycles);\
58 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\ 71 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\
59 txt_printf("EVAL: %lu ", eval_cycles);\ 72 txt_printf("2BPP: %lu ", ppu_chr_cycles);\
60 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\ 73 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\
61 txt_printf("FLIP: %lu ", flip_cycles);\ 74 txt_printf("1BPP: %lu ", ppu_icn_cycles);\
62 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ 75 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\
63 txt_printf("MIX: %lu ", mix_cycles);\ 76 txt_printf("FLIP: %lu ", flip_cycles);\
64 } while (0) 77 } while (0)
65#define PROF_INIT() \ 78#define PROF_INIT() \
66 u32 flip_cycles = 0;\ 79 static u32 ppu_pixel_cycles = 0;\
67 u32 eval_cycles = 0;\ 80 static u32 ppu_chr_cycles = 0;\
68 u32 input_cycles = 0;\ 81 static u32 ppu_icn_cycles = 0;\
69 u32 mix_cycles = 0; 82 static u32 flip_cycles = 0;\
83 static u32 eval_cycles = 0;\
84 static u32 input_cycles = 0;\
85 static u32 mix_cycles = 0;
86
87 PROF_INIT();
70#else 88#else
71#define PROF(F,VAR) (F) 89#define PROF(F,VAR) (F)
72#define PROF_SHOW() 90#define PROF_SHOW()
@@ -131,7 +149,7 @@ screen_deo(u8 *ram, u8 *d, u8 port) {
131 u8 layer = d[0xe] & 0x40; 149 u8 layer = d[0xe] & 0x40;
132 PEKDEV(x, 0x8); 150 PEKDEV(x, 0x8);
133 PEKDEV(y, 0xa); 151 PEKDEV(y, 0xa);
134 ppu_pixel(layer ? ppu.fg : ppu.bg, x, y, d[0xe] & 0x3); 152 PROF(ppu_pixel(layer ? ppu.fg : ppu.bg, x, y, d[0xe] & 0x3), ppu_pixel_cycles);
135 if(d[0x6] & 0x01) POKDEV(0x8, x + 1); /* auto x+1 */ 153 if(d[0x6] & 0x01) POKDEV(0x8, x + 1); /* auto x+1 */
136 if(d[0x6] & 0x02) POKDEV(0xa, y + 1); /* auto y+1 */ 154 if(d[0x6] & 0x02) POKDEV(0xa, y + 1); /* auto y+1 */
137 break; 155 break;
@@ -155,9 +173,9 @@ screen_deo(u8 *ram, u8 *d, u8 port) {
155 for(size_t i = 0; i <= n; i++) { 173 for(size_t i = 0; i <= n; i++) {
156 u8 *sprite = &ram[addr]; 174 u8 *sprite = &ram[addr];
157 if (twobpp) { 175 if (twobpp) {
158 ppu_2bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy); 176 PROF(ppu_2bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy), ppu_chr_cycles);
159 } else { 177 } else {
160 ppu_1bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy); 178 PROF(ppu_1bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy), ppu_icn_cycles);
161 } 179 }
162 addr += (d[0x6] & 0x04) << (1 + twobpp); 180 addr += (d[0x6] & 0x04) << (1 + twobpp);
163 } 181 }
@@ -564,7 +582,6 @@ main(void) {
564 582
565 // Main loop. 583 // Main loop.
566 uxn_eval(&u, PAGE_PROGRAM); 584 uxn_eval(&u, PAGE_PROGRAM);
567 PROF_INIT();
568 u8 frame_counter = 0; 585 u8 frame_counter = 0;
569 while(true) { 586 while(true) {
570 bios_vblank_wait(); 587 bios_vblank_wait();
diff --git a/src/ppu.c b/src/ppu.c
index 1a13ba3..a841b97 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -15,6 +15,8 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15WITH REGARD TO THIS SOFTWARE. 15WITH REGARD TO THIS SOFTWARE.
16*/ 16*/
17 17
18#define NEW_PPU 1
19
18#define FG_FRONT ((u32*)(MEM_VRAM)) 20#define FG_FRONT ((u32*)(MEM_VRAM))
19#define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) 21#define BG_FRONT ((u32*)(MEM_VRAM + KB(20)))
20#define FG_BACK ((u32*)(MEM_VRAM + KB(44))) 22#define FG_BACK ((u32*)(MEM_VRAM + KB(44)))
@@ -22,6 +24,18 @@ WITH REGARD TO THIS SOFTWARE.
22#define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) 24#define TILE_MAP ((u32*)(MEM_VRAM + KB(40)))
23#define FONT_DATA ((u32*)(MEM_VRAM + KB(84))) 25#define FONT_DATA ((u32*)(MEM_VRAM + KB(84)))
24 26
27#ifdef DISABLE_BOUNDCHECK_SCREEN
28#define BOUNDCHECK_SCREEN(X,Y)
29#else
30#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
31#endif
32
33// Swap A and B values without a tmp variable.
34#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))
35
36// Swap A and B values to make sure A <= B.
37#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
38
25// Keyboard. 39// Keyboard.
26#define SPRITE_START_IDX 640 40#define SPRITE_START_IDX 640
27 41
@@ -135,6 +149,116 @@ static u32 lut2bpp_flipx[256] = {
135 0x11111111 149 0x11111111
136}; 150};
137 151
// Lookup table expanding one 1bpp sprite row (a byte) into eight 4-bit
// pixels packed in a u32, horizontally flipped variant: bit i of the index
// becomes nibble i of the entry, so entry 0x01 is 0x00000001 and entry 0x80
// is 0x10000000. Every nibble is either 0 or 1. Selected by decode_1bpp()
// when flip_x is non-zero.
152static u32 dec_byte_flip_x[256] = {
153 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100,
154 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001,
155 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110,
156 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011,
157 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000,
158 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101,
159 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010,
160 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111,
161 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100,
162 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001,
163 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110,
164 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011,
165 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000,
166 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101,
167 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010,
168 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111,
169 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100,
170 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001,
171 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110,
172 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011,
173 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000,
174 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101,
175 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010,
176 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111,
177 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100,
178 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001,
179 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110,
180 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011,
181 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000,
182 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101,
183 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010,
184 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111,
185 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100,
186 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001,
187 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110,
188 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011,
189 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000,
190 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101,
191 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010,
192 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111,
193 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100,
194 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001,
195 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110,
196 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011,
197 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000,
198 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101,
199 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010,
200 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111,
201 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100,
202 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001,
203 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110,
204 0x11111111
205};
206
// Lookup table expanding one 1bpp sprite row (a byte) into eight 4-bit
// pixels packed in a u32, unflipped variant: bit i of the index becomes
// nibble (7 - i) of the entry, so entry 0x01 is 0x10000000 and entry 0x80
// is 0x00000001. Every nibble is either 0 or 1. Selected by decode_1bpp()
// when flip_x is zero.
207static u32 dec_byte[256] = {
208 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000,
209 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000,
210 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000,
211 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000,
212 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000,
213 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000,
214 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100,
215 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100,
216 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100,
217 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100,
218 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100,
219 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100,
220 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010,
221 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010,
222 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010,
223 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010,
224 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010,
225 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010,
226 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010,
227 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110,
228 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110,
229 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110,
230 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110,
231 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110,
232 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110,
233 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001,
234 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001,
235 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001,
236 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001,
237 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001,
238 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001,
239 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001,
240 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101,
241 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101,
242 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101,
243 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101,
244 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101,
245 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101,
246 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011,
247 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011,
248 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011,
249 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011,
250 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011,
251 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011,
252 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111,
253 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111,
254 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111,
255 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111,
256 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111,
257 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111,
258 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111,
259 0x11111111
260};
261
138static u8 blending[5][16] = { 262static u8 blending[5][16] = {
139 {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, 263 {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0},
140 {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, 264 {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3},
@@ -181,6 +305,141 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) {
181} 305}
182 306
183IWRAM_CODE 307IWRAM_CODE
308static inline
309u32
310decode_1bpp(u8 row, u8 flip_x) {
311 return flip_x ? dec_byte_flip_x[row] : dec_byte[row];
312}
313
314IWRAM_CODE
315static inline
316void
317draw_1bpp_row(u32 *layer, size_t x, size_t y, u8 sprite, u8 clr, u8 flip_x) {
318 BOUNDCHECK_SCREEN(x, y);
319
320 size_t tile_x = x / 8;
321 size_t tile_y = y / 8;
322 size_t start_col = x % 8;
323 size_t start_row = y % 8;
324 size_t shift_left = start_col * 4;
325 size_t shift_right = (8 - start_col) * 4;
326
327 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
328 u32 color = decode_1bpp(sprite, flip_x);
329 u32 mask = ~color;
330 color *= clr;
331 if (start_col == 0) {
332 dst[0] = (dst[0] & ~mask) | color;
333 } else {
334 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
335 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
336 }
337
338 // TODO: different blend modes?
339}
340
341IWRAM_CODE
342void
343draw_icn(u32 * layer, size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
344 BOUNDCHECK_SCREEN(x, y);
345 if (!flip_y) {
346 for(size_t v = 0; v < 8; v++) {
347 if ((y + v) >= SCREEN_HEIGHT) break;
348 u8 ch1 = sprite[v];
349 draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x);
350 }
351 } else {
352 for(size_t v = 0; v < 8; v++) {
353 if ((y + v) >= SCREEN_HEIGHT) break;
354 u8 ch1 = sprite[(7 - v)];
355 draw_1bpp_row(layer, x, y + v, ch1, clr, flip_x);
356 }
357 }
358}
359
360#if NEW_PPU == 1
361IWRAM_CODE
362UNROLL_LOOPS
363void
364ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
365 BOUNDCHECK_SCREEN(x, y);
366
367 size_t tile_x = x / 8;
368 size_t tile_y = y / 8;
369 size_t start_col = x % 8;
370 size_t start_row = y % 8;
371 size_t shift_left = start_col * 4;
372 size_t shift_right = (8 - start_col) * 4;
373 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
374 if (blending[4][clr]) {
375 u64 mask = ~((u64)0xFFFFFFFF);
376 if (!flip_y) {
377 for(size_t v = 0; v < 8; v++, dst++) {
378 if ((y + v) >= SCREEN_HEIGHT) break;
379 u8 ch1 = sprite[v];
380 u32 color_1 = decode_1bpp(ch1, flip_x);
381 u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111;
382 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2));
383 if (start_col == 0) {
384 dst[0] = (dst[0] & mask) | color;
385 } else {
386 dst[0] = (dst[0] & (mask << shift_left)) | color;
387 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
388 }
389 }
390 } else {
391 for(size_t v = 0; v < 8; v++, dst++) {
392 if ((y + v) >= SCREEN_HEIGHT) break;
393 u8 ch1 = sprite[(7 - v)];
394 u32 color_1 = decode_1bpp(ch1, flip_x);
395 u32 color_2 = (color_1 ^ 0xffffffff) & 0x11111111;
396 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2));
397 if (start_col == 0) {
398 dst[0] = (dst[0] & mask) | color;
399 } else {
400 dst[0] = (dst[0] & (mask << shift_left)) | color;
401 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
402 }
403 }
404 }
405 } else {
406 if (!flip_y) {
407 for(size_t v = 0; v < 8; v++, dst++) {
408 if ((y + v) >= SCREEN_HEIGHT) break;
409 u8 ch1 = sprite[v];
410 u32 color = decode_1bpp(ch1, flip_x);
411 u32 mask = ~color;
412 color *= clr;
413 if (start_col == 0) {
414 dst[0] = (dst[0] & ~mask) | color;
415 } else {
416 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
417 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
418 }
419 }
420 } else {
421 for(size_t v = 0; v < 8; v++, dst++) {
422 if ((y + v) >= SCREEN_HEIGHT) break;
423 u8 ch1 = sprite[(7 - v)];
424 u32 color = decode_1bpp(ch1, flip_x);
425 u32 mask = ~color;
426 color *= clr;
427 if (start_col == 0) {
428 dst[0] = (dst[0] & ~mask) | color;
429 } else {
430 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
431 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
432 }
433 }
434 }
435 }
436
437 // dirty_tiles[y >> 3] |= dirtyflag;
438 // dirty_tiles[(y + 7) >> 3] |= dirtyflag;
439}
440
441#else
442IWRAM_CODE
184void 443void
185ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { 444ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
186 u8 sprline; 445 u8 sprline;
@@ -194,7 +453,7 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
194 453
195 if (flipy) flipy = 7; 454 if (flipy) flipy = 7;
196 455
197 if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; 456 BOUNDCHECK_SCREEN(x, y);
198 457
199 if (blending[4][color]) { 458 if (blending[4][color]) {
200 u64 mask = ~((u64)0xFFFFFFFF << shift); 459 u64 mask = ~((u64)0xFFFFFFFF << shift);
@@ -229,126 +488,183 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
229 dirty_tiles[y >> 3] |= dirtyflag; 488 dirty_tiles[y >> 3] |= dirtyflag;
230 dirty_tiles[(y + 7) >> 3] |= dirtyflag; 489 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
231} 490}
491#endif
232 492
233IWRAM_CODE 493IWRAM_CODE
494static inline
234void 495void
235ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, 496draw_2bpp_row(void *layer, size_t x, size_t y, u8 a, u8 b, u8 flip_x) {
236 u8 flipx, u8 flipy) { 497 // BOUNDCHECK_SCREEN(x, y);
237 u8 sprline1, sprline2;
238 u8 xrightedge = x < ((32 - 1) * 8);
239 u16 v, h;
240 u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3));
241
242 u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8));
243 u32 *layerptr = &layer[layerpos];
244 u32 shift = (x & 7) << 2;
245
246 if (flipy) flipy = 7;
247
248 if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return;
249
250 if (color == 1) {
251 u32 *lut_expand = flipx ? lut_2bpp : lut2bpp_flipx;
252 u64 mask = ~((u64)0xFFFFFFFF << shift);
253
254 for (v = 0; v < 8; v++, layerptr++) {
255 if ((y + v) >= (24 * 8)) break;
256
257 sprline1 = sprite[v ^ flipy];
258 sprline2 = sprite[(v ^ flipy) | 8];
259
260 u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1);
261 u64 data = ((u64) (data32 & 0x33333333)) << shift;
262
263 layerptr[0] = (layerptr[0] & mask) | data;
264 if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
265
266 if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
267 }
268 } else if (blending[4][color]) {
269 u64 mask = ~((u64)0xFFFFFFFF << shift);
270
271 for (v = 0; v < 8; v++, layerptr++) {
272 if ((y + v) >= (24 * 8)) break;
273
274 u8 ch1 = sprite[v ^ flipy];
275 u8 ch2 = sprite[(v ^ flipy) | 8];
276 u32 data32 = 0;
277
278 if (!flipx) {
279 for (h = 0; h < 8; h++) {
280 data32 <<= 4;
281
282 u8 ch = (ch1 & 1) | ((ch2 & 1) << 1);
283 data32 |= blending[ch][color];
284
285 ch1 >>= 1; ch2 >>= 1;
286 }
287 } else {
288 for (h = 0; h < 8; h++) {
289 data32 <<= 4;
290
291 u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1);
292 data32 |= blending[ch][color];
293
294 ch1 <<= 1; ch2 <<= 1;
295 }
296 }
297
298 u64 data = ((u64) (data32 & 0x33333333)) << shift;
299
300 layerptr[0] = (layerptr[0] & mask) | data;
301 if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
302 498
303 if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; 499 size_t tile_x = x / 8;
304 } 500 size_t tile_y = y / 8;
501 size_t start_col = x % 8;
502 size_t start_row = y % 8;
503 size_t shift_left = start_col * 4;
504 size_t shift_right = (8 - start_col) * 4;
505
506 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
507// #if DEC_BIG_LUT
508 u32 *lut = dec_byte;
509 if (flip_x) {
510 lut = dec_byte_flip_x;
511 }
512 u32 clr_a = lut[a];
513 u32 clr_b = lut[b];
514// #else
515// u32 clr_a = decode_1bpp(a, flip_x);
516// u32 clr_b = decode_1bpp(b, flip_x);
517// #endif
518 u32 mask_a = (clr_a * 0xF);
519 u32 mask_b = (clr_b * 0xF);
520 u32 mask = (mask_a | mask_b);
521 u32 color = clr_a + (clr_b << 1);
522 if (start_col == 0) {
523 dst[0] = (dst[0] & ~mask) | color;
305 } else { 524 } else {
306 for (v = 0; v < 8; v++, layerptr++) { 525 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
307 if ((y + v) >= (24 * 8)) break; 526 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
308 527 }
309 u8 ch1 = sprite[v ^ flipy];
310 u8 ch2 = sprite[(v ^ flipy) | 8];
311 u32 data32 = 0;
312 u32 mask32 = 0;
313
314 if (!flipx) {
315 for (h = 0; h < 8; h++) {
316 data32 <<= 4; mask32 <<= 4;
317
318 if ((ch1 | ch2) & 1) {
319 u8 ch = (ch1 & 1) | ((ch2 & 1) << 1);
320 data32 |= blending[ch][color];
321 mask32 |= 0xF;
322 }
323 528
324 ch1 >>= 1; ch2 >>= 1; 529 // TODO: different blend modes?
325 } 530}
326 } else {
327 for (h = 0; h < 8; h++) {
328 data32 <<= 4; mask32 <<= 4;
329 531
330 if ((ch1 | ch2) & 128) { 532IWRAM_CODE
331 u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); 533void
332 data32 |= blending[ch][color]; 534ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color,
333 mask32 |= 0xF; 535 u8 flip_x, u8 flip_y) {
334 } 536 // u32 *dst = &layer[0];
537 // *dst = 0x111111111;
538 // if (!flip_y) {
539 // for(size_t v = 0; v < 8; v++) {
540 // // if ((y + v) >= SCREEN_HEIGHT) break;
541 // u8 ch1 = sprite[v + 0];
542 // u8 ch2 = sprite[v + 8];
543 // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x);
544 // }
545 // } else {
546 // for(size_t v = 0; v < 8; v++) {
547 // // if ((y + v) >= SCREEN_HEIGHT) break;
548 // u8 ch1 = sprite[(7 - v) + 0];
549 // u8 ch2 = sprite[(7 - v) + 8];
550 // draw_2bpp_row(layer, x, y + v, ch1, ch2, flip_x);
551 // }
552 // }
553 // u8 sprline1, sprline2;
554 // u8 xrightedge = x < ((32 - 1) * 8);
555 // u16 v, h;
556 // u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3));
557
558 // u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8));
559 // u32 *layerptr = &layer[layerpos];
560 // u32 shift = (x & 7) << 2;
561
562 // if (flip_y) flip_y = 7;
563
564 // if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return;
565
566 // if (color == 1) {
567 // u32 *lut_expand = flip_x ? lut_2bpp : lut2bpp_flipx;
568 // u64 mask = ~((u64)0xFFFFFFFF << shift);
569
570 // for (v = 0; v < 8; v++, layerptr++) {
571 // if ((y + v) >= (24 * 8)) break;
572
573 // sprline1 = sprite[v ^ flip_y];
574 // sprline2 = sprite[(v ^ flip_y) | 8];
575
576 // u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1);
577 // u64 data = ((u64) (data32 & 0x33333333)) << shift;
578
579 // layerptr[0] = (layerptr[0] & mask) | data;
580 // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
581
582 // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
583 // }
584 // } else if (blending[4][color]) {
585 // u64 mask = ~((u64)0xFFFFFFFF << shift);
586
587 // for (v = 0; v < 8; v++, layerptr++) {
588 // if ((y + v) >= (24 * 8)) break;
589
590 // u8 ch1 = sprite[v ^ flip_y];
591 // u8 ch2 = sprite[(v ^ flip_y) | 8];
592 // u32 data32 = 0;
593
594 // if (!flip_x) {
595 // for (h = 0; h < 8; h++) {
596 // data32 <<= 4;
597
598 // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1);
599 // data32 |= blending[ch][color];
600
601 // ch1 >>= 1; ch2 >>= 1;
602 // }
603 // } else {
604 // for (h = 0; h < 8; h++) {
605 // data32 <<= 4;
606
607 // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1);
608 // data32 |= blending[ch][color];
609
610 // ch1 <<= 1; ch2 <<= 1;
611 // }
612 // }
613
614 // u64 data = ((u64) (data32 & 0x33333333)) << shift;
615
616 // layerptr[0] = (layerptr[0] & mask) | data;
617 // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
618
619 // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
620 // }
621 // } else {
622 // for (v = 0; v < 8; v++, layerptr++) {
623 // if ((y + v) >= (24 * 8)) break;
624
625 // u8 ch1 = sprite[v ^ flip_y];
626 // u8 ch2 = sprite[(v ^ flip_y) | 8];
627 // u32 data32 = 0;
628 // u32 mask32 = 0;
629
630 // if (!flip_x) {
631 // for (h = 0; h < 8; h++) {
632 // data32 <<= 4; mask32 <<= 4;
633
634 // if ((ch1 | ch2) & 1) {
635 // u8 ch = (ch1 & 1) | ((ch2 & 1) << 1);
636 // data32 |= blending[ch][color];
637 // mask32 |= 0xF;
638 // }
639
640 // ch1 >>= 1; ch2 >>= 1;
641 // }
642 // } else {
643 // for (h = 0; h < 8; h++) {
644 // data32 <<= 4; mask32 <<= 4;
645
646 // if ((ch1 | ch2) & 128) {
647 // u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1);
648 // data32 |= blending[ch][color];
649 // mask32 |= 0xF;
650 // }
335 651
336 ch1 <<= 1; ch2 <<= 1; 652 // ch1 <<= 1; ch2 <<= 1;
337 } 653 // }
338 } 654 // }
339 655
340 u64 data = ((u64) (data32 & 0x33333333)) << shift; 656 // u64 data = ((u64) (data32 & 0x33333333)) << shift;
341 u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); 657 // u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift);
342 658
343 layerptr[0] = (layerptr[0] & mask) | data; 659 // layerptr[0] = (layerptr[0] & mask) | data;
344 if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); 660 // if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
345 661
346 if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; 662 // if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8;
347 } 663 // }
348 } 664 // }
349 665
350 dirty_tiles[y >> 3] |= dirtyflag; 666 // dirty_tiles[y >> 3] |= dirtyflag;
351 dirty_tiles[(y + 7) >> 3] |= dirtyflag; 667 // dirty_tiles[(y + 7) >> 3] |= dirtyflag;
352} 668}
353 669
354IWRAM_CODE 670IWRAM_CODE
@@ -369,20 +685,20 @@ flipbuf(Ppu *p) {
369 Tile *mem_fg = FG_FRONT; 685 Tile *mem_fg = FG_FRONT;
370 Tile *mem_bg = BG_FRONT; 686 Tile *mem_bg = BG_FRONT;
371 for (size_t j = 0; j < 20; ++j) { 687 for (size_t j = 0; j < 20; ++j) {
372 if (dirty_tiles[j] == 0) { 688 // if (dirty_tiles[j] == 0) {
373 continue; 689 // continue;
374 } 690 // }
375 691
376 size_t k = 1; 692 size_t k = 1;
377 for (size_t i = 0; i < 30; ++i, k <<= 1) { 693 for (size_t i = 0; i < 30; ++i, k <<= 1) {
378 if (dirty_tiles[j] & k) { 694 // if (dirty_tiles[j] & k) {
379 Tile *tile_fg = p->fg; 695 Tile *tile_fg = p->fg;
380 Tile *tile_bg = p->bg; 696 Tile *tile_bg = p->bg;
381 mem_fg[i + j * 32] = tile_fg[i + j * 32]; 697 mem_fg[i + j * 32] = tile_fg[i + j * 32];
382 mem_bg[i + j * 32] = tile_bg[i + j * 32]; 698 mem_bg[i + j * 32] = tile_bg[i + j * 32];
383 } 699 // }
384 } 700 }
385 dirty_tiles[j] = 0; 701 // dirty_tiles[j] = 0;
386 } 702 }
387} 703}
388 704