about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bad Diode <bd@badd10de.dev> 2023-04-20 09:10:50 +0200
committer: Bad Diode <bd@badd10de.dev> 2023-04-20 09:10:50 +0200
commit: 402a74bf60e6e00e625364628e2d1ffe28d225ca (patch)
tree: f4a5ead120da09a25c83d17fed2a7e04514e4338
parent: da30ead571b5243b777244b6407fe911fab8359f (diff)
download: uxngba-402a74bf60e6e00e625364628e2d1ffe28d225ca.tar.gz
uxngba-402a74bf60e6e00e625364628e2d1ffe28d225ca.zip
Add initial screen fill implementation
Still need to add dirty-tile marking to it, but so far it should be fine.
-rw-r--r-- src/main.c 104
-rw-r--r-- src/ppu.c 160
2 files changed, 116 insertions, 148 deletions
diff --git a/src/main.c b/src/main.c
index ba835ff..2f9d023 100644
--- a/src/main.c
+++ b/src/main.c
@@ -80,19 +80,21 @@
80#define PROF_SHOW() \ 80#define PROF_SHOW() \
81 do { \ 81 do { \
82 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ 82 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
83 txt_printf("1BPP: %lu 2BPP: %lu", ppu_icn_cycles, ppu_chr_cycles);\ 83 txt_printf("1BPP: %.8lu\n2BPP: %.8lu\nFILL: %.8lu", ppu_icn_cycles, ppu_chr_cycles, ppu_fill_cycles);\
84 } while (0) 84 } while (0)
85
85static u32 ppu_pixel_cycles = 0; 86static u32 ppu_pixel_cycles = 0;
87static u32 ppu_fill_cycles = 0;
86static u32 ppu_chr_cycles = 0; 88static u32 ppu_chr_cycles = 0;
87static u32 ppu_icn_cycles = 0; 89static u32 ppu_icn_cycles = 0;
88static u32 flip_cycles = 0; 90static u32 flip_cycles = 0;
89static u32 eval_cycles = 0; 91static u32 eval_cycles = 0;
90static u32 input_cycles = 0; 92static u32 input_cycles = 0;
91static u32 mix_cycles = 0; 93static u32 mix_cycles = 0;
94
92#else 95#else
93#define PROF(F,VAR) (F) 96#define PROF(F,VAR) (F)
94#define PROF_SHOW() 97#define PROF_SHOW()
95#define PROF_INIT()
96#endif 98#endif
97 99
98static time_t seconds = 0; 100static time_t seconds = 0;
@@ -149,21 +151,23 @@ void
149screen_deo(u8 *ram, u8 *d, u8 port) { 151screen_deo(u8 *ram, u8 *d, u8 port) {
150 switch(port) { 152 switch(port) {
151 case 0xe: { 153 case 0xe: {
152 u16 x, y; 154 u8 ctrl = d[0xe];
153 u8 *layer = (d[0xe] & 0x40) ? ppu.fg : ppu.bg; 155 u8 color = ctrl & 0x3;
154 x = PEEK2(d + 0x8); 156 u16 x0 = PEEK2(d + 0x8);
155 y = PEEK2(d + 0xa); 157 u16 y0 = PEEK2(d + 0xa);
156 if(d[0xe] & 0x80) { 158 u8 *layer = (ctrl & 0x40) ? ppu.fg : ppu.bg;
157 screen_fill(layer, 159 if(ctrl & 0x80) {
158 (d[0xe] & 0x10) ? 0 : x, 160 u16 x1 = SCREEN_WIDTH - 1;
159 (d[0xe] & 0x20) ? 0 : y, 161 u16 y1 = SCREEN_HEIGHT - 1;
160 (d[0xe] & 0x10) ? x : SCREEN_WIDTH, 162 if(ctrl & 0x10) x1 = x0, x0 = 0;
161 (d[0xe] & 0x20) ? y : SCREEN_HEIGHT, 163 if(ctrl & 0x20) y1 = y0, y0 = 0;
162 (d[0xe] & 0x03)); 164 PROF(screen_fill(layer, x0, y0, x1, y1, color), ppu_fill_cycles);
163 } else { 165 } else {
164 PROF(ppu_pixel(layer, x, y, d[0xe] & 0x3), ppu_pixel_cycles); 166 u16 width = SCREEN_WIDTH;
165 if(d[0x6] & 0x01) POKE2(d + 0x8, x + 1); /* auto x+1 */ 167 u16 height = SCREEN_HEIGHT;
166 if(d[0x6] & 0x02) POKE2(d + 0xa, y + 1); /* auto y+1 */ 168 PROF(ppu_pixel(layer, x0, y0, color), ppu_pixel_cycles);
169 if(d[0x6] & 0x1) POKE2(d + 0x8, x0 + 1); /* auto x+1 */
170 if(d[0x6] & 0x2) POKE2(d + 0xa, y0 + 1); /* auto y+1 */
167 } 171 }
168 break; 172 break;
169 } 173 }
@@ -328,25 +332,30 @@ console_deo(u8 *d, u8 port) {
328 332
329static void 333static void
330system_cmd(u8 *ram, u16 addr) { 334system_cmd(u8 *ram, u16 addr) {
331 // if(ram[addr] == 0x01) { 335 if(ram[addr] == 0x01) {
332 // u16 i, length = PEEK2(ram + addr + 1); 336 // NOTE: Handle rom paging on a case by case basis if a rom has to be
333 // u16 a_page = PEEK2(ram + addr + 1 + 2); 337 // split in multiple chunks. The GBA compiler doesn't like allocating
334 // u16 a_addr = PEEK2(ram + addr + 1 + 4); 338 // big arrays, but it's fine if we split it into chunks of 64KB, for
335 // u16 b_addr = PEEK2(ram + addr + 1 + 8); 339 // example.
336 // u8 *rom = uxn_rom; 340 //
337 // for(i = 0; i < length; i++) { 341 // u16 i, length = PEEK2(ram + addr + 1);
338 // switch (a_page % RAM_PAGES) { 342 // u16 a_page = PEEK2(ram + addr + 1 + 2);
339 // case 0: { rom = uxn_rom; } break; 343 // u16 a_addr = PEEK2(ram + addr + 1 + 4);
340 // case 1: { rom = uxn_rom_2; } break; 344 // u16 b_addr = PEEK2(ram + addr + 1 + 8);
341 // case 2: { rom = uxn_rom_3; } break; 345 // u8 *rom = uxn_rom;
342 // case 3: { rom = uxn_rom_4; } break; 346 // for(i = 0; i < length; i++) {
343 // case 4: { rom = uxn_rom_5; } break; 347 // switch (a_page % RAM_PAGES) {
344 // case 5: { rom = uxn_rom_6; } break; 348 // case 0: { rom = uxn_rom; } break;
345 // case 6: { rom = uxn_rom_7; } break; 349 // case 1: { rom = uxn_rom_2; } break;
346 // } 350 // case 2: { rom = uxn_rom_3; } break;
347 // ram[(u16)(b_addr + i)] = rom[(u16)(a_addr + i)]; 351 // case 3: { rom = uxn_rom_4; } break;
352 // case 4: { rom = uxn_rom_5; } break;
353 // case 5: { rom = uxn_rom_6; } break;
354 // case 6: { rom = uxn_rom_7; } break;
355 // }
356 // ram[(u16)(b_addr + i)] = rom[(u16)(a_addr + i)];
348 // } 357 // }
349 // } 358 }
350} 359}
351 360
352void 361void
@@ -378,19 +387,20 @@ void
378uxn_deo(Uxn *u, u8 addr) { 387uxn_deo(Uxn *u, u8 addr) {
379 u8 p = addr & 0x0f, d = addr & 0xf0; 388 u8 p = addr & 0x0f, d = addr & 0xf0;
380 switch(d) { 389 switch(d) {
381 case 0x00: 390 case 0x00:
382 system_deo(u, &u->dev[d], p); 391 system_deo(u, &u->dev[d], p);
383 if(p > 0x7 && p < 0xe) 392 if(p > 0x7 && p < 0xe) {
384 putcolors(&u->dev[0x8]); 393 putcolors(&u->dev[0x8]);
385 break; 394 }
386 case 0x10: console_deo(&u->dev[d], p); break; 395 break;
387 case 0x20: screen_deo(u->ram, &u->dev[d], p); break; 396 case 0x10: console_deo(&u->dev[d], p); break;
388 case 0x30: audio_deo(0, &u->dev[d], p, u); break; 397 case 0x20: screen_deo(u->ram, &u->dev[d], p); break;
389 case 0x40: audio_deo(1, &u->dev[d], p, u); break; 398 case 0x30: audio_deo(0, &u->dev[d], p, u); break;
390 case 0x50: audio_deo(2, &u->dev[d], p, u); break; 399 case 0x40: audio_deo(1, &u->dev[d], p, u); break;
391 case 0x60: audio_deo(3, &u->dev[d], p, u); break; 400 case 0x50: audio_deo(2, &u->dev[d], p, u); break;
392 case 0xa0: file_deo(0, u->ram, &u->dev[d], p); break; 401 case 0x60: audio_deo(3, &u->dev[d], p, u); break;
393 case 0xb0: file_deo(1, u->ram, &u->dev[d], p); break; 402 case 0xa0: file_deo(0, u->ram, &u->dev[d], p); break;
403 case 0xb0: file_deo(1, u->ram, &u->dev[d], p); break;
394 } 404 }
395} 405}
396 406
diff --git a/src/ppu.c b/src/ppu.c
index 3b159af..8e1710c 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -248,115 +248,73 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) {
248} 248}
249 249
250IWRAM_CODE 250IWRAM_CODE
251void clear_screen(u32 *layer, u8 clr) {
252 // We have to make sure we leave the last tile blank to use as alpha channel
253 // when moving the BG during double buffering in case we are using that.
254 dma_fill(layer, 0x11111111 * clr, KB(20) - 32, 3);
255}
256
257IWRAM_CODE
258static inline
251void 259void
252ppu_rect(u32 *layer, size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { 260draw_hline(u32 *layer, size_t x0, size_t x1, size_t y0, u8 clr) {
253 BOUNDCHECK_SCREEN(x0, y0); 261 BOUNDCHECK_SCREEN(x0, y0);
254 BOUNDCHECK_SCREEN(x1, y1); 262 BOUNDCHECK_SCREEN(x1, y0);
255
256 // Find row positions for the given x/y coordinates. 263 // Find row positions for the given x/y coordinates.
257 size_t tile_x0 = x0 / 8; 264 size_t tile_x0 = x0 / 8;
258 size_t tile_y0 = y0 / 8;
259 size_t tile_x1 = x1 / 8; 265 size_t tile_x1 = x1 / 8;
260 size_t tile_y1 = y1 / 8; 266 size_t tile_y = y0 / 8;
261 size_t start_col0 = x0 % 8; 267 size_t start_col = x0 % 8;
262 size_t start_col1 = x1 % 8; 268 size_t end_col = x1 % 8;
263 size_t start_row0 = y0 % 8; 269 size_t start_row = y0 % 8;
264 size_t start_row1 = y1 % 8; 270
265 271 // Horizontal line. There are 3 cases:
266 // Get a pointer to the backbuffer and the tile row. 272 // 1. Lines fit on a single tile.
267 u32 *buf_top = &layer[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; 273 // 2. Lines go through 2 tiles, both require partial row updates.
268 u32 *buf_bot = &layer[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; 274 // 3. Lines go through 3 or more tiles, first and last tiles use
269 275 // partial row updates, rows in the middle can write the entire
270 size_t dx = tile_x1 - tile_x0; 276 // row.
271 size_t dy = tile_y1 - tile_y0; 277 size_t dtx = tile_x1 - tile_x0;
272 278 u32 *dst = &layer[start_row + (tile_x0 + tile_y * 32) * 8];
273 // We can update two lines at a time, which is faster than calling draw_line 279 if (dtx < 1) {
274 // four times. 280 size_t shift_left = start_col * 4;
275 if (dx < 1) { 281 size_t shift_right = (7 - end_col) * 4;
276 u32 row_mask = 0xFFFFFFFF; 282 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left);
277 row_mask >>= (7 - start_col1 - dx) * 4; 283 u32 row = (0x11111111 * clr) & mask;
278 row_mask &= 0xFFFFFFFF << start_col0 * 4; 284 *dst = (*dst & ~mask) | row;
279 u32 row = (0x11111111 * clr) & row_mask;
280 buf_top[0] = (buf_top[0] & ~row_mask) | row;
281 buf_bot[0] = (buf_bot[0] & ~row_mask) | row;
282 dirty_tiles[tile_y0] |= 1 << tile_x0;
283 dirty_tiles[tile_y1] |= 1 << tile_x0;
284 } else { 285 } else {
285 size_t shift_left = start_col0 * 4; 286 size_t shift_left = start_col * 4;
286 size_t shift_right = (7 - start_col1) * 4; 287 size_t shift_right = (7 - end_col) * 4;
287 u32 row_mask = 0xFFFFFFFF; 288 u32 mask = 0xFFFFFFFF;
288 u32 row = 0x11111111 * clr; 289 u32 row = 0x11111111 * clr;
289 buf_top[0] = buf_top[0] & ~(row_mask << shift_left); 290 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
290 buf_top[0] |= row << shift_left; 291 dst += 8;
291 buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); 292 for (size_t i = 1; i < dtx; i++) {
292 buf_bot[0] |= row << shift_left; 293 *dst = row;
293 dirty_tiles[tile_y0] |= 1 << tile_x0; 294 dst += 8;
294 dirty_tiles[tile_y1] |= 1 << tile_x0;
295 for (size_t i = 1; i < dx; i++) {
296 buf_top[i * 8] = row;
297 buf_bot[i * 8] = row;
298 dirty_tiles[tile_y0] |= 1 << (tile_x0 + i);
299 dirty_tiles[tile_y1] |= 1 << (tile_x0 + i);
300 }
301 buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right);
302 buf_top[dx * 8] |= row >> shift_right;
303 buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right);
304 buf_bot[dx * 8] |= row >> shift_right;
305 dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx);
306 dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx);
307 }
308 u32 row_mask_left = 0xF << start_col0 * 4;
309 u32 row_mask_right = 0xF << start_col1 * 4;
310 u32 row_left = (0x11111111 * clr) & row_mask_left;
311 u32 row_right = (0x11111111 * clr) & row_mask_right;
312 if (dy < 1) {
313 for (size_t i = 1; i < y1 - y0; i++, buf_top++) {
314 buf_top[1] = buf_top[1] & ~row_mask_left;
315 buf_top[1] |= row_left;
316 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
317 buf_top[1 + 8 * dx] |= row_right;
318 }
319 } else {
320 for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) {
321 buf_top[1] = buf_top[1] & ~row_mask_left;
322 buf_top[1] |= row_left;
323 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
324 buf_top[1 + 8 * dx] |= row_right;
325 }
326 buf_top += 8 * 31;
327 for (size_t j = 1; j < dy; j++) {
328 for (size_t i = 0; i < 8; i++, buf_top++) {
329 buf_top[1] = buf_top[1] & ~row_mask_left;
330 buf_top[1] |= row_left;
331 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
332 buf_top[1 + 8 * dx] |= row_right;
333 }
334 buf_top += 8 * 31;
335 dirty_tiles[tile_y0 + j] |= 1 << tile_x0;
336 dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx);
337 }
338 for (size_t i = 0; i < start_row1; i++, buf_top++) {
339 buf_top[1] = buf_top[1] & ~row_mask_left;
340 buf_top[1] |= row_left;
341 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
342 buf_top[1 + 8 * dx] |= row_right;
343 } 295 }
296 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
344 } 297 }
345} 298}
346 299
347IWRAM_CODE 300IWRAM_CODE
348void 301void
349screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { 302screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) {
350 BOUNDCHECK_SCREEN(x0, y0); 303 MAYBE_SWAP(x0, x1);
351 BOUNDCHECK_SCREEN(x1, y1); 304 MAYBE_SWAP(y0, y1);
352 305
353 size_t dx = x1 - x0; 306 // Special condition. If the screen is to be completely filled, use the DMA
354 size_t dy = y1 - y0; 307 // instead.
355 size_t n_rect = MIN(dx, dy); 308 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) {
356 n_rect = n_rect / 2 + 1; 309 clear_screen(layer, clr);
357 for (size_t i = 0; i < n_rect; i++) { 310 return;
358 ppu_rect(layer, x0 + i, y0 + i, x1 - i, y1 - i, clr); 311 }
312
313 // Drawline implementation.
314 for (size_t y = y0; y <= y1; y++) {
315 draw_hline(layer, x0, x1, y, clr);
359 } 316 }
317 // TODO: dirty?
360} 318}
361 319
362#if NEW_PPU == 0 320#if NEW_PPU == 0
@@ -616,15 +574,15 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
616 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; 574 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
617 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 575 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
618 if (clr == 1) { 576 if (clr == 1) {
619 u64 mask = ~((u64)0xFFFFFFFF << shift_left); 577 u32 mask = 0xFFFFFFFF;
620 if (!flip_y) { 578 if (!flip_y) {
621 for(size_t v = 0; v < 8; v++, dst++) { 579 for(size_t v = 0; v < 8; v++, dst++) {
622 if ((y + v) >= SCREEN_HEIGHT) break; 580 if ((y + v) >= SCREEN_HEIGHT) break;
623 u8 ch1 = sprite[v]; 581 u8 ch1 = sprite[v];
624 u8 ch2 = sprite[v | 8]; 582 u8 ch2 = sprite[v | 8];
625 u32 color = lut[ch1] | (lut[ch2] << 1); 583 u32 color = lut[ch1] | (lut[ch2] << 1);
626 dst[0] = (dst[0] & (mask << shift_left)) | color; 584 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
627 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 585 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
628 if ((start_row + v) == 7) dst += (32 - 1) * 8; 586 if ((start_row + v) == 7) dst += (32 - 1) * 8;
629 } 587 }
630 } else { 588 } else {
@@ -633,8 +591,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
633 u8 ch1 = sprite[(7 - v)]; 591 u8 ch1 = sprite[(7 - v)];
634 u8 ch2 = sprite[(7 - v) | 8]; 592 u8 ch2 = sprite[(7 - v) | 8];
635 u32 color = lut[ch1] | (lut[ch2] << 1); 593 u32 color = lut[ch1] | (lut[ch2] << 1);
636 dst[0] = (dst[0] & (mask << shift_left)) | color; 594 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
637 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 595 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
638 if ((start_row + v) == 7) dst += (32 - 1) * 8; 596 if ((start_row + v) == 7) dst += (32 - 1) * 8;
639 } 597 }
640 } 598 }
@@ -707,7 +665,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
707 color = (clr1 * col1mask) | 665 color = (clr1 * col1mask) |
708 (clr2 * col2mask) | 666 (clr2 * col2mask) |
709 (clr3 * col3mask); 667 (clr3 * col3mask);
710 dst[0] = (dst[0] & (mask << shift_left)) | color; 668 dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left);
711 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 669 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
712 if ((start_row + v) == 7) dst += (32 - 1) * 8; 670 if ((start_row + v) == 7) dst += (32 - 1) * 8;
713 } 671 }
@@ -727,7 +685,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
727 color = (clr1 * col1mask) | 685 color = (clr1 * col1mask) |
728 (clr2 * col2mask) | 686 (clr2 * col2mask) |
729 (clr3 * col3mask); 687 (clr3 * col3mask);
730 dst[0] = (dst[0] & (mask << shift_left)) | color; 688 dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left);
731 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 689 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
732 if ((start_row + v) == 7) dst += (32 - 1) * 8; 690 if ((start_row + v) == 7) dst += (32 - 1) * 8;
733 } 691 }