about | summary | refs | log | tree | commit | diff | stats
path: root/src/ppu.c
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-20 09:10:50 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-20 09:10:50 +0200
commit 402a74bf60e6e00e625364628e2d1ffe28d225ca (patch)
tree f4a5ead120da09a25c83d17fed2a7e04514e4338 /src/ppu.c
parent da30ead571b5243b777244b6407fe911fab8359f (diff)
download uxngba-402a74bf60e6e00e625364628e2d1ffe28d225ca.tar.gz
uxngba-402a74bf60e6e00e625364628e2d1ffe28d225ca.zip
Add initial screen fill implementation
Dirty-tile tracking still needs to be added to it, but so far the implementation should be fine.
Diffstat (limited to 'src/ppu.c')
-rw-r--r-- src/ppu.c 160
1 file changed, 59 insertions, 101 deletions
diff --git a/src/ppu.c b/src/ppu.c
index 3b159af..8e1710c 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -248,115 +248,73 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) {
248} 248}
249 249
250IWRAM_CODE 250IWRAM_CODE
251void clear_screen(u32 *layer, u8 clr) {
252 // We have to make sure we leave the last tile blank to use as alpha channel
253 // when moving the BG during double buffering in case we are using that.
254 dma_fill(layer, 0x11111111 * clr, KB(20) - 32, 3);
255}
256
257IWRAM_CODE
258static inline
251void 259void
252ppu_rect(u32 *layer, size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { 260draw_hline(u32 *layer, size_t x0, size_t x1, size_t y0, u8 clr) {
253 BOUNDCHECK_SCREEN(x0, y0); 261 BOUNDCHECK_SCREEN(x0, y0);
254 BOUNDCHECK_SCREEN(x1, y1); 262 BOUNDCHECK_SCREEN(x1, y0);
255
256 // Find row positions for the given x/y coordinates. 263 // Find row positions for the given x/y coordinates.
257 size_t tile_x0 = x0 / 8; 264 size_t tile_x0 = x0 / 8;
258 size_t tile_y0 = y0 / 8;
259 size_t tile_x1 = x1 / 8; 265 size_t tile_x1 = x1 / 8;
260 size_t tile_y1 = y1 / 8; 266 size_t tile_y = y0 / 8;
261 size_t start_col0 = x0 % 8; 267 size_t start_col = x0 % 8;
262 size_t start_col1 = x1 % 8; 268 size_t end_col = x1 % 8;
263 size_t start_row0 = y0 % 8; 269 size_t start_row = y0 % 8;
264 size_t start_row1 = y1 % 8; 270
265 271 // Horizontal line. There are 3 cases:
266 // Get a pointer to the backbuffer and the tile row. 272 // 1. Lines fit on a single tile.
267 u32 *buf_top = &layer[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; 273 // 2. Lines go through 2 tiles, both require partial row updates.
268 u32 *buf_bot = &layer[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; 274 // 3. Lines go through 3 or more tiles, first and last tiles use
269 275 // partial row updates, rows in the middle can write the entire
270 size_t dx = tile_x1 - tile_x0; 276 // row.
271 size_t dy = tile_y1 - tile_y0; 277 size_t dtx = tile_x1 - tile_x0;
272 278 u32 *dst = &layer[start_row + (tile_x0 + tile_y * 32) * 8];
273 // We can update two lines at a time, which is faster than calling draw_line 279 if (dtx < 1) {
274 // four times. 280 size_t shift_left = start_col * 4;
275 if (dx < 1) { 281 size_t shift_right = (7 - end_col) * 4;
276 u32 row_mask = 0xFFFFFFFF; 282 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left);
277 row_mask >>= (7 - start_col1 - dx) * 4; 283 u32 row = (0x11111111 * clr) & mask;
278 row_mask &= 0xFFFFFFFF << start_col0 * 4; 284 *dst = (*dst & ~mask) | row;
279 u32 row = (0x11111111 * clr) & row_mask;
280 buf_top[0] = (buf_top[0] & ~row_mask) | row;
281 buf_bot[0] = (buf_bot[0] & ~row_mask) | row;
282 dirty_tiles[tile_y0] |= 1 << tile_x0;
283 dirty_tiles[tile_y1] |= 1 << tile_x0;
284 } else { 285 } else {
285 size_t shift_left = start_col0 * 4; 286 size_t shift_left = start_col * 4;
286 size_t shift_right = (7 - start_col1) * 4; 287 size_t shift_right = (7 - end_col) * 4;
287 u32 row_mask = 0xFFFFFFFF; 288 u32 mask = 0xFFFFFFFF;
288 u32 row = 0x11111111 * clr; 289 u32 row = 0x11111111 * clr;
289 buf_top[0] = buf_top[0] & ~(row_mask << shift_left); 290 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
290 buf_top[0] |= row << shift_left; 291 dst += 8;
291 buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); 292 for (size_t i = 1; i < dtx; i++) {
292 buf_bot[0] |= row << shift_left; 293 *dst = row;
293 dirty_tiles[tile_y0] |= 1 << tile_x0; 294 dst += 8;
294 dirty_tiles[tile_y1] |= 1 << tile_x0;
295 for (size_t i = 1; i < dx; i++) {
296 buf_top[i * 8] = row;
297 buf_bot[i * 8] = row;
298 dirty_tiles[tile_y0] |= 1 << (tile_x0 + i);
299 dirty_tiles[tile_y1] |= 1 << (tile_x0 + i);
300 }
301 buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right);
302 buf_top[dx * 8] |= row >> shift_right;
303 buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right);
304 buf_bot[dx * 8] |= row >> shift_right;
305 dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx);
306 dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx);
307 }
308 u32 row_mask_left = 0xF << start_col0 * 4;
309 u32 row_mask_right = 0xF << start_col1 * 4;
310 u32 row_left = (0x11111111 * clr) & row_mask_left;
311 u32 row_right = (0x11111111 * clr) & row_mask_right;
312 if (dy < 1) {
313 for (size_t i = 1; i < y1 - y0; i++, buf_top++) {
314 buf_top[1] = buf_top[1] & ~row_mask_left;
315 buf_top[1] |= row_left;
316 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
317 buf_top[1 + 8 * dx] |= row_right;
318 }
319 } else {
320 for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) {
321 buf_top[1] = buf_top[1] & ~row_mask_left;
322 buf_top[1] |= row_left;
323 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
324 buf_top[1 + 8 * dx] |= row_right;
325 }
326 buf_top += 8 * 31;
327 for (size_t j = 1; j < dy; j++) {
328 for (size_t i = 0; i < 8; i++, buf_top++) {
329 buf_top[1] = buf_top[1] & ~row_mask_left;
330 buf_top[1] |= row_left;
331 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
332 buf_top[1 + 8 * dx] |= row_right;
333 }
334 buf_top += 8 * 31;
335 dirty_tiles[tile_y0 + j] |= 1 << tile_x0;
336 dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx);
337 }
338 for (size_t i = 0; i < start_row1; i++, buf_top++) {
339 buf_top[1] = buf_top[1] & ~row_mask_left;
340 buf_top[1] |= row_left;
341 buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right;
342 buf_top[1 + 8 * dx] |= row_right;
343 } 295 }
296 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
344 } 297 }
345} 298}
346 299
347IWRAM_CODE 300IWRAM_CODE
348void 301void
349screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { 302screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) {
350 BOUNDCHECK_SCREEN(x0, y0); 303 MAYBE_SWAP(x0, x1);
351 BOUNDCHECK_SCREEN(x1, y1); 304 MAYBE_SWAP(y0, y1);
352 305
353 size_t dx = x1 - x0; 306 // Special condition. If the screen is to be completely filled, use the DMA
354 size_t dy = y1 - y0; 307 // instead.
355 size_t n_rect = MIN(dx, dy); 308 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) {
356 n_rect = n_rect / 2 + 1; 309 clear_screen(layer, clr);
357 for (size_t i = 0; i < n_rect; i++) { 310 return;
358 ppu_rect(layer, x0 + i, y0 + i, x1 - i, y1 - i, clr); 311 }
312
313 // Drawline implementation.
314 for (size_t y = y0; y <= y1; y++) {
315 draw_hline(layer, x0, x1, y, clr);
359 } 316 }
317 // TODO: dirty?
360} 318}
361 319
362#if NEW_PPU == 0 320#if NEW_PPU == 0
@@ -616,15 +574,15 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
616 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; 574 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
617 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 575 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
618 if (clr == 1) { 576 if (clr == 1) {
619 u64 mask = ~((u64)0xFFFFFFFF << shift_left); 577 u32 mask = 0xFFFFFFFF;
620 if (!flip_y) { 578 if (!flip_y) {
621 for(size_t v = 0; v < 8; v++, dst++) { 579 for(size_t v = 0; v < 8; v++, dst++) {
622 if ((y + v) >= SCREEN_HEIGHT) break; 580 if ((y + v) >= SCREEN_HEIGHT) break;
623 u8 ch1 = sprite[v]; 581 u8 ch1 = sprite[v];
624 u8 ch2 = sprite[v | 8]; 582 u8 ch2 = sprite[v | 8];
625 u32 color = lut[ch1] | (lut[ch2] << 1); 583 u32 color = lut[ch1] | (lut[ch2] << 1);
626 dst[0] = (dst[0] & (mask << shift_left)) | color; 584 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
627 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 585 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
628 if ((start_row + v) == 7) dst += (32 - 1) * 8; 586 if ((start_row + v) == 7) dst += (32 - 1) * 8;
629 } 587 }
630 } else { 588 } else {
@@ -633,8 +591,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
633 u8 ch1 = sprite[(7 - v)]; 591 u8 ch1 = sprite[(7 - v)];
634 u8 ch2 = sprite[(7 - v) | 8]; 592 u8 ch2 = sprite[(7 - v) | 8];
635 u32 color = lut[ch1] | (lut[ch2] << 1); 593 u32 color = lut[ch1] | (lut[ch2] << 1);
636 dst[0] = (dst[0] & (mask << shift_left)) | color; 594 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
637 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 595 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
638 if ((start_row + v) == 7) dst += (32 - 1) * 8; 596 if ((start_row + v) == 7) dst += (32 - 1) * 8;
639 } 597 }
640 } 598 }
@@ -707,7 +665,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
707 color = (clr1 * col1mask) | 665 color = (clr1 * col1mask) |
708 (clr2 * col2mask) | 666 (clr2 * col2mask) |
709 (clr3 * col3mask); 667 (clr3 * col3mask);
710 dst[0] = (dst[0] & (mask << shift_left)) | color; 668 dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left);
711 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 669 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
712 if ((start_row + v) == 7) dst += (32 - 1) * 8; 670 if ((start_row + v) == 7) dst += (32 - 1) * 8;
713 } 671 }
@@ -727,7 +685,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
727 color = (clr1 * col1mask) | 685 color = (clr1 * col1mask) |
728 (clr2 * col2mask) | 686 (clr2 * col2mask) |
729 (clr3 * col3mask); 687 (clr3 * col3mask);
730 dst[0] = (dst[0] & (mask << shift_left)) | color; 688 dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left);
731 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); 689 dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right);
732 if ((start_row + v) == 7) dst += (32 - 1) * 8; 690 if ((start_row + v) == 7) dst += (32 - 1) * 8;
733 } 691 }