diff options
Diffstat (limited to 'src/ppu.c')
-rw-r--r-- | src/ppu.c | 160 |
1 files changed, 59 insertions, 101 deletions
@@ -248,115 +248,73 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { | |||
248 | } | 248 | } |
249 | 249 | ||
250 | IWRAM_CODE | 250 | IWRAM_CODE |
251 | void clear_screen(u32 *layer, u8 clr) { | ||
252 | // We have to make sure we leave the last tile blank to use as alpha channel | ||
253 | // when moving the BG during double buffering in case we are using that. | ||
254 | dma_fill(layer, 0x11111111 * clr, KB(20) - 32, 3); | ||
255 | } | ||
256 | |||
257 | IWRAM_CODE | ||
258 | static inline | ||
251 | void | 259 | void |
252 | ppu_rect(u32 *layer, size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | 260 | draw_hline(u32 *layer, size_t x0, size_t x1, size_t y0, u8 clr) { |
253 | BOUNDCHECK_SCREEN(x0, y0); | 261 | BOUNDCHECK_SCREEN(x0, y0); |
254 | BOUNDCHECK_SCREEN(x1, y1); | 262 | BOUNDCHECK_SCREEN(x1, y0); |
255 | |||
256 | // Find row positions for the given x/y coordinates. | 263 | // Find row positions for the given x/y coordinates. |
257 | size_t tile_x0 = x0 / 8; | 264 | size_t tile_x0 = x0 / 8; |
258 | size_t tile_y0 = y0 / 8; | ||
259 | size_t tile_x1 = x1 / 8; | 265 | size_t tile_x1 = x1 / 8; |
260 | size_t tile_y1 = y1 / 8; | 266 | size_t tile_y = y0 / 8; |
261 | size_t start_col0 = x0 % 8; | 267 | size_t start_col = x0 % 8; |
262 | size_t start_col1 = x1 % 8; | 268 | size_t end_col = x1 % 8; |
263 | size_t start_row0 = y0 % 8; | 269 | size_t start_row = y0 % 8; |
264 | size_t start_row1 = y1 % 8; | 270 | |
265 | 271 | // Horizontal line. There are 3 cases: | |
266 | // Get a pointer to the backbuffer and the tile row. | 272 | // 1. Lines fit on a single tile. |
267 | u32 *buf_top = &layer[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; | 273 | // 2. Lines go through 2 tiles, both require partial row updates. |
268 | u32 *buf_bot = &layer[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; | 274 | // 3. Lines go through 3 or more tiles, first and last tiles use |
269 | 275 | // partial row updates, rows in the middle can write the entire | |
270 | size_t dx = tile_x1 - tile_x0; | 276 | // row. |
271 | size_t dy = tile_y1 - tile_y0; | 277 | size_t dtx = tile_x1 - tile_x0; |
272 | 278 | u32 *dst = &layer[start_row + (tile_x0 + tile_y * 32) * 8]; | |
273 | // We can update two lines at a time, which is faster than calling draw_line | 279 | if (dtx < 1) { |
274 | // four times. | 280 | size_t shift_left = start_col * 4; |
275 | if (dx < 1) { | 281 | size_t shift_right = (7 - end_col) * 4; |
276 | u32 row_mask = 0xFFFFFFFF; | 282 | u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); |
277 | row_mask >>= (7 - start_col1 - dx) * 4; | 283 | u32 row = (0x11111111 * clr) & mask; |
278 | row_mask &= 0xFFFFFFFF << start_col0 * 4; | 284 | *dst = (*dst & ~mask) | row; |
279 | u32 row = (0x11111111 * clr) & row_mask; | ||
280 | buf_top[0] = (buf_top[0] & ~row_mask) | row; | ||
281 | buf_bot[0] = (buf_bot[0] & ~row_mask) | row; | ||
282 | dirty_tiles[tile_y0] |= 1 << tile_x0; | ||
283 | dirty_tiles[tile_y1] |= 1 << tile_x0; | ||
284 | } else { | 285 | } else { |
285 | size_t shift_left = start_col0 * 4; | 286 | size_t shift_left = start_col * 4; |
286 | size_t shift_right = (7 - start_col1) * 4; | 287 | size_t shift_right = (7 - end_col) * 4; |
287 | u32 row_mask = 0xFFFFFFFF; | 288 | u32 mask = 0xFFFFFFFF; |
288 | u32 row = 0x11111111 * clr; | 289 | u32 row = 0x11111111 * clr; |
289 | buf_top[0] = buf_top[0] & ~(row_mask << shift_left); | 290 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); |
290 | buf_top[0] |= row << shift_left; | 291 | dst += 8; |
291 | buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); | 292 | for (size_t i = 1; i < dtx; i++) { |
292 | buf_bot[0] |= row << shift_left; | 293 | *dst = row; |
293 | dirty_tiles[tile_y0] |= 1 << tile_x0; | 294 | dst += 8; |
294 | dirty_tiles[tile_y1] |= 1 << tile_x0; | ||
295 | for (size_t i = 1; i < dx; i++) { | ||
296 | buf_top[i * 8] = row; | ||
297 | buf_bot[i * 8] = row; | ||
298 | dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); | ||
299 | dirty_tiles[tile_y1] |= 1 << (tile_x0 + i); | ||
300 | } | ||
301 | buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right); | ||
302 | buf_top[dx * 8] |= row >> shift_right; | ||
303 | buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right); | ||
304 | buf_bot[dx * 8] |= row >> shift_right; | ||
305 | dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); | ||
306 | dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx); | ||
307 | } | ||
308 | u32 row_mask_left = 0xF << start_col0 * 4; | ||
309 | u32 row_mask_right = 0xF << start_col1 * 4; | ||
310 | u32 row_left = (0x11111111 * clr) & row_mask_left; | ||
311 | u32 row_right = (0x11111111 * clr) & row_mask_right; | ||
312 | if (dy < 1) { | ||
313 | for (size_t i = 1; i < y1 - y0; i++, buf_top++) { | ||
314 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
315 | buf_top[1] |= row_left; | ||
316 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
317 | buf_top[1 + 8 * dx] |= row_right; | ||
318 | } | ||
319 | } else { | ||
320 | for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) { | ||
321 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
322 | buf_top[1] |= row_left; | ||
323 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
324 | buf_top[1 + 8 * dx] |= row_right; | ||
325 | } | ||
326 | buf_top += 8 * 31; | ||
327 | for (size_t j = 1; j < dy; j++) { | ||
328 | for (size_t i = 0; i < 8; i++, buf_top++) { | ||
329 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
330 | buf_top[1] |= row_left; | ||
331 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
332 | buf_top[1 + 8 * dx] |= row_right; | ||
333 | } | ||
334 | buf_top += 8 * 31; | ||
335 | dirty_tiles[tile_y0 + j] |= 1 << tile_x0; | ||
336 | dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx); | ||
337 | } | ||
338 | for (size_t i = 0; i < start_row1; i++, buf_top++) { | ||
339 | buf_top[1] = buf_top[1] & ~row_mask_left; | ||
340 | buf_top[1] |= row_left; | ||
341 | buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; | ||
342 | buf_top[1 + 8 * dx] |= row_right; | ||
343 | } | 295 | } |
296 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); | ||
344 | } | 297 | } |
345 | } | 298 | } |
346 | 299 | ||
347 | IWRAM_CODE | 300 | IWRAM_CODE |
348 | void | 301 | void |
349 | screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { | 302 | screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { |
350 | BOUNDCHECK_SCREEN(x0, y0); | 303 | MAYBE_SWAP(x0, x1); |
351 | BOUNDCHECK_SCREEN(x1, y1); | 304 | MAYBE_SWAP(y0, y1); |
352 | 305 | ||
353 | size_t dx = x1 - x0; | 306 | // Special condition. If the screen is to be completely filled, use the DMA |
354 | size_t dy = y1 - y0; | 307 | // instead. |
355 | size_t n_rect = MIN(dx, dy); | 308 | if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { |
356 | n_rect = n_rect / 2 + 1; | 309 | clear_screen(layer, clr); |
357 | for (size_t i = 0; i < n_rect; i++) { | 310 | return; |
358 | ppu_rect(layer, x0 + i, y0 + i, x1 - i, y1 - i, clr); | 311 | } |
312 | |||
313 | // Drawline implementation. | ||
314 | for (size_t y = y0; y <= y1; y++) { | ||
315 | draw_hline(layer, x0, x1, y, clr); | ||
359 | } | 316 | } |
317 | // TODO: dirty? | ||
360 | } | 318 | } |
361 | 319 | ||
362 | #if NEW_PPU == 0 | 320 | #if NEW_PPU == 0 |
@@ -616,15 +574,15 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
616 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; | 574 | u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; |
617 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | 575 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; |
618 | if (clr == 1) { | 576 | if (clr == 1) { |
619 | u64 mask = ~((u64)0xFFFFFFFF << shift_left); | 577 | u32 mask = 0xFFFFFFFF; |
620 | if (!flip_y) { | 578 | if (!flip_y) { |
621 | for(size_t v = 0; v < 8; v++, dst++) { | 579 | for(size_t v = 0; v < 8; v++, dst++) { |
622 | if ((y + v) >= SCREEN_HEIGHT) break; | 580 | if ((y + v) >= SCREEN_HEIGHT) break; |
623 | u8 ch1 = sprite[v]; | 581 | u8 ch1 = sprite[v]; |
624 | u8 ch2 = sprite[v | 8]; | 582 | u8 ch2 = sprite[v | 8]; |
625 | u32 color = lut[ch1] | (lut[ch2] << 1); | 583 | u32 color = lut[ch1] | (lut[ch2] << 1); |
626 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 584 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
627 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 585 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
628 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | 586 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
629 | } | 587 | } |
630 | } else { | 588 | } else { |
@@ -633,8 +591,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
633 | u8 ch1 = sprite[(7 - v)]; | 591 | u8 ch1 = sprite[(7 - v)]; |
634 | u8 ch2 = sprite[(7 - v) | 8]; | 592 | u8 ch2 = sprite[(7 - v) | 8]; |
635 | u32 color = lut[ch1] | (lut[ch2] << 1); | 593 | u32 color = lut[ch1] | (lut[ch2] << 1); |
636 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 594 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
637 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 595 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
638 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | 596 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
639 | } | 597 | } |
640 | } | 598 | } |
@@ -707,7 +665,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
707 | color = (clr1 * col1mask) | | 665 | color = (clr1 * col1mask) | |
708 | (clr2 * col2mask) | | 666 | (clr2 * col2mask) | |
709 | (clr3 * col3mask); | 667 | (clr3 * col3mask); |
710 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 668 | dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left); |
711 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 669 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
712 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | 670 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
713 | } | 671 | } |
@@ -727,7 +685,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | |||
727 | color = (clr1 * col1mask) | | 685 | color = (clr1 * col1mask) | |
728 | (clr2 * col2mask) | | 686 | (clr2 * col2mask) | |
729 | (clr3 * col3mask); | 687 | (clr3 * col3mask); |
730 | dst[0] = (dst[0] & (mask << shift_left)) | color; | 688 | dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left); |
731 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); | 689 | dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); |
732 | if ((start_row + v) == 7) dst += (32 - 1) * 8; | 690 | if ((start_row + v) == 7) dst += (32 - 1) * 8; |
733 | } | 691 | } |