From 402a74bf60e6e00e625364628e2d1ffe28d225ca Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Thu, 20 Apr 2023 09:10:50 +0200 Subject: Add initial screen fill implementation Still need to add dirty tiles to it but thus far should be fine. --- src/main.c | 104 ++++++++++++++++++++++------------------ src/ppu.c | 160 +++++++++++++++++++++++-------------------------------------- 2 files changed, 116 insertions(+), 148 deletions(-) diff --git a/src/main.c b/src/main.c index ba835ff..2f9d023 100644 --- a/src/main.c +++ b/src/main.c @@ -80,19 +80,21 @@ #define PROF_SHOW() \ do { \ txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ - txt_printf("1BPP: %lu 2BPP: %lu", ppu_icn_cycles, ppu_chr_cycles);\ + txt_printf("1BPP: %.8lu\n2BPP: %.8lu\nFILL: %.8lu", ppu_icn_cycles, ppu_chr_cycles, ppu_fill_cycles);\ } while (0) + static u32 ppu_pixel_cycles = 0; +static u32 ppu_fill_cycles = 0; static u32 ppu_chr_cycles = 0; static u32 ppu_icn_cycles = 0; static u32 flip_cycles = 0; static u32 eval_cycles = 0; static u32 input_cycles = 0; static u32 mix_cycles = 0; + #else #define PROF(F,VAR) (F) #define PROF_SHOW() -#define PROF_INIT() #endif static time_t seconds = 0; @@ -149,21 +151,23 @@ void screen_deo(u8 *ram, u8 *d, u8 port) { switch(port) { case 0xe: { - u16 x, y; - u8 *layer = (d[0xe] & 0x40) ? ppu.fg : ppu.bg; - x = PEEK2(d + 0x8); - y = PEEK2(d + 0xa); - if(d[0xe] & 0x80) { - screen_fill(layer, - (d[0xe] & 0x10) ? 0 : x, - (d[0xe] & 0x20) ? 0 : y, - (d[0xe] & 0x10) ? x : SCREEN_WIDTH, - (d[0xe] & 0x20) ? y : SCREEN_HEIGHT, - (d[0xe] & 0x03)); + u8 ctrl = d[0xe]; + u8 color = ctrl & 0x3; + u16 x0 = PEEK2(d + 0x8); + u16 y0 = PEEK2(d + 0xa); + u8 *layer = (ctrl & 0x40) ? ppu.fg : ppu.bg; + if(ctrl & 0x80) { + u16 x1 = SCREEN_WIDTH - 1; + u16 y1 = SCREEN_HEIGHT - 1; + if(ctrl & 0x10) x1 = x0, x0 = 0; + if(ctrl & 0x20) y1 = y0, y0 = 0; + PROF(screen_fill(layer, x0, y0, x1, y1, color), ppu_fill_cycles); } else { - PROF(ppu_pixel(layer, x, y, d[0xe] & 0x3), ppu_pixel_cycles); - if(d[0x6] & 0x01) POKE2(d + 0x8, x + 1); /* auto x+1 */ - if(d[0x6] & 0x02) POKE2(d + 0xa, y + 1); /* auto y+1 */ + u16 width = SCREEN_WIDTH; + u16 height = SCREEN_HEIGHT; + PROF(ppu_pixel(layer, x0, y0, color), ppu_pixel_cycles); + if(d[0x6] & 0x1) POKE2(d + 0x8, x0 + 1); /* auto x+1 */ + if(d[0x6] & 0x2) POKE2(d + 0xa, y0 + 1); /* auto y+1 */ } break; } @@ -328,25 +332,30 @@ console_deo(u8 *d, u8 port) { static void system_cmd(u8 *ram, u16 addr) { - // if(ram[addr] == 0x01) { - // u16 i, length = PEEK2(ram + addr + 1); - // u16 a_page = PEEK2(ram + addr + 1 + 2); - // u16 a_addr = PEEK2(ram + addr + 1 + 4); - // u16 b_addr = PEEK2(ram + addr + 1 + 8); - // u8 *rom = uxn_rom; - // for(i = 0; i < length; i++) { - // switch (a_page % RAM_PAGES) { - // case 0: { rom = uxn_rom; } break; - // case 1: { rom = uxn_rom_2; } break; - // case 2: { rom = uxn_rom_3; } break; - // case 3: { rom = uxn_rom_4; } break; - // case 4: { rom = uxn_rom_5; } break; - // case 5: { rom = uxn_rom_6; } break; - // case 6: { rom = uxn_rom_7; } break; - // } - // ram[(u16)(b_addr + i)] = rom[(u16)(a_addr + i)]; + if(ram[addr] == 0x01) { + // NOTE: Handle rom paging on a case by case basis if a rom has to be + // split in multiple chunks. The GBA compiler doesn't like allocating + // big arrays, but it's fine if we split it into chunks of 64KB, for + // example. + // + // u16 i, length = PEEK2(ram + addr + 1); + // u16 a_page = PEEK2(ram + addr + 1 + 2); + // u16 a_addr = PEEK2(ram + addr + 1 + 4); + // u16 b_addr = PEEK2(ram + addr + 1 + 8); + // u8 *rom = uxn_rom; + // for(i = 0; i < length; i++) { + // switch (a_page % RAM_PAGES) { + // case 0: { rom = uxn_rom; } break; + // case 1: { rom = uxn_rom_2; } break; + // case 2: { rom = uxn_rom_3; } break; + // case 3: { rom = uxn_rom_4; } break; + // case 4: { rom = uxn_rom_5; } break; + // case 5: { rom = uxn_rom_6; } break; + // case 6: { rom = uxn_rom_7; } break; + // } + // ram[(u16)(b_addr + i)] = rom[(u16)(a_addr + i)]; // } - // } + } } void @@ -378,19 +387,20 @@ void uxn_deo(Uxn *u, u8 addr) { u8 p = addr & 0x0f, d = addr & 0xf0; switch(d) { - case 0x00: - system_deo(u, &u->dev[d], p); - if(p > 0x7 && p < 0xe) - putcolors(&u->dev[0x8]); - break; - case 0x10: console_deo(&u->dev[d], p); break; - case 0x20: screen_deo(u->ram, &u->dev[d], p); break; - case 0x30: audio_deo(0, &u->dev[d], p, u); break; - case 0x40: audio_deo(1, &u->dev[d], p, u); break; - case 0x50: audio_deo(2, &u->dev[d], p, u); break; - case 0x60: audio_deo(3, &u->dev[d], p, u); break; - case 0xa0: file_deo(0, u->ram, &u->dev[d], p); break; - case 0xb0: file_deo(1, u->ram, &u->dev[d], p); break; + case 0x00: + system_deo(u, &u->dev[d], p); + if(p > 0x7 && p < 0xe) { + putcolors(&u->dev[0x8]); + } + break; + case 0x10: console_deo(&u->dev[d], p); break; + case 0x20: screen_deo(u->ram, &u->dev[d], p); break; + case 0x30: audio_deo(0, &u->dev[d], p, u); break; + case 0x40: audio_deo(1, &u->dev[d], p, u); break; + case 0x50: audio_deo(2, &u->dev[d], p, u); break; + case 0x60: audio_deo(3, &u->dev[d], p, u); break; + case 0xa0: file_deo(0, u->ram, &u->dev[d], p); break; + case 0xb0: file_deo(1, u->ram, &u->dev[d], p); break; } } diff --git a/src/ppu.c b/src/ppu.c index 3b159af..8e1710c 100644 --- a/src/ppu.c +++ b/src/ppu.c @@ -248,115 +248,73 @@ ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { } IWRAM_CODE +void clear_screen(u32 *layer, u8 clr) { + // We have to make sure we leave the last tile blank to use as alpha channel + // when moving the BG during double buffering in case we are using that. + dma_fill(layer, 0x11111111 * clr, KB(20) - 32, 3); +} + +IWRAM_CODE +static inline void -ppu_rect(u32 *layer, size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { +draw_hline(u32 *layer, size_t x0, size_t x1, size_t y0, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); - BOUNDCHECK_SCREEN(x1, y1); - + BOUNDCHECK_SCREEN(x1, y0); // Find row positions for the given x/y coordinates. size_t tile_x0 = x0 / 8; - size_t tile_y0 = y0 / 8; size_t tile_x1 = x1 / 8; - size_t tile_y1 = y1 / 8; - size_t start_col0 = x0 % 8; - size_t start_col1 = x1 % 8; - size_t start_row0 = y0 % 8; - size_t start_row1 = y1 % 8; - - // Get a pointer to the backbuffer and the tile row. - u32 *buf_top = &layer[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; - u32 *buf_bot = &layer[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; - - size_t dx = tile_x1 - tile_x0; - size_t dy = tile_y1 - tile_y0; - - // We can update two lines at a time, which is faster than calling draw_line - // four times. - if (dx < 1) { - u32 row_mask = 0xFFFFFFFF; - row_mask >>= (7 - start_col1 - dx) * 4; - row_mask &= 0xFFFFFFFF << start_col0 * 4; - u32 row = (0x11111111 * clr) & row_mask; - buf_top[0] = (buf_top[0] & ~row_mask) | row; - buf_bot[0] = (buf_bot[0] & ~row_mask) | row; - dirty_tiles[tile_y0] |= 1 << tile_x0; - dirty_tiles[tile_y1] |= 1 << tile_x0; + size_t tile_y = y0 / 8; + size_t start_col = x0 % 8; + size_t end_col = x1 % 8; + size_t start_row = y0 % 8; + + // Horizontal line. There are 3 cases: + // 1. Lines fit on a single tile. + // 2. Lines go through 2 tiles, both require partial row updates. + // 3. Lines go through 3 or more tiles, first and last tiles use + // partial row updates, rows in the middle can write the entire + // row. + size_t dtx = tile_x1 - tile_x0; + u32 *dst = &layer[start_row + (tile_x0 + tile_y * 32) * 8]; + if (dtx < 1) { + size_t shift_left = start_col * 4; + size_t shift_right = (7 - end_col) * 4; + u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); + u32 row = (0x11111111 * clr) & mask; + *dst = (*dst & ~mask) | row; } else { - size_t shift_left = start_col0 * 4; - size_t shift_right = (7 - start_col1) * 4; - u32 row_mask = 0xFFFFFFFF; + size_t shift_left = start_col * 4; + size_t shift_right = (7 - end_col) * 4; + u32 mask = 0xFFFFFFFF; u32 row = 0x11111111 * clr; - buf_top[0] = buf_top[0] & ~(row_mask << shift_left); - buf_top[0] |= row << shift_left; - buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); - buf_bot[0] |= row << shift_left; - dirty_tiles[tile_y0] |= 1 << tile_x0; - dirty_tiles[tile_y1] |= 1 << tile_x0; - for (size_t i = 1; i < dx; i++) { - buf_top[i * 8] = row; - buf_bot[i * 8] = row; - dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); - dirty_tiles[tile_y1] |= 1 << (tile_x0 + i); - } - buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right); - buf_top[dx * 8] |= row >> shift_right; - buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right); - buf_bot[dx * 8] |= row >> shift_right; - dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); - dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx); - } - u32 row_mask_left = 0xF << start_col0 * 4; - u32 row_mask_right = 0xF << start_col1 * 4; - u32 row_left = (0x11111111 * clr) & row_mask_left; - u32 row_right = (0x11111111 * clr) & row_mask_right; - if (dy < 1) { - for (size_t i = 1; i < y1 - y0; i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; - } - } else { - for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; - } - buf_top += 8 * 31; - for (size_t j = 1; j < dy; j++) { - for (size_t i = 0; i < 8; i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; - } - buf_top += 8 * 31; - dirty_tiles[tile_y0 + j] |= 1 << tile_x0; - dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx); - } - for (size_t i = 0; i < start_row1; i++, buf_top++) { - buf_top[1] = buf_top[1] & ~row_mask_left; - buf_top[1] |= row_left; - buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; - buf_top[1 + 8 * dx] |= row_right; + *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); + dst += 8; + for (size_t i = 1; i < dtx; i++) { + *dst = row; + dst += 8; } + *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); } } IWRAM_CODE void screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { - BOUNDCHECK_SCREEN(x0, y0); - BOUNDCHECK_SCREEN(x1, y1); - - size_t dx = x1 - x0; - size_t dy = y1 - y0; - size_t n_rect = MIN(dx, dy); - n_rect = n_rect / 2 + 1; - for (size_t i = 0; i < n_rect; i++) { - ppu_rect(layer, x0 + i, y0 + i, x1 - i, y1 - i, clr); + MAYBE_SWAP(x0, x1); + MAYBE_SWAP(y0, y1); + + // Special condition. If the screen is to be completely filled, use the DMA + // instead. + if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { + clear_screen(layer, clr); + return; + } + + // Drawline implementation. + for (size_t y = y0; y <= y1; y++) { + draw_hline(layer, x0, x1, y, clr); } + // TODO: dirty? } #if NEW_PPU == 0 @@ -616,15 +574,15 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; if (clr == 1) { - u64 mask = ~((u64)0xFFFFFFFF << shift_left); + u32 mask = 0xFFFFFFFF; if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u8 ch2 = sprite[v | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { @@ -633,8 +591,8 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u8 ch1 = sprite[(7 - v)]; u8 ch2 = sprite[(7 - v) | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); - dst[0] = (dst[0] & (mask << shift_left)) | color; - dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } @@ -707,7 +665,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { color = (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); - dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } @@ -727,7 +685,7 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { color = (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); - dst[0] = (dst[0] & (mask << shift_left)) | color; + dst[0] = (dst[0] & (mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & (mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } -- cgit v1.2.1