// // This Mode 0 renderer provides a way of drawing directly to a framebuffer // (similar to Mode 3 and 4) while retaining the flexibility of using other // backgrounds if needed. It also performs double buffering to avoid tearing // artifacts and tries to only draw tiles that changed on each frame. // // In addition to the frontbuffer (displayed on background 0), a tiled text // layer is displayed on background 1, which can be used for application // development or for debug information. // // These two layers occupy the first and second background charblocks, leaving // the remaining two available for other background layers. There are 14KB of // sprite memory available, since the backbuffer is located at the end of the // VRAM, but if more space is needed it can be moved to the end of the BG // charblocks instead. // #include "renderer.h" #include "text.h" // Keep track of which tiles need to be copied to the frontbuffer. static u32 dirty_tiles[21] = {0}; // Boundchecks can be disable at compile time but this will not always improve // the performance and can in fact make it worse. It is possible that this is // due to some aliasing optimiztions but not sure at this moment. #ifdef DISABLE_BOUNDCHECK_SCREEN #define BOUNDCHECK_SCREEN(X,Y) #else #define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; #endif IWRAM_CODE void draw_pixel(size_t x, size_t y, u8 clr) { BOUNDCHECK_SCREEN(x, y); // Find row position for the given x/y coordinates. size_t tile_x = x / 8; size_t tile_y = y / 8; size_t start_col = x % 8; size_t start_row = y % 8; size_t pos = start_row + (tile_x + tile_y * 32) * 8; // Update backbuffer. size_t shift = start_col * sizeof(u32); BACKBUF[pos] = (BACKBUF[pos] & ~(0xF << shift)) | clr << shift; // Mark tile as dirty. dirty_tiles[tile_y] |= 1 << tile_x; } IWRAM_CODE void draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y1); // Find row positions for the given x/y coordinates. size_t tile_x0 = x0 / 8; size_t tile_y0 = y0 / 8; size_t tile_x1 = x1 / 8; size_t tile_y1 = y1 / 8; size_t start_col0 = x0 % 8; size_t start_col1 = x1 % 8; size_t start_row0 = y0 % 8; size_t start_row1 = y1 % 8; // Get a pointer to the backbuffer and the tile row. u32 *backbuffer = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; if (y0 == y1) { // Horizontal line. There are 3 cases: // 1. Lines fit on a single tile. // 2. Lines go through 2 tiles, both require partial row updates. // 3. Lines go through 3 or more tiles, first and last tiles use // partial row updates, rows in the middle can write the. size_t dx = tile_x1 - tile_x0; if (dx < 1) { u32 row_mask = 0xFFFFFFFF; row_mask >>= (7 - start_col1 - dx) * 4; row_mask &= 0xFFFFFFFF << start_col0 * 4; u32 row = (0x11111111 * clr) & row_mask; backbuffer[0] = (backbuffer[0] & ~row_mask) | row; dirty_tiles[tile_y0] |= 1 << tile_x0; } else { size_t shift_left = start_col0 * 4; size_t shift_right = (7 - start_col1) * 4; u32 row_mask = 0xFFFFFFFF; u32 row = 0x11111111 * clr; backbuffer[0] = backbuffer[0] & ~(row_mask << shift_left); backbuffer[0] |= row << shift_left; dirty_tiles[tile_y0] |= 1 << tile_x0; for (size_t i = 1; i < dx; i++) { backbuffer[i * 8] = row; dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); } backbuffer[dx * 8] = backbuffer[dx * 8] & ~(row_mask >> shift_right); backbuffer[dx * 8] |= row >> shift_right; dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); } } else if (x0 == x1) { // Vertical line. The cases are analogous to the horizontal ones. size_t dy = tile_y1 - tile_y0; u32 row_mask = 0xF << start_col0 * 4; u32 row_left = (0x11111111 * clr) & row_mask; if (dy < 1) { for (size_t i = 0; i <= y1 - y0; i++, backbuffer++) { backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left; } } else { for (size_t i = 0; i < (8 - start_row0); i++, backbuffer++) { backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left; } dirty_tiles[tile_y0] |= 1 << tile_x0; backbuffer += 8 * 31; for (size_t j = 1; j < dy; j++) { for (size_t i = 0; i < 8; i++, backbuffer++) { backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left; } backbuffer += 8 * 31; dirty_tiles[tile_y0 + j] |= 1 << tile_x0; } for (size_t i = 0; i <= start_row1; i++, backbuffer++) { backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left; } dirty_tiles[tile_y1] |= 1 << tile_x0; } } else { // Diagonal line. int dx = x0 > x1 ? x0 - x1 : x1 - x0; int dy = y0 > y1 ? y1 - y0 : y0 - y1; int x_step = x0 < x1 ? 1 : -1; int y_step = y0 < y1 ? 1 : -1; int err = dx + dy; while (!(x0 == x1 && y0 == y1)) { draw_pixel(x0, y0, clr); int diff = 2 * err; if (diff >= dy) { err += dy; x0 += x_step; } if (diff <= dx) { err += dx; y0 += y_step; } } } } IWRAM_CODE void draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y1); // Find row positions for the given x/y coordinates. size_t tile_x0 = x0 / 8; size_t tile_y0 = y0 / 8; size_t tile_x1 = x1 / 8; size_t tile_y1 = y1 / 8; size_t start_col0 = x0 % 8; size_t start_col1 = x1 % 8; size_t start_row0 = y0 % 8; size_t start_row1 = y1 % 8; // Get a pointer to the backbuffer and the tile row. u32 *buf_top = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; u32 *buf_bot = &BACKBUF[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; size_t dx = tile_x1 - tile_x0; size_t dy = tile_y1 - tile_y0; // We can update two lines at a time, which is faster than calling draw_line // four times. if (dx < 1) { u32 row_mask = 0xFFFFFFFF; row_mask >>= (7 - start_col1 - dx) * 4; row_mask &= 0xFFFFFFFF << start_col0 * 4; u32 row = (0x11111111 * clr) & row_mask; buf_top[0] = (buf_top[0] & ~row_mask) | row; buf_bot[0] = (buf_bot[0] & ~row_mask) | row; dirty_tiles[tile_y0] |= 1 << tile_x0; dirty_tiles[tile_y1] |= 1 << tile_x0; } else { size_t shift_left = start_col0 * 4; size_t shift_right = (7 - start_col1) * 4; u32 row_mask = 0xFFFFFFFF; u32 row = 0x11111111 * clr; buf_top[0] = buf_top[0] & ~(row_mask << shift_left); buf_top[0] |= row << shift_left; buf_bot[0] = buf_bot[0] & ~(row_mask << shift_left); buf_bot[0] |= row << shift_left; dirty_tiles[tile_y0] |= 1 << tile_x0; dirty_tiles[tile_y1] |= 1 << tile_x0; for (size_t i = 1; i < dx; i++) { buf_top[i * 8] = row; buf_bot[i * 8] = row; dirty_tiles[tile_y0] |= 1 << (tile_x0 + i); dirty_tiles[tile_y1] |= 1 << (tile_x0 + i); } buf_top[dx * 8] = buf_top[dx * 8] & ~(row_mask >> shift_right); buf_top[dx * 8] |= row >> shift_right; buf_bot[dx * 8] = buf_bot[dx * 8] & ~(row_mask >> shift_right); buf_bot[dx * 8] |= row >> shift_right; dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx); dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx); } u32 row_mask_left = 0xF << start_col0 * 4; u32 row_mask_right = 0xF << start_col1 * 4; u32 row_left = (0x11111111 * clr) & row_mask_left; u32 row_right = (0x11111111 * clr) & row_mask_right; if (dy < 1) { for (size_t i = 1; i < y1 - y0; i++, buf_top++) { buf_top[1] = buf_top[1] & ~row_mask_left; buf_top[1] |= row_left; buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; buf_top[1 + 8 * dx] |= row_right; } } else { for (size_t i = 1; i < (8 - start_row0); i++, buf_top++) { buf_top[1] = buf_top[1] & ~row_mask_left; buf_top[1] |= row_left; buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; buf_top[1 + 8 * dx] |= row_right; } buf_top += 8 * 31; for (size_t j = 1; j < dy; j++) { for (size_t i = 0; i < 8; i++, buf_top++) { buf_top[1] = buf_top[1] & ~row_mask_left; buf_top[1] |= row_left; buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; buf_top[1 + 8 * dx] |= row_right; } buf_top += 8 * 31; dirty_tiles[tile_y0 + j] |= 1 << tile_x0; dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx); } for (size_t i = 0; i < start_row1; i++, buf_top++) { buf_top[1] = buf_top[1] & ~row_mask_left; buf_top[1] |= row_left; buf_top[1 + 8 * dx] = buf_top[1 + 8 * dx] & ~row_mask_right; buf_top[1 + 8 * dx] |= row_right; } } } IWRAM_CODE void draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y1); size_t dx = x1 - x0; size_t dy = y1 - y0; size_t n_rect = MIN(dx, dy); n_rect = n_rect / 2 + 1; for (size_t i = 0; i < n_rect; i++) { draw_rect(x0 + i, y0 + i, x1 - i, y1 - i, clr); } } void clear_screen(void) { dma_fill(FRONTBUF, 0, KB(20), 3); } IWRAM_CODE void flip_buffer(void) { // Copy dirty tiles from the backbuffer to the frontbuffer. Tile *dst = FRONTBUF; Tile *src = BACKBUF; for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } for (size_t i = 0, k = 1; i < 30; ++i, k <<= 1) { if (dirty_tiles[j] & k) { dst[i + j * 32] = src[i + j * 32]; } } dirty_tiles[j] = 0; } } void renderer_init(void) { // Initialize display mode and bg palette. DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ; // Initialize backgrounds. BG_CTRL(0) = BG_CHARBLOCK(0) | BG_SCREENBLOCK(FRONTBUF_SB) | BG_PRIORITY(1); BG_CTRL(1) = BG_CHARBLOCK(1) | BG_SCREENBLOCK(FONT_SB) | BG_PRIORITY(0); // Use DMA to clear front and back buffers as well as the font memory map. dma_fill(FRONTBUF, 0, KB(20), 3); dma_fill(FRONTBUF_TILEMAP, 0, KB(2), 3); dma_fill(BACKBUF, 0, KB(20), 3); dma_fill(FONT_DATA, 0, KB(8), 3); dma_fill(FONT_TILEMAP, (FONT_OFFSET << 16) | FONT_OFFSET, KB(2), 3); // Initialize default palette. PAL_BUFFER_BG[0] = COLOR_BLACK; PAL_BUFFER_BG[1] = COLOR_WHITE; PAL_BUFFER_BG[2] = COLOR_RED; PAL_BUFFER_BG[3] = COLOR_BLUE; PAL_BUFFER_BG[4] = COLOR_CYAN; PAL_BUFFER_BG[5] = COLOR_GREY; // Initialize background memory map for frontbuffer and font backgorund. for (size_t i = 0; i < 32 * 20; ++i) { FRONTBUF_TILEMAP[i] = i; } // Initialize text engine. txt_init(FONT_DATA, FONT_TILEMAP, FONT_OFFSET); }