// // This Mode 0 renderer provides a way of drawing directly to a framebuffer // (similar to Mode 3 and 4) while retaining the flexibility of using other // backgrounds if needed. It also performs double buffering to avoid tearing // artifacts and tries to only draw tiles that changed on each frame. // #include "renderer.h" #include "text.h" // // Parameters. // #define SUBPIXEL_LINES 1 #define FLIP_TYPE 3 #define DISABLE_BOUNDCHECK_SCREEN 0 #define NO_DMA 0 // Front/back buffers for double buffering. #define BUF_0 ((u32*)(MEM_VRAM)) #define BUF_1 ((u32*)(MEM_VRAM + KB(20))) // Pointer to the backbuffer. static u32 *backbuf = BUF_1; // Tracking which tiles are "dirty" and need refreshing. static u32 dirty_tiles[21] = {0}; // Position of the tilemap. #define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) // Charblock and screenblock for both render buffers. #define CB_0 0 #define CB_1 1 #define SB_0 20 #define SB_1 22 // Boundchecks can be disable at compile time but this will not always improve // the performance and can in fact make it worse. It is possible that this is // due to some aliasing optimizations but not sure at this moment. #if DISABLE_BOUNDCHECK_SCREEN > 0 #define BOUNDCHECK_SCREEN(X,Y) #else #define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; #endif // Swap A and B values without a tmp variable. #define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B))) // Swap A and B values to make sure A <= B. #define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); } // // Basic primitives. // static inline void redraw(void) { for (size_t i = 0; i < 21; i++) { dirty_tiles[i] = 0xFFFFFFFF; } } IWRAM_CODE void screen_fill(u8 clr) { // We have to make sure we leave the last tile blank to use as alpha channel // when moving the BG during double buffering. #if NO_DMA == 0 dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3); #else set32(backbuf, 0x11111111 * clr, (KB(20) / 4) - 8); #endif redraw(); } IWRAM_CODE void draw_pixel(size_t x, size_t y, u8 clr) { BOUNDCHECK_SCREEN(x, y); // Find row position for the given x/y coordinates. size_t tile_x = x / 8; size_t tile_y = y / 8; size_t start_col = x % 8; size_t start_row = y % 8; u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; // Update backbuffer. size_t shift = start_col * sizeof(u32); u32 mask = 0xF << shift; u32 color = clr << shift; *dst = (*dst & ~mask) | color; dirty_tiles[tile_y] |= 1 << tile_x; } IWRAM_CODE static inline void draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y0); // Find row positions for the given x/y coordinates. size_t tile_x0 = x0 / 8; size_t tile_x1 = x1 / 8; size_t tile_y = y0 / 8; size_t start_col = x0 % 8; size_t end_col = x1 % 8; size_t start_row = y0 % 8; u32 dirty = (1 << tile_x0) | (1 << tile_x1); // Horizontal line. There are 3 cases: // 1. Lines fit on a single tile. // 2. Lines go through 2 tiles, both require partial row updates. // 3. Lines go through 3 or more tiles, first and last tiles use // partial row updates, rows in the middle can write the entire // row. size_t dtx = tile_x1 - tile_x0; u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; if (dtx < 1) { size_t shift_left = start_col * 4; size_t shift_right = (7 - end_col) * 4; u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); u32 color = (0x11111111 * clr) & mask; *dst = (*dst & ~mask) | color; } else { size_t shift_left = start_col * 4; size_t shift_right = (7 - end_col) * 4; u32 mask = 0xFFFFFFFF; u32 color = 0x11111111 * clr; *dst = (*dst & ~(mask << shift_left)) | (color << shift_left); dst += 8; for (size_t i = 1; i < dtx; i++) { dirty |= (1 << (tile_x0 + i)); *dst = color; dst += 8; } *dst = (*dst & ~(mask >> shift_right)) | (color >> shift_right); } dirty_tiles[tile_y] |= dirty; } IWRAM_CODE UNROLL_LOOPS static inline void draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x0, y1); size_t tile_x = x0 / 8; size_t tile_y = y0 / 8; size_t tile_y0 = y0 / 8; size_t tile_y1 = y1 / 8; size_t start_col = x0 % 8; size_t start_row0 = y0 % 8; size_t start_row1 = y1 % 8; size_t shift_left = start_col * 4; u32 dirty = (1 << tile_x); u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; u32 mask = 0x0000000F << shift_left; u32 color = clr << shift_left; u32 dty = tile_y1 - tile_y0; if (dty < 1) { for (size_t i = 0; i <= (y1 - y0); i++, dst++) { dst[0] = (dst[0] & ~mask) | color; } } else { for (size_t i = 0; i < (8 - start_row0); i++, dst++) { dst[0] = (dst[0] & ~mask) | color; } dst += 8 * 31; for (size_t j = 1; j < dty; j++) { dirty_tiles[tile_y0 + j] |= dirty; for (size_t i = 0; i < 8; i++, dst++) { dst[0] = (dst[0] & ~mask) | color; } dst += 8 * 31; } for (size_t i = 0; i <= start_row1; i++, dst++) { dst[0] = (dst[0] & ~mask) | color; } } dirty_tiles[tile_y0] |= dirty; dirty_tiles[tile_y1] |= dirty; } IWRAM_CODE void draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y1); if (y0 == y1) { MAYBE_SWAP(x0, x1); draw_hline(x0, x1, y0, clr); } else if (x0 == x1) { MAYBE_SWAP(y0, y1); draw_vline(x0, y0, y1, clr); } else { // Fixed Precision constants. const int fp_bit = 6; const int fp_one = FP_NUM(1, fp_bit); const int fp_half = fp_one >> 1; int dx = x0 > x1 ? x0 - x1 : x1 - x0; int dy = y0 > y1 ? y0 - y1 : y1 - y0; if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) { SWAP(x0, x1); SWAP(y0, y1); } #if SUBPIXEL_LINES == 1 int dxf = (dx << fp_bit); int dyf = (dy << fp_bit); int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit); int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit); int x_step = x0 > x1 ? -1 : 1; int y_step = y0 > y1 ? -1 : 1; int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy; if (dx >= dy) { int step = dxf / dyf; int remaining = dx; while (remaining > (step - 1)) { distance += step * 2 * dyf; if (distance >= 0) { draw_hline(x0, x0 + step - 1, y0, clr); x0 += x_step * step; remaining -= step; } else { if (remaining < step) { break; } draw_hline(x0, x0 + step, y0, clr); distance += 2 * dyf; x0 += x_step * (step + 1); remaining -= step + 1; } distance -= 2 * dxf; y0 += y_step; } if (remaining >= 0) { draw_hline(x0, x0 + remaining, y0, clr); } } else { int step = dyf / dxf; int remaining = dy; while (remaining > (step - 1)) { distance += step * 2 * dxf; if (distance >= 0) { draw_vline(x0, y0, y0 + step - 1, clr); y0 += y_step * step; remaining -= step; } else { draw_vline(x0, y0, y0 + step, clr); distance += 2 * dxf; y0 += y_step * (step + 1); remaining -= step + 1; } distance -= 2 * dyf; x0 += x_step; } if (remaining >= 0) { draw_vline(x0, y0, y0 + remaining, clr); } } #else int x_step = x0 > x1 ? -1 : 1; int y_step = y0 > y1 ? -1 : 1; if (dx >= dy) { int diff = 2 * dy - dx; for (int i = 0; i < dx + 1; i++) { draw_pixel(x0, y0, clr); if (diff >= 0) { diff -= 2 * dx; y0 += y_step; } diff += 2 * dy; x0 += x_step; } } else { int diff = 2 * dx - dy; for (int i = 0; i < dy + 1; i++) { draw_pixel(x0, y0, clr); if (diff >= 0) { diff -= 2 * dy; x0 += x_step; } diff += 2 * dx; y0 += y_step; } } #endif } } IWRAM_CODE void draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y1); MAYBE_SWAP(x0, x1); MAYBE_SWAP(y0, y1); draw_hline(x0, x1, y0, clr); draw_hline(x0, x1, y1, clr); draw_vline(x0, y0, y1, clr); draw_vline(x1, y0, y1, clr); } IWRAM_CODE void draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y1); MAYBE_SWAP(x0, x1); MAYBE_SWAP(y0, y1); // Special condition. If the screen is to be completely filled, use the // full clearing functions instead. if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { screen_fill(clr); return; } for (size_t y = y0; y <= y1; y++) { draw_hline(x0, x1, y, clr); } } // // Sprites (1bpp). // static u32 lut_1bpp_mask[256] = { 0x00000000, 0xf0000000, 0x0f000000, 0xff000000, 0x00f00000, 0xf0f00000, 0x0ff00000, 0xfff00000, 0x000f0000, 0xf00f0000, 0x0f0f0000, 0xff0f0000, 0x00ff0000, 0xf0ff0000, 0x0fff0000, 0xffff0000, 0x0000f000, 0xf000f000, 0x0f00f000, 0xff00f000, 0x00f0f000, 0xf0f0f000, 0x0ff0f000, 0xfff0f000, 0x000ff000, 0xf00ff000, 0x0f0ff000, 0xff0ff000, 0x00fff000, 0xf0fff000, 0x0ffff000, 0xfffff000, 0x00000f00, 0xf0000f00, 0x0f000f00, 0xff000f00, 0x00f00f00, 0xf0f00f00, 0x0ff00f00, 0xfff00f00, 0x000f0f00, 0xf00f0f00, 0x0f0f0f00, 0xff0f0f00, 0x00ff0f00, 0xf0ff0f00, 0x0fff0f00, 0xffff0f00, 0x0000ff00, 0xf000ff00, 0x0f00ff00, 0xff00ff00, 0x00f0ff00, 0xf0f0ff00, 0x0ff0ff00, 0xfff0ff00, 0x000fff00, 0xf00fff00, 0x0f0fff00, 0xff0fff00, 0x00ffff00, 0xf0ffff00, 0x0fffff00, 0xffffff00, 0x000000f0, 0xf00000f0, 0x0f0000f0, 0xff0000f0, 0x00f000f0, 0xf0f000f0, 0x0ff000f0, 0xfff000f0, 0x000f00f0, 0xf00f00f0, 0x0f0f00f0, 0xff0f00f0, 0x00ff00f0, 0xf0ff00f0, 0x0fff00f0, 0xffff00f0, 0x0000f0f0, 0xf000f0f0, 0x0f00f0f0, 0xff00f0f0, 0x00f0f0f0, 0xf0f0f0f0, 0x0ff0f0f0, 0xfff0f0f0, 0x000ff0f0, 0xf00ff0f0, 0x0f0ff0f0, 0xff0ff0f0, 0x00fff0f0, 0xf0fff0f0, 0x0ffff0f0, 0xfffff0f0, 0x00000ff0, 0xf0000ff0, 0x0f000ff0, 0xff000ff0, 0x00f00ff0, 0xf0f00ff0, 0x0ff00ff0, 0xfff00ff0, 0x000f0ff0, 0xf00f0ff0, 0x0f0f0ff0, 0xff0f0ff0, 0x00ff0ff0, 0xf0ff0ff0, 0x0fff0ff0, 0xffff0ff0, 0x0000fff0, 0xf000fff0, 0x0f00fff0, 0xff00fff0, 0x00f0fff0, 0xf0f0fff0, 0x0ff0fff0, 0xfff0fff0, 0x000ffff0, 0xf00ffff0, 0x0f0ffff0, 0xff0ffff0, 0x00fffff0, 0xf0fffff0, 0x0ffffff0, 0xfffffff0, 0x0000000f, 0xf000000f, 0x0f00000f, 0xff00000f, 0x00f0000f, 0xf0f0000f, 0x0ff0000f, 0xfff0000f, 0x000f000f, 0xf00f000f, 0x0f0f000f, 0xff0f000f, 0x00ff000f, 0xf0ff000f, 0x0fff000f, 0xffff000f, 0x0000f00f, 0xf000f00f, 0x0f00f00f, 0xff00f00f, 0x00f0f00f, 0xf0f0f00f, 0x0ff0f00f, 0xfff0f00f, 0x000ff00f, 0xf00ff00f, 0x0f0ff00f, 0xff0ff00f, 0x00fff00f, 0xf0fff00f, 0x0ffff00f, 0xfffff00f, 0x00000f0f, 0xf0000f0f, 0x0f000f0f, 0xff000f0f, 0x00f00f0f, 0xf0f00f0f, 0x0ff00f0f, 0xfff00f0f, 0x000f0f0f, 0xf00f0f0f, 0x0f0f0f0f, 0xff0f0f0f, 0x00ff0f0f, 0xf0ff0f0f, 0x0fff0f0f, 0xffff0f0f, 0x0000ff0f, 0xf000ff0f, 0x0f00ff0f, 0xff00ff0f, 0x00f0ff0f, 0xf0f0ff0f, 0x0ff0ff0f, 0xfff0ff0f, 0x000fff0f, 0xf00fff0f, 0x0f0fff0f, 0xff0fff0f, 0x00ffff0f, 0xf0ffff0f, 0x0fffff0f, 0xffffff0f, 0x000000ff, 0xf00000ff, 0x0f0000ff, 0xff0000ff, 0x00f000ff, 0xf0f000ff, 0x0ff000ff, 0xfff000ff, 0x000f00ff, 0xf00f00ff, 0x0f0f00ff, 0xff0f00ff, 0x00ff00ff, 0xf0ff00ff, 0x0fff00ff, 0xffff00ff, 0x0000f0ff, 0xf000f0ff, 0x0f00f0ff, 0xff00f0ff, 0x00f0f0ff, 0xf0f0f0ff, 0x0ff0f0ff, 0xfff0f0ff, 0x000ff0ff, 0xf00ff0ff, 0x0f0ff0ff, 0xff0ff0ff, 0x00fff0ff, 0xf0fff0ff, 0x0ffff0ff, 0xfffff0ff, 0x00000fff, 0xf0000fff, 0x0f000fff, 0xff000fff, 0x00f00fff, 0xf0f00fff, 0x0ff00fff, 0xfff00fff, 0x000f0fff, 0xf00f0fff, 0x0f0f0fff, 0xff0f0fff, 0x00ff0fff, 0xf0ff0fff, 0x0fff0fff, 0xffff0fff, 0x0000ffff, 0xf000ffff, 0x0f00ffff, 0xff00ffff, 0x00f0ffff, 0xf0f0ffff, 0x0ff0ffff, 0xfff0ffff, 0x000fffff, 0xf00fffff, 0x0f0fffff, 0xff0fffff, 0x00ffffff, 0xf0ffffff, 0x0fffffff, 0xffffffff }; static u32 lut_1bpp_mask_flip_x[256] = { 0x00000000, 0x0000000f, 0x000000f0, 0x000000ff, 0x00000f00, 0x00000f0f, 0x00000ff0, 0x00000fff, 0x0000f000, 0x0000f00f, 0x0000f0f0, 0x0000f0ff, 0x0000ff00, 0x0000ff0f, 0x0000fff0, 0x0000ffff, 0x000f0000, 0x000f000f, 0x000f00f0, 0x000f00ff, 0x000f0f00, 0x000f0f0f, 0x000f0ff0, 0x000f0fff, 0x000ff000, 0x000ff00f, 0x000ff0f0, 0x000ff0ff, 0x000fff00, 0x000fff0f, 0x000ffff0, 0x000fffff, 0x00f00000, 0x00f0000f, 0x00f000f0, 0x00f000ff, 0x00f00f00, 0x00f00f0f, 0x00f00ff0, 0x00f00fff, 0x00f0f000, 0x00f0f00f, 0x00f0f0f0, 0x00f0f0ff, 0x00f0ff00, 0x00f0ff0f, 0x00f0fff0, 0x00f0ffff, 0x00ff0000, 0x00ff000f, 0x00ff00f0, 0x00ff00ff, 0x00ff0f00, 0x00ff0f0f, 0x00ff0ff0, 0x00ff0fff, 0x00fff000, 0x00fff00f, 0x00fff0f0, 0x00fff0ff, 0x00ffff00, 0x00ffff0f, 0x00fffff0, 0x00ffffff, 0x0f000000, 0x0f00000f, 0x0f0000f0, 0x0f0000ff, 0x0f000f00, 0x0f000f0f, 0x0f000ff0, 0x0f000fff, 0x0f00f000, 0x0f00f00f, 0x0f00f0f0, 0x0f00f0ff, 0x0f00ff00, 0x0f00ff0f, 0x0f00fff0, 0x0f00ffff, 0x0f0f0000, 0x0f0f000f, 0x0f0f00f0, 0x0f0f00ff, 0x0f0f0f00, 0x0f0f0f0f, 0x0f0f0ff0, 0x0f0f0fff, 0x0f0ff000, 0x0f0ff00f, 0x0f0ff0f0, 0x0f0ff0ff, 0x0f0fff00, 0x0f0fff0f, 0x0f0ffff0, 0x0f0fffff, 0x0ff00000, 0x0ff0000f, 0x0ff000f0, 0x0ff000ff, 0x0ff00f00, 0x0ff00f0f, 0x0ff00ff0, 0x0ff00fff, 0x0ff0f000, 0x0ff0f00f, 0x0ff0f0f0, 0x0ff0f0ff, 0x0ff0ff00, 0x0ff0ff0f, 0x0ff0fff0, 0x0ff0ffff, 0x0fff0000, 0x0fff000f, 0x0fff00f0, 0x0fff00ff, 0x0fff0f00, 0x0fff0f0f, 0x0fff0ff0, 0x0fff0fff, 0x0ffff000, 0x0ffff00f, 0x0ffff0f0, 0x0ffff0ff, 0x0fffff00, 0x0fffff0f, 0x0ffffff0, 0x0fffffff, 0xf0000000, 0xf000000f, 0xf00000f0, 0xf00000ff, 0xf0000f00, 0xf0000f0f, 0xf0000ff0, 0xf0000fff, 0xf000f000, 0xf000f00f, 0xf000f0f0, 0xf000f0ff, 0xf000ff00, 0xf000ff0f, 0xf000fff0, 0xf000ffff, 0xf00f0000, 0xf00f000f, 0xf00f00f0, 0xf00f00ff, 0xf00f0f00, 0xf00f0f0f, 0xf00f0ff0, 0xf00f0fff, 0xf00ff000, 0xf00ff00f, 0xf00ff0f0, 0xf00ff0ff, 0xf00fff00, 0xf00fff0f, 0xf00ffff0, 0xf00fffff, 0xf0f00000, 0xf0f0000f, 0xf0f000f0, 0xf0f000ff, 0xf0f00f00, 0xf0f00f0f, 0xf0f00ff0, 0xf0f00fff, 0xf0f0f000, 0xf0f0f00f, 0xf0f0f0f0, 0xf0f0f0ff, 0xf0f0ff00, 0xf0f0ff0f, 0xf0f0fff0, 0xf0f0ffff, 0xf0ff0000, 0xf0ff000f, 0xf0ff00f0, 0xf0ff00ff, 0xf0ff0f00, 0xf0ff0f0f, 0xf0ff0ff0, 0xf0ff0fff, 0xf0fff000, 0xf0fff00f, 0xf0fff0f0, 0xf0fff0ff, 0xf0ffff00, 0xf0ffff0f, 0xf0fffff0, 0xf0ffffff, 0xff000000, 0xff00000f, 0xff0000f0, 0xff0000ff, 0xff000f00, 0xff000f0f, 0xff000ff0, 0xff000fff, 0xff00f000, 0xff00f00f, 0xff00f0f0, 0xff00f0ff, 0xff00ff00, 0xff00ff0f, 0xff00fff0, 0xff00ffff, 0xff0f0000, 0xff0f000f, 0xff0f00f0, 0xff0f00ff, 0xff0f0f00, 0xff0f0f0f, 0xff0f0ff0, 0xff0f0fff, 0xff0ff000, 0xff0ff00f, 0xff0ff0f0, 0xff0ff0ff, 0xff0fff00, 0xff0fff0f, 0xff0ffff0, 0xff0fffff, 0xfff00000, 0xfff0000f, 0xfff000f0, 0xfff000ff, 0xfff00f00, 0xfff00f0f, 0xfff00ff0, 0xfff00fff, 0xfff0f000, 0xfff0f00f, 0xfff0f0f0, 0xfff0f0ff, 0xfff0ff00, 0xfff0ff0f, 0xfff0fff0, 0xfff0ffff, 0xffff0000, 0xffff000f, 0xffff00f0, 0xffff00ff, 0xffff0f00, 0xffff0f0f, 0xffff0ff0, 0xffff0fff, 0xfffff000, 0xfffff00f, 0xfffff0f0, 0xfffff0ff, 0xffffff00, 0xffffff0f, 0xfffffff0, 0xffffffff }; // Create a mask for zero sprite values in each nibble. // For example: 0x12305008 -> 0xFFF0F00F INLINE u32 create_zero_mask(u32 x) { x |= x >> 2; x |= x >> 1; x &= 0x11111111; return x * 0xf; } IWRAM_CODE UNROLL_LOOPS void draw_sprite(size_t x, size_t y, u32 *sprite, u8 clear) { // Copy a 4bpp sprite into memory. Color 0 is the transparency color. BOUNDCHECK_SCREEN(x, y); size_t tile_x0 = x / 8; size_t tile_x1 = (x + 7) / 8; size_t tile_y = y / 8; size_t start_col = x % 8; size_t start_row = y % 8; size_t shift_left = start_col * 4; size_t shift_right = (8 - start_col) * 4; u32 dirty = (1 << tile_x0) | (1 << tile_x1); u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; size_t n_rows = 8; if (y + 8 > SCREEN_HEIGHT) { n_rows = 8 - ((y + 8) - SCREEN_HEIGHT); } size_t n0 = MIN(8 - start_row, n_rows); if (clear) { for(size_t v = 0; v < n0; v++, dst++) { u32 row = sprite[v]; u32 mask = create_zero_mask(row); u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; dst[0] = (dst[0] & ~msk0); dst[8] = (dst[8] & ~msk1); } dst += (32 - 1) * 8; for(size_t v = n0; v < n_rows; v++, dst++) { u32 row = sprite[v]; u32 mask = create_zero_mask(row); u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; dst[0] = (dst[0] & ~msk0); dst[8] = (dst[8] & ~msk1); } } else { for(size_t v = 0; v < n0; v++, dst++) { u32 row = sprite[v]; u32 mask = create_zero_mask(row); u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; u32 clr0 = row << shift_left; u32 clr1 = row >> shift_right; dst[0] = (dst[0] & ~msk0) | clr0; dst[8] = (dst[8] & ~msk1) | clr1; } dst += (32 - 1) * 8; for(size_t v = n0; v < n_rows; v++, dst++) { u32 row = sprite[v]; u32 mask = create_zero_mask(row); u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; u32 clr0 = row << shift_left; u32 clr1 = row >> shift_right; dst[0] = (dst[0] & ~msk0) | clr0; dst[8] = (dst[8] & ~msk1) | clr1; } } dirty_tiles[tile_y] |= dirty; if (start_row != 0) { dirty_tiles[tile_y + 1] |= dirty; } } IWRAM_CODE UNROLL_LOOPS void draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { BOUNDCHECK_SCREEN(x, y); size_t tile_x0 = x / 8; size_t tile_x1 = (x + 7) / 8; size_t tile_y = y / 8; size_t start_col = x % 8; size_t start_row = y % 8; size_t shift_left = start_col * 4; size_t shift_right = (8 - start_col) * 4; u32 dirty = (1 << tile_x0) | (1 << tile_x1); u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; u32 color = clr * 0x11111111; u32 *lut = flip_x ? lut_1bpp_mask_flip_x : lut_1bpp_mask; size_t n_rows = 8; if (y + 8 > SCREEN_HEIGHT) { n_rows = 8 - ((y + 8) - SCREEN_HEIGHT); } size_t n0 = MIN(8 - start_row, n_rows); if (!flip_y) { for(size_t v = 0; v < n0; v++, dst++) { u32 mask = lut[*sprite]; u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; u32 clr0 = msk0 & color; u32 clr1 = msk1 & color; dst[0] = (dst[0] & ~msk0) | clr0; dst[8] = (dst[8] & ~msk1) | clr1; sprite++; } dst += (32 - 1) * 8; for(size_t v = n0; v < n_rows; v++, dst++) { u32 mask = lut[*sprite]; u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; u32 clr0 = msk0 & color; u32 clr1 = msk1 & color; dst[0] = (dst[0] & ~msk0) | clr0; dst[8] = (dst[8] & ~msk1) | clr1; sprite++; } } else { sprite += 7; for(size_t v = 0; v < n0; v++, dst++) { u32 mask = lut[*sprite]; u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; u32 clr0 = msk0 & color; u32 clr1 = msk1 & color; dst[0] = (dst[0] & ~msk0) | clr0; dst[8] = (dst[8] & ~msk1) | clr1; sprite--; } dst += (32 - 1) * 8; for(size_t v = n0; v < n_rows; v++, dst++) { u32 mask = lut[*sprite]; u32 msk0 = mask << shift_left; u32 msk1 = mask >> shift_right; u32 clr0 = msk0 & color; u32 clr1 = msk1 & color; dst[0] = (dst[0] & ~msk0) | clr0; dst[8] = (dst[8] & ~msk1) | clr1; sprite--; } } dirty_tiles[tile_y] |= dirty; if (start_row != 0) { dirty_tiles[tile_y + 1] |= dirty; } } // // Flipping buffers/copying memory. // IWRAM_CODE UNROLL_LOOPS void flip_buffer(void) { // Mode 0: double buffering without dirty tiles. Use this when we are clearing // the screen every single frame. #if FLIP_TYPE == 0 if (backbuf == BUF_0) { backbuf = BUF_1; BG_H_SCROLL_0 = 0; BG_H_SCROLL_1 = -240; } else { backbuf = BUF_0; BG_H_SCROLL_0 = -240; BG_H_SCROLL_1 = 0; } // Mode 1: single buffer, copy the dirty lines from backbuffer (BUF_1) to // frontbuffer (BUF_0). #elif FLIP_TYPE == 1 u32 *front = BUF_0; u32 *back = BUF_1; BG_H_SCROLL_0 = 0; BG_H_SCROLL_1 = -240; for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } u32 offset = j * 32 * 8; #if NO_DMA == 0 dma_copy(front + offset, back + offset, (30 * 8 * 4), 3); #else copy32(front + offset, back + offset, (30 * 8)); #endif dirty_tiles[j] = 0; } // Mode 2: single buffer, copy the dirty tiles from backbuffer (BUF_1) to // frontbuffer (BUF_0). #elif FLIP_TYPE == 2 u32 *front = BUF_0; u32 *back = BUF_1; BG_H_SCROLL_0 = 0; BG_H_SCROLL_1 = -240; for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } size_t k = 1; for (size_t i = 0; i < 30; ++i, k <<= 1) { if (dirty_tiles[j] & k) { Tile *mem_front = front; Tile *mem_back = back; mem_front[i + j * 32] = mem_back[i + j * 32]; } } dirty_tiles[j] = 0; } // Mode 3: Double buffering with dirty line, copying the dirty lines if needed // after flipping buffers. #elif FLIP_TYPE == 3 bool should_flip = false; for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } should_flip = true; break; } if (!should_flip) { return; } u32 *frontbuf = backbuf; if (backbuf == BUF_0) { backbuf = BUF_1; BG_H_SCROLL_0 = 0; BG_H_SCROLL_1 = -240; } else { backbuf = BUF_0; BG_H_SCROLL_0 = -240; BG_H_SCROLL_1 = 0; } for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } u32 offset = j * 32 * 8; #if NO_DMA == 0 dma_copy(backbuf + offset, frontbuf + offset, (30 * 8 * 4), 3); #else copy32(backbuf + offset, frontbuf + offset, (30 * 8)); #endif dirty_tiles[j] = 0; } // Mode 4: Double buffering with dirty tiles, copying the dirty tiles if needed // after flipping buffers. #elif FLIP_TYPE == 4 bool should_flip = false; for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } should_flip = true; break; } if (!should_flip) { return; } u32 *frontbuf = backbuf; if (backbuf == BUF_0) { backbuf = BUF_1; BG_H_SCROLL_0 = 0; BG_H_SCROLL_1 = -240; } else { backbuf = BUF_0; BG_H_SCROLL_0 = -240; BG_H_SCROLL_1 = 0; } for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } size_t k = 1; for (size_t i = 0; i < 30; ++i, k <<= 1) { if (dirty_tiles[j] & k) { Tile *mem_front = frontbuf; Tile *mem_back = backbuf; mem_back[i + j * 32] = mem_front[i + j * 32]; } } dirty_tiles[j] = 0; } #endif } IWRAM_CODE UNROLL_LOOPS void decode_1bpp(u32 *dst, u8 *src, u8 clr, u8 flip_x, u32 n_tiles) { u32 color = 0x11111111 * clr; if (!flip_x) { for (size_t i = 0; i < n_tiles * 8; i++) { *dst++ = lut_1bpp_mask[*src++] & color; } } else { for (size_t i = 0; i < n_tiles * 8; i++) { *dst++ = lut_1bpp_mask_flip_x[*src++] & color; } } } // // Text rendering. // #include "font.h" // Font rendering function for the text engine. void txt_drawc(char c, size_t x, size_t y, u8 clr) { u8 *tile = font_icn; draw_icn(x, y, tile + 8 * c, clr, 1, 0); } void txt_drawc_small(char c, size_t x, size_t y, u8 clr) { u8 *tile = font_icn; c = c < 'a' ? c + 16 * 6 : c + 16 * 4; draw_icn(x, y, tile + 8 * c, clr, 1, 0); } #define txt_draws_small(msg, x, y, clr, ...) \ { \ u8 tmp = text_engine.spacing;\ txt_spacing(4);\ text_engine.drawc = txt_drawc_small;\ txt_draws(msg, x, y, clr); \ txt_spacing(tmp);\ text_engine.drawc = txt_drawc;\ } #define txt_drawf_small(msg, x, y, clr, ...) \ { \ char buf[256] = {0}; \ posprintf(buf, msg, ##__VA_ARGS__); \ u8 tmp = text_engine.spacing;\ txt_spacing(4);\ text_engine.drawc = txt_drawc_small;\ txt_draws(buf, x, y, clr); \ txt_spacing(tmp);\ text_engine.drawc = txt_drawc;\ } // // Initialization. // Palette themes[] = { { COLOR_BLACK, // BG COLOR_WHITE, // FG COLOR_BLUE, // ACC_0 COLOR_RED, // ACC_1 COLOR_CYAN, // ACC_2 COLOR_GREY, // COL_OFF }, { RGB15(31, 31, 31), // BG RGB15( 8, 8, 8), // FG RGB15( 2, 17, 31), // ACC_0 RGB15(31, 0, 10), // ACC_1 RGB15( 0, 27, 30), // ACC_2 RGB15(16, 17, 19), // COL_OFF }, { RGB15( 0, 0, 31), // BG RGB15(31, 31, 31), // FG RGB15( 0, 31, 14), // ACC_0 RGB15(24, 7, 19), // ACC_1 RGB15(31, 17, 27), // ACC_2 RGB15(22, 22, 31), // COL_OFF }, { RGB15(11, 24, 31), // BG RGB15(31, 31, 31), // FG RGB15(8, 17, 22), // COL_OFF RGB15(29, 17, 22), // ACC_1 RGB15(29, 17, 22), // ACC_1 RGB15(25, 27, 29), // COL_OFF }, { RGB15( 0, 0, 0), // BG RGB15(28, 17, 0), // FG RGB15(31, 28, 22), // ACC_0 RGB15(31, 0, 0), // ACC_2 RGB15( 0, 16, 0), // ACC_1 RGB15(14, 10, 9), // COL_OFF }, { RGB15( 2, 2, 2), // BG RGB15(28, 28, 28), // FG RGB15(14, 28, 24), // ACC_0 RGB15(14, 28, 24), // ACC_1 RGB15(14, 28, 24), // ACC_2 RGB15(12, 12, 12), // COL_OFF }, }; void swap_palette(int idx) { for (size_t i = 0; i < 16; i++) { PAL_BUFFER_BG[i] = themes[idx][i]; } } void renderer_init(void) { // Initialize display mode and bg palette. DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; // Clear VRAM. #if NO_DMA == 0 dma_fill((u32*)MEM_VRAM, 0, KB(96), 3); #else set32((u32*)MEM_VRAM, 0, KB(96)/4); #endif // Initialize backgrounds. BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); BG_CTRL(1) = BG_CHARBLOCK(CB_1) | BG_SCREENBLOCK(SB_1) | BG_PRIORITY(1) | BG_SIZE(1); // Initialize background memory map for the render buffers. The backgrounds // are 64x32 each, with the second screenblock pointing to a zeroed tile. // This makes it so while scrolling the backgrounds to the second screen we // effectively disabling them. Thanks to this we can perform double // buffering with mode 0 rendering. u16 *mem_map_fg = SCREENBLOCK_MEM[SB_0]; u16 *mem_map_fg_blank = SCREENBLOCK_MEM[SB_0 + 1]; u16 *mem_map_bg = SCREENBLOCK_MEM[SB_1]; u16 *mem_map_bg_blank = SCREENBLOCK_MEM[SB_1 + 1]; for (size_t i = 0; i < 32 * 20; ++i) { mem_map_fg[i] = i; mem_map_fg_blank[i] = 32 * 20 - 1; mem_map_bg[i] = i + 32 * 4; mem_map_bg_blank[i] = (32 * 20 - 1) + 32 * 4; } // Setup initial background state. BG_H_SCROLL_0 = -240; BG_H_SCROLL_1 = -240; // Initialize default palette. swap_palette(0); // Initialize text engine. txt_init(txt_drawc); }