From deb9c48fbd3dc5854de4ae3a04dc999029c10ae0 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sat, 22 Apr 2023 21:12:14 +0200 Subject: Add new renderer and prepare for render overhaul --- src/renderer_m0.c | 811 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 811 insertions(+) create mode 100644 src/renderer_m0.c (limited to 'src/renderer_m0.c') diff --git a/src/renderer_m0.c b/src/renderer_m0.c new file mode 100644 index 0000000..8bd4263 --- /dev/null +++ b/src/renderer_m0.c @@ -0,0 +1,811 @@ +// +// This Mode 0 renderer provides a way of drawing directly to a framebuffer +// (similar to Mode 3 and 4) while retaining the flexibility of using other +// backgrounds if needed. It also performs double buffering to avoid tearing +// artifacts and tries to only draw tiles that changed on each frame. +// + +#include "renderer.h" +#include "text.h" + +// +// Parameters. +// + +#define SUBPIXEL_LINES 1 +#define DEC_BIG_LUT 1 +#define FLIP_TYPE 3 + +// Front/back buffers for double buffering. +#define BUF_0 ((u32*)(MEM_VRAM)) +#define BUF_1 ((u32*)(MEM_VRAM + KB(20))) + +// Pointer to the backbuffer. +static u32 *backbuf = BUF_1; + +// Tracking which tiles are "dirty" and need refreshing. +static u32 dirty_tiles[21] = {0}; + +// Position of the tilemap. +#define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) + +// Charblock and screenblock for both render buffers. +#define CB_0 0 +#define CB_1 1 +#define SB_0 20 +#define SB_1 22 + +// Boundchecks can be disable at compile time but this will not always improve +// the performance and can in fact make it worse. It is possible that this is +// due to some aliasing optimizations but not sure at this moment. +#ifdef DISABLE_BOUNDCHECK_SCREEN +#define BOUNDCHECK_SCREEN(X,Y) +#else +#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; +#endif + +// Swap A and B values without a tmp variable. +#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B))) + +// Swap A and B values to make sure A <= B. +#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); } + +// +// Basic primitives. +// + +static inline +void +redraw(void) { + for (size_t i = 0; i < 21; i++) { + dirty_tiles[i] = 0xFFFFFFFF; + } +} + +IWRAM_CODE +void screen_fill(u8 clr) { + // We have to make sure we leave the last tile blank to use as alpha channel + // when moving the BG during double buffering. + dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3); + redraw(); +} + +IWRAM_CODE +void +draw_pixel(size_t x, size_t y, u8 clr) { + BOUNDCHECK_SCREEN(x, y); + + // Find row position for the given x/y coordinates. + size_t tile_x = x / 8; + size_t tile_y = y / 8; + size_t start_col = x % 8; + size_t start_row = y % 8; + u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; + + // Update backbuffer. + size_t shift = start_col * sizeof(u32); + u32 mask = 0xF << shift; + u32 row = clr << shift; + *dst = (*dst & ~mask) | row; + dirty_tiles[tile_y] |= 1 << tile_x; +} + +IWRAM_CODE +static inline +void +draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y0); + // Find row positions for the given x/y coordinates. + size_t tile_x0 = x0 / 8; + size_t tile_x1 = x1 / 8; + size_t tile_y = y0 / 8; + size_t start_col = x0 % 8; + size_t end_col = x1 % 8; + size_t start_row = y0 % 8; + u32 dirty = (1 << tile_x0) | (1 << tile_x1); + + // Horizontal line. There are 3 cases: + // 1. Lines fit on a single tile. + // 2. Lines go through 2 tiles, both require partial row updates. + // 3. Lines go through 3 or more tiles, first and last tiles use + // partial row updates, rows in the middle can write the entire + // row. + size_t dtx = tile_x1 - tile_x0; + u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; + if (dtx < 1) { + size_t shift_left = start_col * 4; + size_t shift_right = (7 - end_col) * 4; + u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); + u32 row = (0x11111111 * clr) & mask; + *dst = (*dst & ~mask) | row; + } else { + size_t shift_left = start_col * 4; + size_t shift_right = (7 - end_col) * 4; + u32 mask = 0xFFFFFFFF; + u32 row = 0x11111111 * clr; + *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); + dst += 8; + for (size_t i = 1; i < dtx; i++) { + dirty |= (1 << (tile_x0 + i)); + *dst = row; + dst += 8; + } + *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); + } + dirty_tiles[tile_y] |= dirty; +} + +IWRAM_CODE +UNROLL_LOOPS +static inline +void +draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x0, y1); + size_t tile_x = x0 / 8; + size_t tile_y = y0 / 8; + size_t tile_y0 = y0 / 8; + size_t tile_y1 = y1 / 8; + size_t start_col = x0 % 8; + size_t start_row0 = y0 % 8; + size_t start_row1 = y1 % 8; + + size_t shift_left = start_col * 4; + u32 dirty = (1 << tile_x); + + u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; + u32 mask = 0x0000000F << shift_left; + u32 row = (0x11111111 * clr) & mask; + u32 dty = tile_y1 - tile_y0; + if (dty < 1) { + for (size_t i = 0; i <= (y1 - y0); i++, dst++) { + dst[0] = (dst[0] & ~mask) | row; + } + } else { + for (size_t i = 0; i < (8 - start_row0); i++, dst++) { + dst[0] = (dst[0] & ~mask) | row; + } + dst += 8 * 31; + for (size_t j = 1; j < dty; j++) { + dirty_tiles[tile_y0 + j] |= dirty; + for (size_t i = 0; i < 8; i++, dst++) { + dst[0] = (dst[0] & ~mask) | row; + } + dst += 8 * 31; + } + for (size_t i = 0; i <= start_row1; i++, dst++) { + dst[0] = (dst[0] & ~mask) | row; + } + } + dirty_tiles[tile_y0] |= dirty; + dirty_tiles[tile_y1] |= dirty; +} + +IWRAM_CODE +void +draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + if (y0 == y1) { + MAYBE_SWAP(x0, x1); + draw_hline(x0, x1, y0, clr); + } else if (x0 == x1) { + MAYBE_SWAP(y0, y1); + draw_vline(x0, y0, y1, clr); + } else { + // Fixed Precision constants. + const int fp_bit = 6; + const int fp_one = FP_NUM(1, fp_bit); + const int fp_half = fp_one >> 1; + + int dx = x0 > x1 ? x0 - x1 : x1 - x0; + int dy = y0 > y1 ? y0 - y1 : y1 - y0; + + if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) { + SWAP(x0, x1); + SWAP(y0, y1); + } + +#if SUBPIXEL_LINES == 1 + int dxf = (dx << fp_bit); + int dyf = (dy << fp_bit); + int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit); + int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit); + int x_step = x0 > x1 ? -1 : 1; + int y_step = y0 > y1 ? -1 : 1; + int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy; + if (dx >= dy) { + int step = dxf / dyf; + int remaining = dx; + while (remaining > (step - 1)) { + distance += step * 2 * dyf; + if (distance >= 0) { + draw_hline(x0, x0 + step - 1, y0, clr); + x0 += x_step * step; + remaining -= step; + } else { + if (remaining < step) { + break; + } + draw_hline(x0, x0 + step, y0, clr); + distance += 2 * dyf; + x0 += x_step * (step + 1); + remaining -= step + 1; + } + distance -= 2 * dxf; + y0 += y_step; + } + if (remaining >= 0) { + draw_hline(x0, x0 + remaining, y0, clr); + } + } else { + int step = dyf / dxf; + int remaining = dy; + while (remaining > (step - 1)) { + distance += step * 2 * dxf; + if (distance >= 0) { + draw_vline(x0, y0, y0 + step - 1, clr); + y0 += y_step * step; + remaining -= step; + } else { + draw_vline(x0, y0, y0 + step, clr); + distance += 2 * dxf; + y0 += y_step * (step + 1); + remaining -= step + 1; + } + distance -= 2 * dyf; + x0 += x_step; + } + if (remaining >= 0) { + draw_vline(x0, y0, y0 + remaining, clr); + } + } +#else + int x_step = x0 > x1 ? -1 : 1; + int y_step = y0 > y1 ? -1 : 1; + if (dx >= dy) { + int diff = 2 * dy - dx; + for (int i = 0; i < dx + 1; i++) { + draw_pixel(x0, y0, clr); + if (diff >= 0) { + diff -= 2 * dx; + y0 += y_step; + } + diff += 2 * dy; + x0 += x_step; + } + } else { + int diff = 2 * dx - dy; + for (int i = 0; i < dy + 1; i++) { + draw_pixel(x0, y0, clr); + if (diff >= 0) { + diff -= 2 * dy; + x0 += x_step; + } + diff += 2 * dx; + y0 += y_step; + } + } +#endif + } +} + +IWRAM_CODE +void +draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + MAYBE_SWAP(x0, x1); + MAYBE_SWAP(y0, y1); + + draw_hline(x0, x1, y0, clr); + draw_hline(x0, x1, y1, clr); + draw_vline(x0, y0, y1, clr); + draw_vline(x1, y0, y1, clr); +} + +IWRAM_CODE +void +draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + MAYBE_SWAP(x0, x1); + MAYBE_SWAP(y0, y1); + + // Special condition. If the screen is to be completely filled, use the DMA + // instead. + if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { + screen_fill(clr); + return; + } + + for (size_t y = y0; y <= y1; y++) { + draw_hline(x0, x1, y, clr); + } +} + +// +// Sprites (chr/icn). +// + +#if DEC_BIG_LUT == 1 +static u32 dec_byte_flip_x[256] = { + 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, + 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, + 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, + 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011, + 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000, + 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, + 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010, + 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, + 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, + 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001, + 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, + 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011, + 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000, + 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, + 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010, + 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, + 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, + 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001, + 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, + 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011, + 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000, + 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, + 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010, + 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, + 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, + 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001, + 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, + 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011, + 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000, + 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, + 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010, + 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, + 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, + 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001, + 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, + 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011, + 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000, + 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, + 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010, + 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, + 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, + 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001, + 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, + 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011, + 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000, + 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, + 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010, + 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, + 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, + 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001, + 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, + 0x11111111 +}; + +static u32 dec_byte[256] = { + 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, + 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000, + 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, + 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000, + 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000, + 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000, + 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100, + 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100, + 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100, + 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100, + 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100, + 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100, + 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010, + 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010, + 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010, + 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010, + 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010, + 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010, + 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010, + 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110, + 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110, + 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110, + 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110, + 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110, + 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110, + 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001, + 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001, + 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001, + 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001, + 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001, + 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001, + 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001, + 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101, + 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101, + 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101, + 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101, + 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101, + 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101, + 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011, + 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011, + 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011, + 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011, + 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011, + 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011, + 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111, + 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111, + 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111, + 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111, + 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111, + 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111, + 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111, + 0x11111111 +}; + +IWRAM_CODE +static inline +u32 +decode_1bpp(u8 row, u8 flip_x) { + if (flip_x) { + return dec_byte_flip_x[row]; + } + return dec_byte[row]; +} +#else +static u16 dec_nibble[] = { + 0x0000, 0x1000, 0x0100, 0x1100, + 0x0010, 0x1010, 0x0110, 0x1110, + 0x0001, 0x1001, 0x0101, 0x1101, + 0x0011, 0x1011, 0x0111, 0x1111, +}; + +static u16 dec_nibble_flip_x[] = { + 0x0000, 0x0001, 0x0010, 0x0011, + 0x0100, 0x0101, 0x0110, 0x0111, + 0x1000, 0x1001, 0x1010, 0x1011, + 0x1100, 0x1101, 0x1110, 0x1111, +}; + +IWRAM_CODE +static inline +u32 +decode_1bpp(u8 row, u8 flip_x) { + if (flip_x) { + u16 *lut = dec_nibble_flip_x; + return (u32)lut[(row >> 4) & 0xF] << 16 | (u32)lut[(row >> 0) & 0xF]; + } + u16 *lut = dec_nibble; + return (u32)lut[(row >> 0) & 0xF] << 16 | (u32)lut[(row >> 4) & 0xF]; +} +#endif + +IWRAM_CODE +UNROLL_LOOPS +void +draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { + BOUNDCHECK_SCREEN(x, y); + size_t tile_x0 = x / 8; + size_t tile_x1 = (x + 7) / 8; + size_t tile_y = y / 8; + size_t start_col = x % 8; + size_t start_row = y % 8; + size_t shift_left = start_col * 4; + size_t shift_right = (8 - start_col) * 4; + u32 dirty = (1 << tile_x0) | (1 << tile_x1); + u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; +#if DEC_BIG_LUT + u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; +#endif + if (!flip_y) { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v + 0]; + u8 ch2 = sprite[v + 8]; +#if DEC_BIG_LUT + u32 clr_a = lut[ch1]; + u32 clr_b = lut[ch2]; +#else + u32 clr_a = decode_1bpp(ch1, flip_x); + u32 clr_b = decode_1bpp(ch2, flip_x); +#endif + u32 mask_a = (clr_a * 0xF); + u32 mask_b = (clr_b * 0xF); + u32 mask = (mask_a | mask_b); + u32 color; + if (clr == 0) { + color = clr_a + (clr_b << 1); + } else if (clr == 15) { + color = 0; + } else { + color = (clr_a | clr_b) * clr; + } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) { + dirty_tiles[tile_y + 1] |= dirty; + dst += (32 - 1) * 8; + } + } + } else { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v) + 0]; + u8 ch2 = sprite[(7 - v) + 8]; +#if DEC_BIG_LUT + u32 clr_a = lut[ch1]; + u32 clr_b = lut[ch2]; +#else + u32 clr_a = decode_1bpp(ch1, flip_x); + u32 clr_b = decode_1bpp(ch2, flip_x); +#endif + u32 mask_a = (clr_a * 0xF); + u32 mask_b = (clr_b * 0xF); + u32 mask = (mask_a | mask_b); + u32 color; + if (clr == 0) { + color = clr_a + (clr_b << 1); + } else if (clr == 15) { + color = 0; + } else { + color = (clr_a | clr_b) * clr; + } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) { + dirty_tiles[tile_y + 1] |= dirty; + dst += (32 - 1) * 8; + } + } + } + dirty_tiles[tile_y] |= dirty; +} + +IWRAM_CODE +UNROLL_LOOPS +void +draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { + BOUNDCHECK_SCREEN(x, y); + size_t tile_x0 = x / 8; + size_t tile_x1 = (x + 7) / 8; + size_t tile_y = y / 8; + size_t start_col = x % 8; + size_t start_row = y % 8; + size_t shift_left = start_col * 4; + size_t shift_right = (8 - start_col) * 4; + u32 dirty = (1 << tile_x0) | (1 << tile_x1); + u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; +#if DEC_BIG_LUT + u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; +#endif + if (!flip_y) { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v + 0]; +#if DEC_BIG_LUT + u32 color = lut[ch1]; +#else + u32 color = decode_1bpp(ch1, flip_x); +#endif + u32 mask = (color * 0xF); + color *= clr; + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) { + dirty_tiles[tile_y + 1] |= dirty; + dst += (32 - 1) * 8; + } + } + } else { + for(size_t v = 0; v < 8; v++, dst++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v) + 0]; +#if DEC_BIG_LUT + u32 color = lut[ch1]; +#else + u32 color = decode_1bpp(ch1, flip_x); +#endif + u32 mask = (color * 0xF); + color *= clr; + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); + if ((start_row + v) == 7) { + dirty_tiles[tile_y + 1] |= dirty; + dst += (32 - 1) * 8; + } + } + } + dirty_tiles[tile_y] |= dirty; +} + +// +// Flipping buffers/copying memory. +// + +IWRAM_CODE +void +flip_buffer(void) { +// Mode 0: double buffering without dirty tiles. +#if FLIP_TYPE == 0 + if (backbuf == BUF_0) { + backbuf = BUF_1; + BG_H_SCROLL_0 = 0; + BG_H_SCROLL_1 = -240; + } else { + backbuf = BUF_0; + BG_H_SCROLL_0 = -240; + BG_H_SCROLL_1 = 0; + } + +// Mode 1: single buffer, copy the dirty lines from backbuffer (BUF_1) to +// frontbuffer (BUF_0) using the DMA. +#elif FLIP_TYPE == 1 + u32 *front = BUF_0; + u32 *back = BUF_1; + BG_H_SCROLL_0 = 0; + BG_H_SCROLL_1 = -240; + for (size_t j = 0; j < 20; ++j) { + if (dirty_tiles[j] == 0) { + continue; + } + u32 offset = j * 32 * 8; + dma_copy(front + offset, back + offset, (30 * 8 * 4), 3); + dirty_tiles[j] = 0; + } + +// Mode 2: single buffer, copy the dirty tiles from backbuffer (BUF_1) to +// frontbuffer (BUF_0). +#elif FLIP_TYPE == 2 + u32 *front = BUF_0; + u32 *back = BUF_1; + BG_H_SCROLL_0 = 0; + BG_H_SCROLL_1 = -240; + for (size_t j = 0; j < 20; ++j) { + if (dirty_tiles[j] == 0) { + continue; + } + size_t k = 1; + for (size_t i = 0; i < 30; ++i, k <<= 1) { + if (dirty_tiles[j] & k) { + Tile *mem_front = front; + Tile *mem_back = back; + mem_front[i + j * 32] = mem_back[i + j * 32]; + } + } + dirty_tiles[j] = 0; + } + +// Mode 3: Double buffering with dirty line, copying the dirty lines if needed +// after flipping buffers with the DMA. +#elif FLIP_TYPE == 3 + bool should_flip = false; + for (size_t j = 0; j < 20; ++j) { + if (dirty_tiles[j] == 0) { + continue; + } + should_flip = true; + break; + } + if (!should_flip) { + return; + } + u32 *frontbuf = backbuf; + if (backbuf == BUF_0) { + backbuf = BUF_1; + BG_H_SCROLL_0 = 0; + BG_H_SCROLL_1 = -240; + } else { + backbuf = BUF_0; + BG_H_SCROLL_0 = -240; + BG_H_SCROLL_1 = 0; + } + for (size_t j = 0; j < 20; ++j) { + if (dirty_tiles[j] == 0) { + continue; + } + u32 offset = j * 32 * 8; + dma_copy(backbuf + offset, frontbuf + offset, (30 * 8 * 4), 3); + dirty_tiles[j] = 0; + } + +// Mode 4: Double buffering with dirty tiles, copying the dirty tiles if needed +// after flipping buffers. +#elif FLIP_TYPE == 4 + bool should_flip = false; + for (size_t j = 0; j < 20; ++j) { + if (dirty_tiles[j] == 0) { + continue; + } + should_flip = true; + break; + } + if (!should_flip) { + return; + } + u32 *frontbuf = backbuf; + if (backbuf == BUF_0) { + backbuf = BUF_1; + BG_H_SCROLL_0 = 0; + BG_H_SCROLL_1 = -240; + } else { + backbuf = BUF_0; + BG_H_SCROLL_0 = -240; + BG_H_SCROLL_1 = 0; + } + for (size_t j = 0; j < 20; ++j) { + if (dirty_tiles[j] == 0) { + continue; + } + size_t k = 1; + for (size_t i = 0; i < 30; ++i, k <<= 1) { + if (dirty_tiles[j] & k) { + Tile *mem_front = frontbuf; + Tile *mem_back = backbuf; + mem_back[i + j * 32] = mem_front[i + j * 32]; + } + } + dirty_tiles[j] = 0; + } +#endif +} + +// +// Text rendering. +// + +#include "font.h" + +// Font rendering function for the text engine. +void +txt_drawc(char c, size_t x, size_t y, u8 clr) { + u8 *tile = font_icn; + draw_icn(x, y, tile + 8 * c, clr, 1, 0); +} + +// +// Initialization. +// + +void +renderer_init(void) { + // Initialize display mode and bg palette. + DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; + + // Clear VRAM. + dma_fill((u32*)MEM_VRAM, 0, KB(96), 3); + + // Initialize backgrounds. + BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); + BG_CTRL(1) = BG_CHARBLOCK(CB_1) | BG_SCREENBLOCK(SB_1) | BG_PRIORITY(1) | BG_SIZE(1); + + // Initialize background memory map for the render buffers. The backgrounds + // are 64x32 each, with the second screenblock pointing to a zeroed tile. + // This makes it so while scrolling the backgrounds to the second screen we + // effectively disabling them. Thanks to this we can perform double + // buffering with mode 0 rendering. + u16 *mem_map_fg = SCREENBLOCK_MEM[SB_0]; + u16 *mem_map_fg_blank = SCREENBLOCK_MEM[SB_0 + 1]; + u16 *mem_map_bg = SCREENBLOCK_MEM[SB_1]; + u16 *mem_map_bg_blank = SCREENBLOCK_MEM[SB_1 + 1]; + for (size_t i = 0; i < 32 * 20; ++i) { + mem_map_fg[i] = i; + mem_map_fg_blank[i] = 32 * 20 - 1; + mem_map_bg[i] = i + 32 * 4; + mem_map_bg_blank[i] = (32 * 20 - 1) + 32 * 4; + } + + // Setup initial background state. + BG_H_SCROLL_0 = -240; + BG_H_SCROLL_1 = -240; + + // Initialize default palette. + PAL_BUFFER_BG[0] = COLOR_BLACK; + PAL_BUFFER_BG[1] = COLOR_WHITE; + PAL_BUFFER_BG[2] = COLOR_RED; + PAL_BUFFER_BG[3] = COLOR_BLUE; + PAL_BUFFER_BG[4] = COLOR_CYAN; + PAL_BUFFER_BG[5] = COLOR_GREY; + PAL_BUFFER_BG[6] = COLOR_WHITE; + PAL_BUFFER_BG[7] = COLOR_GREEN; + PAL_BUFFER_BG[8] = COLOR_PURPLE; + + // Initialize text engine. + txt_init(txt_drawc); +} -- cgit v1.2.1