From 8aa5155ecfbaaa2e67f82561352a5857bc86d39e Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sun, 6 Aug 2023 11:13:22 +0200 Subject: Add missing .c files and wishlist TODOs --- src/renderer_m4.c | 601 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 601 insertions(+) create mode 100644 src/renderer_m4.c (limited to 'src/renderer_m4.c') diff --git a/src/renderer_m4.c b/src/renderer_m4.c new file mode 100644 index 0000000..d50bab3 --- /dev/null +++ b/src/renderer_m4.c @@ -0,0 +1,601 @@ +#include "renderer.h" +#include "text.h" + +// +// Parameters. +// + +#define SUBPIXEL_LINES 0 +#define DEC_BIG_LUT 1 + +// Front/back buffers for double buffering. +#define BUF_0 ((u32*)(MEM_VRAM)) +#define BUF_1 ((u32*)(MEM_VRAM ^ 0x0A000)) + +// Pointer to the backbuffer. +static u16 *backbuf = BUF_1; + +// Boundchecks can be disabled at compile time but this will not always improve +// the performance and can in fact make it worse. It is possible that this is +// due to some aliasing optimizations but not sure at this moment. +#ifdef DISABLE_BOUNDCHECK_SCREEN +#define BOUNDCHECK_SCREEN(X,Y) +#else +#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; +#endif + +// Swap A and B values without a tmp variable. +#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B))) + +// Swap A and B values to make sure A <= B. +#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); } + +// +// Basic primitives. 
+// + +IWRAM_CODE +void screen_fill(u8 clr) { + dma_fill(backbuf, 0x01010101 * clr, KB(75) / 2, 3); +} + +IWRAM_CODE +void +draw_pixel(size_t x, size_t y, u8 clr) { + BOUNDCHECK_SCREEN(x, y); + u16 *dst = &backbuf[(x + y * SCREEN_WIDTH) / 2]; + if(x & 1) { + *dst = (*dst & 0xF) | (clr << 8); + } else { + *dst = (*dst & ~0xF) | clr; + } +} + +IWRAM_CODE +static inline +void +draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y0); + // Find row positions for the given x/y coordinates. + size_t tile_x0 = x0 / 8; + size_t tile_x1 = x1 / 8; + size_t start_col = x0 % 8; + size_t end_col = x1 % 8; + // Horizontal line. There are 3 cases: + // 1. Lines fit on a single tile. + // 2. Lines go through 2 tiles, both require partial row updates. + // 3. Lines go through 3 or more tiles, first and last tiles use + // partial row updates, rows in the middle can write the entire + // row. + size_t dx = tile_x1 - tile_x0; + u64 *dst = &backbuf[(tile_x0 * 8 + y0 * SCREEN_WIDTH) / 2]; + if (dx < 1) { + u64 row_mask = 0xFFFFFFFFFFFFFFFF; + row_mask >>= (7 - end_col - dx) * 8; + row_mask &= 0xFFFFFFFFFFFFFFFF << start_col * 8; + u64 row = (0x0101010101010101 * clr) & row_mask; + *dst = (*dst & ~row_mask) | row; + } else { + size_t shift_left = start_col * 8; + size_t shift_right = (7 - end_col) * 8; + u64 row_mask = 0xFFFFFFFFFFFFFFFF; + u64 row = 0x0101010101010101 * clr; + dst[0] = (dst[0] & ~(row_mask << shift_left)) | row << shift_left; + if (dx != 1) { + dma_fill(&dst[1], 0x01010101 * clr, (dx - 1) * 8, 3); + } + dst[dx] = dst[dx] & ~(row_mask >> shift_right); + dst[dx] |= row >> shift_right; + } +} + +IWRAM_CODE +UNROLL_LOOPS +static inline +void +draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x0, y1); + size_t tile_x0 = x0 / 8; + size_t start_col = x0 % 8; + u16 *dst = &backbuf[(start_col + tile_x0 * 8 + y0 * SCREEN_WIDTH) / 2]; + if(x0 & 1) { + for (size_t i = 0; i <= 
y1 - y0; i++, dst += SCREEN_WIDTH / 2) { + *dst = (*dst & 0xF) | (clr << 8); + } + } else { + for (size_t i = 0; i <= y1 - y0; i++, dst += SCREEN_WIDTH / 2) { + *dst = (*dst & ~0xF) | clr; + } + } +} + +IWRAM_CODE +UNROLL_LOOPS +void +draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + + if (y0 == y1) { + MAYBE_SWAP(x0, x1); + draw_hline(x0, x1, y0, clr); + } else if (x0 == x1) { + MAYBE_SWAP(y0, y1); + draw_vline(x0, y0, y1, clr); + } else { +#if SUBPIXEL_LINES == 1 + // Fixed Precision constants. + const int fp_bit = 6; + const int fp_one = FP_NUM(1, fp_bit); + const int fp_half = fp_one >> 1; + + int dx = x0 > x1 ? x0 - x1 : x1 - x0; + int dy = y0 > y1 ? y0 - y1 : y1 - y0; + int dxf = (dx << fp_bit); + int dyf = (dy << fp_bit); + + if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) { + SWAP(x0, x1); + SWAP(y0, y1); + } + + int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit); + int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit); + int x_step = x0 > x1 ? -1 : 1; + int y_step = y0 > y1 ? -SCREEN_WIDTH : SCREEN_WIDTH; + + u16 *dst = NULL; + uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0); + u32 mask = x0 & 1 ? 
~0xFF : 0xFF; + u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8); + if (dx >= dy) { + int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy; + int remaining = dx; + while (distance <= 0 && remaining > 0) { + dst = (u16*)(addr - (mask >> 31)); + *dst = (*dst & ~mask) | (color & mask); + distance += 2 * dyf; + addr += x_step; + remaining--; + mask = ~mask; + } + distance -= 2 * dxf; + addr += y_step; + + while (remaining >= 0) { + dst = (u16*)(addr - (mask >> 31)); + *dst = (*dst & ~mask) | (color & mask); + if (distance >= 0) { + distance -= 2 * dxf; + addr += y_step; + } + distance += 2 * dyf; + addr += x_step; + mask = ~mask; + remaining--; + } + } else { + int distance = (frac_x - fp_one) * dy - (frac_y - fp_half) * dx; + int remaining = dy; + while (distance <= 0 && remaining > 0) { + dst = (u16*)(addr - (mask >> 31)); + *dst = (*dst & ~mask) | (color & mask); + distance += 2 * dxf; + addr += y_step; + remaining--; + } + distance -= 2 * dyf; + addr += x_step; + mask = ~mask; + + while (remaining >= 0) { + dst = (u16*)(addr - (mask >> 31)); + *dst = (*dst & ~mask) | (color & mask); + if (distance >= 0) { + distance -= 2 * dyf; + addr += x_step; + mask = ~mask; + } + distance += 2 * dxf; + addr += y_step; + remaining--; + } + } +#else + // Diagonal line. + int dx = x0 > x1 ? x0 - x1 : x1 - x0; + int dy = y0 > y1 ? y0 - y1 : y1 - y0; + int x_step = x0 > x1 ? -1 : 1; + int y_step = y0 > y1 ? -SCREEN_WIDTH : SCREEN_WIDTH; + + u16 *dst = NULL; + uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0); + u32 mask = x0 & 1 ? 
~0xFF : 0xFF; + u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8); + if (dx >= dy) { + int diff = 2 * dy - dx; + for (int i = 0; i < dx + 1; i++) { + dst = (u16*)(addr - (mask >> 31)); + *dst = (*dst & ~mask) | (color & mask); + if (diff >= 0) { + diff -= 2 * dx; + addr += y_step; + } + diff += 2 * dy; + addr += x_step; + mask = ~mask; + } + } else { + int diff = 2 * dx - dy; + for (int i = 0; i < dy + 1; i++) { + dst = (u16*)(addr - (mask >> 31)); + *dst = (*dst & ~mask) | (color & mask); + if (diff >= 0) { + diff -= 2 * dy; + addr += x_step; + mask = ~mask; + } + diff += 2 * dx; + addr += y_step; + } + } +#endif + } +} + +IWRAM_CODE +void +draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + MAYBE_SWAP(x0, x1); + MAYBE_SWAP(y0, y1); + + draw_hline(x0, x1, y0, clr); + draw_hline(x0, x1, y1, clr); + draw_vline(x0, y0, y1, clr); + draw_vline(x1, y0, y1, clr); +} + +IWRAM_CODE +void +draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { + BOUNDCHECK_SCREEN(x0, y0); + BOUNDCHECK_SCREEN(x1, y1); + MAYBE_SWAP(x0, x1); + MAYBE_SWAP(y0, y1); + + // Special condition. If the screen is to be completely filled, use the DMA + // instead. + if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { + screen_fill(clr); + return; + } + + // Drawline implementation. + for (size_t y = y0; y <= y1; y++) { + draw_hline(x0, x1, y, clr); + } +} + +// +// Sprites (chr/icn). 
+// + +#if DEC_BIG_LUT == 1 + +static u64 dec_byte_flip_x[256] = { + 0x0000000000000000, 0x0000000000000001, 0x0000000000000100, 0x0000000000000101, 0x0000000000010000, + 0x0000000000010001, 0x0000000000010100, 0x0000000000010101, 0x0000000001000000, 0x0000000001000001, + 0x0000000001000100, 0x0000000001000101, 0x0000000001010000, 0x0000000001010001, 0x0000000001010100, + 0x0000000001010101, 0x0000000100000000, 0x0000000100000001, 0x0000000100000100, 0x0000000100000101, + 0x0000000100010000, 0x0000000100010001, 0x0000000100010100, 0x0000000100010101, 0x0000000101000000, + 0x0000000101000001, 0x0000000101000100, 0x0000000101000101, 0x0000000101010000, 0x0000000101010001, + 0x0000000101010100, 0x0000000101010101, 0x0000010000000000, 0x0000010000000001, 0x0000010000000100, + 0x0000010000000101, 0x0000010000010000, 0x0000010000010001, 0x0000010000010100, 0x0000010000010101, + 0x0000010001000000, 0x0000010001000001, 0x0000010001000100, 0x0000010001000101, 0x0000010001010000, + 0x0000010001010001, 0x0000010001010100, 0x0000010001010101, 0x0000010100000000, 0x0000010100000001, + 0x0000010100000100, 0x0000010100000101, 0x0000010100010000, 0x0000010100010001, 0x0000010100010100, + 0x0000010100010101, 0x0000010101000000, 0x0000010101000001, 0x0000010101000100, 0x0000010101000101, + 0x0000010101010000, 0x0000010101010001, 0x0000010101010100, 0x0000010101010101, 0x0001000000000000, + 0x0001000000000001, 0x0001000000000100, 0x0001000000000101, 0x0001000000010000, 0x0001000000010001, + 0x0001000000010100, 0x0001000000010101, 0x0001000001000000, 0x0001000001000001, 0x0001000001000100, + 0x0001000001000101, 0x0001000001010000, 0x0001000001010001, 0x0001000001010100, 0x0001000001010101, + 0x0001000100000000, 0x0001000100000001, 0x0001000100000100, 0x0001000100000101, 0x0001000100010000, + 0x0001000100010001, 0x0001000100010100, 0x0001000100010101, 0x0001000101000000, 0x0001000101000001, + 0x0001000101000100, 0x0001000101000101, 0x0001000101010000, 0x0001000101010001, 
0x0001000101010100, + 0x0001000101010101, 0x0001010000000000, 0x0001010000000001, 0x0001010000000100, 0x0001010000000101, + 0x0001010000010000, 0x0001010000010001, 0x0001010000010100, 0x0001010000010101, 0x0001010001000000, + 0x0001010001000001, 0x0001010001000100, 0x0001010001000101, 0x0001010001010000, 0x0001010001010001, + 0x0001010001010100, 0x0001010001010101, 0x0001010100000000, 0x0001010100000001, 0x0001010100000100, + 0x0001010100000101, 0x0001010100010000, 0x0001010100010001, 0x0001010100010100, 0x0001010100010101, + 0x0001010101000000, 0x0001010101000001, 0x0001010101000100, 0x0001010101000101, 0x0001010101010000, + 0x0001010101010001, 0x0001010101010100, 0x0001010101010101, 0x0100000000000000, 0x0100000000000001, + 0x0100000000000100, 0x0100000000000101, 0x0100000000010000, 0x0100000000010001, 0x0100000000010100, + 0x0100000000010101, 0x0100000001000000, 0x0100000001000001, 0x0100000001000100, 0x0100000001000101, + 0x0100000001010000, 0x0100000001010001, 0x0100000001010100, 0x0100000001010101, 0x0100000100000000, + 0x0100000100000001, 0x0100000100000100, 0x0100000100000101, 0x0100000100010000, 0x0100000100010001, + 0x0100000100010100, 0x0100000100010101, 0x0100000101000000, 0x0100000101000001, 0x0100000101000100, + 0x0100000101000101, 0x0100000101010000, 0x0100000101010001, 0x0100000101010100, 0x0100000101010101, + 0x0100010000000000, 0x0100010000000001, 0x0100010000000100, 0x0100010000000101, 0x0100010000010000, + 0x0100010000010001, 0x0100010000010100, 0x0100010000010101, 0x0100010001000000, 0x0100010001000001, + 0x0100010001000100, 0x0100010001000101, 0x0100010001010000, 0x0100010001010001, 0x0100010001010100, + 0x0100010001010101, 0x0100010100000000, 0x0100010100000001, 0x0100010100000100, 0x0100010100000101, + 0x0100010100010000, 0x0100010100010001, 0x0100010100010100, 0x0100010100010101, 0x0100010101000000, + 0x0100010101000001, 0x0100010101000100, 0x0100010101000101, 0x0100010101010000, 0x0100010101010001, + 0x0100010101010100, 0x0100010101010101, 
0x0101000000000000, 0x0101000000000001, 0x0101000000000100, + 0x0101000000000101, 0x0101000000010000, 0x0101000000010001, 0x0101000000010100, 0x0101000000010101, + 0x0101000001000000, 0x0101000001000001, 0x0101000001000100, 0x0101000001000101, 0x0101000001010000, + 0x0101000001010001, 0x0101000001010100, 0x0101000001010101, 0x0101000100000000, 0x0101000100000001, + 0x0101000100000100, 0x0101000100000101, 0x0101000100010000, 0x0101000100010001, 0x0101000100010100, + 0x0101000100010101, 0x0101000101000000, 0x0101000101000001, 0x0101000101000100, 0x0101000101000101, + 0x0101000101010000, 0x0101000101010001, 0x0101000101010100, 0x0101000101010101, 0x0101010000000000, + 0x0101010000000001, 0x0101010000000100, 0x0101010000000101, 0x0101010000010000, 0x0101010000010001, + 0x0101010000010100, 0x0101010000010101, 0x0101010001000000, 0x0101010001000001, 0x0101010001000100, + 0x0101010001000101, 0x0101010001010000, 0x0101010001010001, 0x0101010001010100, 0x0101010001010101, + 0x0101010100000000, 0x0101010100000001, 0x0101010100000100, 0x0101010100000101, 0x0101010100010000, + 0x0101010100010001, 0x0101010100010100, 0x0101010100010101, 0x0101010101000000, 0x0101010101000001, + 0x0101010101000100, 0x0101010101000101, 0x0101010101010000, 0x0101010101010001, 0x0101010101010100, + 0x0101010101010101 +}; + +static u64 dec_byte[256] = { + 0x0000000000000000, 0x0100000000000000, 0x0001000000000000, 0x0101000000000000, 0x0000010000000000, + 0x0100010000000000, 0x0001010000000000, 0x0101010000000000, 0x0000000100000000, 0x0100000100000000, + 0x0001000100000000, 0x0101000100000000, 0x0000010100000000, 0x0100010100000000, 0x0001010100000000, + 0x0101010100000000, 0x0000000001000000, 0x0100000001000000, 0x0001000001000000, 0x0101000001000000, + 0x0000010001000000, 0x0100010001000000, 0x0001010001000000, 0x0101010001000000, 0x0000000101000000, + 0x0100000101000000, 0x0001000101000000, 0x0101000101000000, 0x0000010101000000, 0x0100010101000000, + 0x0001010101000000, 0x0101010101000000, 
0x0000000000010000, 0x0100000000010000, 0x0001000000010000, + 0x0101000000010000, 0x0000010000010000, 0x0100010000010000, 0x0001010000010000, 0x0101010000010000, + 0x0000000100010000, 0x0100000100010000, 0x0001000100010000, 0x0101000100010000, 0x0000010100010000, + 0x0100010100010000, 0x0001010100010000, 0x0101010100010000, 0x0000000001010000, 0x0100000001010000, + 0x0001000001010000, 0x0101000001010000, 0x0000010001010000, 0x0100010001010000, 0x0001010001010000, + 0x0101010001010000, 0x0000000101010000, 0x0100000101010000, 0x0001000101010000, 0x0101000101010000, + 0x0000010101010000, 0x0100010101010000, 0x0001010101010000, 0x0101010101010000, 0x0000000000000100, + 0x0100000000000100, 0x0001000000000100, 0x0101000000000100, 0x0000010000000100, 0x0100010000000100, + 0x0001010000000100, 0x0101010000000100, 0x0000000100000100, 0x0100000100000100, 0x0001000100000100, + 0x0101000100000100, 0x0000010100000100, 0x0100010100000100, 0x0001010100000100, 0x0101010100000100, + 0x0000000001000100, 0x0100000001000100, 0x0001000001000100, 0x0101000001000100, 0x0000010001000100, + 0x0100010001000100, 0x0001010001000100, 0x0101010001000100, 0x0000000101000100, 0x0100000101000100, + 0x0001000101000100, 0x0101000101000100, 0x0000010101000100, 0x0100010101000100, 0x0001010101000100, + 0x0101010101000100, 0x0000000000010100, 0x0100000000010100, 0x0001000000010100, 0x0101000000010100, + 0x0000010000010100, 0x0100010000010100, 0x0001010000010100, 0x0101010000010100, 0x0000000100010100, + 0x0100000100010100, 0x0001000100010100, 0x0101000100010100, 0x0000010100010100, 0x0100010100010100, + 0x0001010100010100, 0x0101010100010100, 0x0000000001010100, 0x0100000001010100, 0x0001000001010100, + 0x0101000001010100, 0x0000010001010100, 0x0100010001010100, 0x0001010001010100, 0x0101010001010100, + 0x0000000101010100, 0x0100000101010100, 0x0001000101010100, 0x0101000101010100, 0x0000010101010100, + 0x0100010101010100, 0x0001010101010100, 0x0101010101010100, 0x0000000000000001, 0x0100000000000001, + 
0x0001000000000001, 0x0101000000000001, 0x0000010000000001, 0x0100010000000001, 0x0001010000000001, + 0x0101010000000001, 0x0000000100000001, 0x0100000100000001, 0x0001000100000001, 0x0101000100000001, + 0x0000010100000001, 0x0100010100000001, 0x0001010100000001, 0x0101010100000001, 0x0000000001000001, + 0x0100000001000001, 0x0001000001000001, 0x0101000001000001, 0x0000010001000001, 0x0100010001000001, + 0x0001010001000001, 0x0101010001000001, 0x0000000101000001, 0x0100000101000001, 0x0001000101000001, + 0x0101000101000001, 0x0000010101000001, 0x0100010101000001, 0x0001010101000001, 0x0101010101000001, + 0x0000000000010001, 0x0100000000010001, 0x0001000000010001, 0x0101000000010001, 0x0000010000010001, + 0x0100010000010001, 0x0001010000010001, 0x0101010000010001, 0x0000000100010001, 0x0100000100010001, + 0x0001000100010001, 0x0101000100010001, 0x0000010100010001, 0x0100010100010001, 0x0001010100010001, + 0x0101010100010001, 0x0000000001010001, 0x0100000001010001, 0x0001000001010001, 0x0101000001010001, + 0x0000010001010001, 0x0100010001010001, 0x0001010001010001, 0x0101010001010001, 0x0000000101010001, + 0x0100000101010001, 0x0001000101010001, 0x0101000101010001, 0x0000010101010001, 0x0100010101010001, + 0x0001010101010001, 0x0101010101010001, 0x0000000000000101, 0x0100000000000101, 0x0001000000000101, + 0x0101000000000101, 0x0000010000000101, 0x0100010000000101, 0x0001010000000101, 0x0101010000000101, + 0x0000000100000101, 0x0100000100000101, 0x0001000100000101, 0x0101000100000101, 0x0000010100000101, + 0x0100010100000101, 0x0001010100000101, 0x0101010100000101, 0x0000000001000101, 0x0100000001000101, + 0x0001000001000101, 0x0101000001000101, 0x0000010001000101, 0x0100010001000101, 0x0001010001000101, + 0x0101010001000101, 0x0000000101000101, 0x0100000101000101, 0x0001000101000101, 0x0101000101000101, + 0x0000010101000101, 0x0100010101000101, 0x0001010101000101, 0x0101010101000101, 0x0000000000010101, + 0x0100000000010101, 0x0001000000010101, 0x0101000000010101, 
0x0000010000010101, 0x0100010000010101, + 0x0001010000010101, 0x0101010000010101, 0x0000000100010101, 0x0100000100010101, 0x0001000100010101, + 0x0101000100010101, 0x0000010100010101, 0x0100010100010101, 0x0001010100010101, 0x0101010100010101, + 0x0000000001010101, 0x0100000001010101, 0x0001000001010101, 0x0101000001010101, 0x0000010001010101, + 0x0100010001010101, 0x0001010001010101, 0x0101010001010101, 0x0000000101010101, 0x0100000101010101, + 0x0001000101010101, 0x0101000101010101, 0x0000010101010101, 0x0100010101010101, 0x0001010101010101, + 0x0101010101010101 +}; + +IWRAM_CODE +static inline +u64 +decode_1bpp(u8 row, u8 flip_x) { + if (flip_x) { + return dec_byte_flip_x[row]; + } + return dec_byte[row]; +} + +#else + +static u32 dec_nibble[] = { + 0x00000000, 0x01000000, 0x00010000, 0x01010000, + 0x00000100, 0x01000100, 0x00010100, 0x01010100, + 0x00000001, 0x01000001, 0x00010001, 0x01010001, + 0x00000101, 0x01000101, 0x00010101, 0x01010101, +}; + +static u32 dec_nibble_flip_x[] = { + 0x00000000, 0x00000001, 0x00000100, 0x00000101, + 0x00010000, 0x00010001, 0x00010100, 0x00010101, + 0x01000000, 0x01000001, 0x01000100, 0x01000101, + 0x01010000, 0x01010001, 0x01010100, 0x01010101, +}; + +IWRAM_CODE +static inline +u64 +decode_1bpp(u8 row, u8 flip_x) { + if (flip_x) { + u32 *lut = dec_nibble_flip_x; + return (u64)lut[(row >> 4) & 0xF] << 32 | (u64)lut[(row >> 0) & 0xF]; + } + u32 *lut = dec_nibble; + return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF]; +} + +#endif + +IWRAM_CODE +static inline +void +draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 clr, u8 flip_x) { + BOUNDCHECK_SCREEN(x, y); + + size_t tile_x = x / 8; + size_t start_col = x % 8; + size_t shift_left = start_col * 8; + size_t shift_right = (8 - start_col) * 8; + + u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2]; +#if DEC_BIG_LUT + u64 *lut = flip_x ? 
dec_byte_flip_x : dec_byte; + u64 clr_a = lut[a]; + u64 clr_b = lut[b]; +#else + u64 clr_a = decode_1bpp(a, flip_x); + u64 clr_b = decode_1bpp(b, flip_x); +#endif + u64 mask_a = (clr_a * 0xF); + u64 mask_b = (clr_b * 0xF); + u64 mask = (mask_a | mask_b); + u64 color; + if (clr == 0) { + color = clr_a + (clr_b << 1); + } else if (clr == 15) { + color = 0; + } else { + color = (clr_a | clr_b) * clr; + } + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[1] = (dst[1] & ~(mask >> shift_right)) | (color >> shift_right); +} + +IWRAM_CODE +static inline +void +draw_1bpp_row(size_t x, size_t y, u8 a, u8 clr, u8 flip_x) { + BOUNDCHECK_SCREEN(x, y); + + size_t tile_x = x / 8; + size_t start_col = x % 8; + size_t shift_left = start_col * 8; + size_t shift_right = (8 - start_col) * 8; + + u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2]; + u64 color = decode_1bpp(a, flip_x); + u64 mask = (color * 0xF); + color *= clr; + dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); + dst[1] = (dst[1] & ~(mask >> shift_right)) | (color >> shift_right); +} + +IWRAM_CODE +void +draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { + BOUNDCHECK_SCREEN(x, y); + if (!flip_y) { + for(size_t v = 0; v < 8; v++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v + 0]; + u8 ch2 = sprite[v + 8]; + draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x); + } + } else { + for(size_t v = 0; v < 8; v++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - v) + 0]; + u8 ch2 = sprite[(7 - v) + 8]; + draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x); + } + } +} + +IWRAM_CODE +void +draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { + BOUNDCHECK_SCREEN(x, y); + if (!flip_y) { + for(size_t v = 0; v < 8; v++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[v]; + draw_1bpp_row(x, y + v, ch1, clr, flip_x); + } + } else { + for(size_t v = 0; v < 8; v++) { + if ((y + v) >= SCREEN_HEIGHT) break; + u8 ch1 = sprite[(7 - 
v)]; + draw_1bpp_row(x, y + v, ch1, clr, flip_x); + } + } +} + +// +// Flipping buffers/copying memory. +// + +IWRAM_CODE +void +flip_buffer(void) { + backbuf = (u16*)((u32)backbuf ^ 0x0A000); + DISP_CTRL ^= DISP_PAGE; +} + +// +// Text rendering. +// + +#include "font.h" + +// Font rendering function for the text engine. +void +txt_drawc(char c, size_t x, size_t y, u8 clr) { + u8 *tile = font_icn; + draw_icn(x, y, tile + 8 * c, clr, 1, 0); +} + +void +txt_drawc_small(char c, size_t x, size_t y, u8 clr) { + u8 *tile = font_icn; + c = c < 'a' ? c + 16 * 6 : c + 16 * 4; + draw_icn(x, y, tile + 8 * c, clr, 1, 0); +} + +#define txt_drawf_small(msg, x, y, clr, ...) \ + { \ + char buf[256] = {0}; \ + posprintf(buf, msg, ##__VA_ARGS__); \ + u8 tmp = text_engine.spacing;\ + txt_spacing(4);\ + text_engine.drawc = txt_drawc_small;\ + txt_draws(buf, x, y, clr); \ + txt_spacing(tmp);\ + text_engine.drawc = txt_drawc;\ + } + +// +// Initialization. +// + +void +renderer_init(void) { + // Initialize display mode and bg palette. + DISP_CTRL = DISP_MODE_4 | DISP_BG_2; + + // Clear VRAM. + dma_fill((u16*)MEM_VRAM, 0, KB(96), 3); + + // Initialize default palette. + PAL_BUFFER_BG[0] = COLOR_BLACK; + PAL_BUFFER_BG[1] = COLOR_WHITE; + PAL_BUFFER_BG[2] = COLOR_RED; + PAL_BUFFER_BG[3] = COLOR_BLUE; + PAL_BUFFER_BG[4] = COLOR_CYAN; + PAL_BUFFER_BG[5] = COLOR_GREY; + PAL_BUFFER_BG[6] = COLOR_WHITE; + PAL_BUFFER_BG[7] = COLOR_GREEN; + PAL_BUFFER_BG[8] = COLOR_PURPLE; + + // Initialize text engine. + txt_init(txt_drawc); +} -- cgit v1.2.1