#include "ppu.h"
#include "bd-font.c"

/*
Copyright (c) 2021 Devine Lu Linvega
Copyright (c) 2021 Andrew Alderwick
Copyright (c) 2021 Adrian "asie" Siekierka
Copyright (c) 2021 Bad Diode

Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE.
*/

// Compile-time switches used by the #if blocks further down in this file:
// NEW_PPU selects between the two ppu_1bpp/ppu_2bpp implementations and
// FLIPBUF_DMA selects the DMA based row copy in flipbuf().
#define NEW_PPU 1
#define FLIPBUF_DMA 1

// VRAM layout, as byte offsets from MEM_VRAM. flipbuf() copies the *_BACK
// buffers into the matching *_FRONT buffers; the font tiles are unpacked to
// FONT_DATA by video_init().
#define FG_FRONT  ((u32*)(MEM_VRAM))
#define BG_FRONT  ((u32*)(MEM_VRAM + KB(20)))
#define FG_BACK   ((u32*)(MEM_VRAM + KB(44)))
#define BG_BACK   ((u32*)(MEM_VRAM + KB(64)))
#define TILE_MAP  ((u32*)(MEM_VRAM + KB(40)))
#define FONT_DATA ((u32*)(MEM_VRAM + KB(84)))

// Early-out for drawing functions: returns from the enclosing function when
// the coordinate lies outside the screen. Expands to nothing when
// DISABLE_BOUNDCHECK_SCREEN is defined. Because it contains a bare `return;`
// it can only be used inside functions returning void.
#ifdef DISABLE_BOUNDCHECK_SCREEN
#define BOUNDCHECK_SCREEN(X,Y)
#else
#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
#endif

// Swap A and B values without a tmp variable.
// NOTE: XOR swap; A and B must be distinct integer lvalues (swapping an
// object with itself would zero it).
#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))

// Swap A and B values to make sure A <= B.
#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }

// Keyboard.
// Tile index added to a character code to build the OBJ tile number of the
// keyboard sprites (see video_init()).
#define SPRITE_START_IDX 640

// TODO: Can we put these tables on the VRAM for extra speed?
// Bit-expansion lookup tables: each byte is expanded into a u32 holding eight
// 4-bit fields, one per input bit, each field being 0 or 1. This turns one
// row of a 1bpp bit plane into a row of 4-bit pixels.
//
// dec_byte_flip_x[b] keeps the bit order (bit 0 -> lowest nibble), i.e. the
// hex digits of the entry spell the binary representation of b.
static u32 dec_byte_flip_x[256] = {
    0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111,
    0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111,
    0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111,
    0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111,
    0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111,
    0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111,
    0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111,
    0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111,
    0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111,
    0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111,
    0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111,
    0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111,
    0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111,
    0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111,
    0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111,
    0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111,
    0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111,
    0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111,
    0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111,
    0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111,
    0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111,
    0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111,
    0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111,
    0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111,
    0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111,
    0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111,
    0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111,
    0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111,
    0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111,
    0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111,
    0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111,
    0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111
};

// dec_byte[b] reverses the bit order while expanding (bit 7 -> lowest
// nibble, bit 0 -> highest nibble). The sprite blitters use this table when
// flip_x is zero and dec_byte_flip_x otherwise.
static u32 dec_byte[256] = {
    0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, 0x10100000, 0x01100000, 0x11100000,
    0x00010000, 0x10010000, 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, 0x11110000,
    0x00001000, 0x10001000, 0x01001000, 0x11001000, 0x00101000, 0x10101000, 0x01101000, 0x11101000,
    0x00011000, 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000, 0x01111000, 0x11111000,
    0x00000100, 0x10000100, 0x01000100, 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100,
    0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100, 0x10110100, 0x01110100, 0x11110100,
    0x00001100, 0x10001100, 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100, 0x11101100,
    0x00011100, 0x10011100, 0x01011100, 0x11011100, 0x00111100, 0x10111100, 0x01111100, 0x11111100,
    0x00000010, 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010, 0x01100010, 0x11100010,
    0x00010010, 0x10010010, 0x01010010, 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010,
    0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010, 0x10101010, 0x01101010, 0x11101010,
    0x00011010, 0x10011010, 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010, 0x11111010,
    0x00000110, 0x10000110, 0x01000110, 0x11000110, 0x00100110, 0x10100110, 0x01100110, 0x11100110,
    0x00010110, 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110, 0x01110110, 0x11110110,
    0x00001110, 0x10001110, 0x01001110, 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110,
    0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110, 0x10111110, 0x01111110, 0x11111110,
    0x00000001, 0x10000001, 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001, 0x11100001,
    0x00010001, 0x10010001, 0x01010001, 0x11010001, 0x00110001, 0x10110001, 0x01110001, 0x11110001,
    0x00001001, 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001, 0x01101001, 0x11101001,
    0x00011001, 0x10011001, 0x01011001, 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001,
    0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101, 0x10100101, 0x01100101, 0x11100101,
    0x00010101, 0x10010101, 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101, 0x11110101,
    0x00001101, 0x10001101, 0x01001101, 0x11001101, 0x00101101, 0x10101101, 0x01101101, 0x11101101,
    0x00011101, 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101, 0x01111101, 0x11111101,
    0x00000011, 0x10000011, 0x01000011, 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011,
    0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011, 0x10110011, 0x01110011, 0x11110011,
    0x00001011, 0x10001011, 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011, 0x11101011,
    0x00011011, 0x10011011, 0x01011011, 0x11011011, 0x00111011, 0x10111011, 0x01111011, 0x11111011,
    0x00000111, 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111, 0x01100111, 0x11100111,
    0x00010111, 0x10010111, 0x01010111, 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111,
    0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111, 0x10101111, 0x01101111, 0x11101111,
    0x00011111, 0x10011111, 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111, 0x11111111
};

// Blending table
//
// For each 4-bit blend mode CLR, the COLOR columns give the output color for
// source colors 0-3. Column T is 1 when the mode is opaque and 0 when source
// color 0 is transparent (marked with * in the table).
//
//     | BLEND BITS  | COLOR
// CLR | 0  0  0  0  | 0 1 2 3 T
// ----+-------------+----------
// 0x0 | 0  0  0  0  | 0 0 1 2 1
// 0x1 | 0  0  0  1  | 0 1 2 3 1
// 0x2 | 0  0  1  0  | 0 2 3 1 1
// 0x3 | 0  0  1  1  | 0 3 1 2 1
// 0x4 | 0  1  0  0  | 1 0 1 2 1
// 0x5 | 0  1  0  1  | * 1 2 3 0
// 0x6 | 0  1  1  0  | 1 2 3 1 1
// 0x7 | 0  1  1  1  | 1 3 1 2 1
// 0x8 | 1  0  0  0  | 2 0 1 2 1
// 0x9 | 1  0  0  1  | 2 1 2 3 1
// 0xA | 1  0  1  0  | * 2 3 1 0
// 0xB | 1  0  1  1  | 2 3 1 2 1
// 0xC | 1  1  0  0  | 3 0 1 2 1
// 0xD | 1  1  0  1  | 3 1 2 3 1
// 0xE | 1  1  1  0  | 3 2 3 1 1
// 0xF | 1  1  1  1  | * 3 1 2 0
// ----+-------------+----------
//
// Colors 0x5, 0xA and 0xF have transparent background and must be dealt
// with separately, blending color 0 with existing data in that pixel.
//
// We need to do the following:
//
// 1. Extract the color row as u32 (4bpp).
// 2. Split the row into each of its colors.
// 3. Multiply based on the table, for example for color blend 2: 0123 -> 0231
// 4. Obtain final color by ORing the colors from each channel.
// // clr0 = blending[0][clr]; // clr1 = blending[1][clr]; // clr2 = blending[2][clr]; // clr3 = blending[3][clr]; // color = 0x00112233; 0b 0000 0000 0001 0001 0010 0010 0011 0011 0x00112233 // col1mask = (color & 0x11111111); 0b 0000 0000 0001 0001 0000 0000 0001 0001 0x00110011 // col2mask = (color & 0x22222222) >> 1; 0b 0000 0000 0000 0000 0001 0001 0001 0001 0x00001111 // col3mask = (col1mask & col2mask) * 0xF; 0b 0000 0000 0000 0000 0000 0000 1111 1111 0x000000FF // col1mask &= ~col3mask; 0b 0000 0000 0000 0000 0000 0000 0001 0001 0x00000011 // col2mask &= ~col3mask; 0b 0000 0000 0000 0000 0001 0001 0000 0000 0x00001100 // col3mask = (color & col3mask) & 0x11111111; 0b 0000 0000 0000 0000 0000 0000 0001 0001 0x00000011 // col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; 0b 0001 0001 0000 0000 0000 0000 0000 0000 0x11000000 // color = (clr0 * col0mask) | // (clr1 * col1mask) | // (clr2 * col2mask) | // (clr3 * col3mask); // // Note that in case of transparent nodes col0mask can be used to mask off the // bits we want to pull from the existing framebuffer. // static u8 blending[5][16] = { {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map. {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, // Color 1 map. {1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1}, // Color 2 map. {2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2}, // Color 3 map. {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0}, // Transparency marker. 
}; static u32 dirty_tiles[21] = {0}; void putcolors(u8 *addr) { for(size_t i = 0; i < 4; ++i) { u8 r = (*(addr + 0 + i / 2) >> (!(i % 2) << 2)) & 0x0f; u8 g = (*(addr + 2 + i / 2) >> (!(i % 2) << 2)) & 0x0f; u8 b = (*(addr + 4 + i / 2) >> (!(i % 2) << 2)) & 0x0f; Color color = rgb15( (r << 1) | (r >> 3), (g << 1) | (g >> 3), (b << 1) | (b >> 3)); PAL_BUFFER_BG[i] = color; } } IWRAM_CODE void ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) { if (x > SCREEN_WIDTH || y > SCREEN_HEIGHT) return; size_t tile_x = x / 8; size_t tile_y = y / 8; size_t start_col = x % 8; size_t start_row = y % 8; size_t shift_left = start_col * 4; u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 mask = 0xF << shift_left; *dst = (*dst & ~mask) | (clr << shift_left); dirty_tiles[tile_y] |= 1 << tile_x; } static inline void redraw(void) { for (size_t i = 0; i < 21; i++) { dirty_tiles[i] = 0xFFFFFFFF; } } IWRAM_CODE void clear_screen(u32 *layer, u8 clr) { // We have to make sure we leave the last tile blank to use as alpha channel // when moving the BG during double buffering in case we are using that. dma_fill(layer, 0x11111111 * clr, KB(20) - 32, 3); redraw(); } IWRAM_CODE static inline void draw_hline(u32 *layer, size_t x0, size_t x1, size_t y0, u8 clr) { BOUNDCHECK_SCREEN(x0, y0); BOUNDCHECK_SCREEN(x1, y0); // Find row positions for the given x/y coordinates. size_t tile_x0 = x0 / 8; size_t tile_x1 = x1 / 8; size_t tile_y = y0 / 8; size_t start_col = x0 % 8; size_t end_col = x1 % 8; size_t start_row = y0 % 8; u32 dirtyflag = (1 << tile_x0) | (1 << tile_x1); // Horizontal line. There are 3 cases: // 1. Lines fit on a single tile. // 2. Lines go through 2 tiles, both require partial row updates. // 3. Lines go through 3 or more tiles, first and last tiles use // partial row updates, rows in the middle can write the entire // row. 
size_t dtx = tile_x1 - tile_x0; u32 *dst = &layer[start_row + (tile_x0 + tile_y * 32) * 8]; if (dtx < 1) { size_t shift_left = start_col * 4; size_t shift_right = (7 - end_col) * 4; u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); u32 row = (0x11111111 * clr) & mask; *dst = (*dst & ~mask) | row; } else { size_t shift_left = start_col * 4; size_t shift_right = (7 - end_col) * 4; u32 mask = 0xFFFFFFFF; u32 row = 0x11111111 * clr; *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); dst += 8; for (size_t i = 1; i < dtx; i++) { dirtyflag |= (1 << (tile_x0 + i)); *dst = row; dst += 8; } *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); } dirty_tiles[tile_y] |= dirtyflag; } IWRAM_CODE void screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) { MAYBE_SWAP(x0, x1); MAYBE_SWAP(y0, y1); // Special condition. If the screen is to be completely filled, use the DMA // instead. u16 max_width = SCREEN_WIDTH - 1; u16 max_height = SCREEN_HEIGHT - 1; if (x0 == 0 && x1 >= max_width && y0 == 0 && y1 >= max_height) { clear_screen(layer, clr); return; } // Drawline implementation. for (size_t y = y0; y <= y1; y++) { draw_hline(layer, x0, x1, y, clr); } } #if NEW_PPU == 0 IWRAM_CODE void ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { u8 sprline; u16 v; u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); u32 *layerptr = &layer[layerpos]; u32 shift = (x & 7) << 2; u32 *lut_expand = flip_x ? 
dec_byte_flip_x : dec_byte; if (flip_y) flip_y = 7; BOUNDCHECK_SCREEN(x, y); if (blending[4][clr]) { u64 mask = ~((u64)0xFFFFFFFF << shift); for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= SCREEN_HEIGHT) break; sprline = sprite[v ^ flip_y]; u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift; data |= (u64)(lut_expand[sprline ^ 0xFF] * (clr >> 2)) << shift; layerptr[0] = (layerptr[0] & mask) | data; layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; } } else { for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= SCREEN_HEIGHT) break; sprline = sprite[v ^ flip_y]; u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift); u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift; layerptr[0] = (layerptr[0] & mask) | data; layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; } } dirty_tiles[y >> 3] |= dirtyflag; dirty_tiles[(y + 7) >> 3] |= dirtyflag; } #else IWRAM_CODE UNROLL_LOOPS void ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { BOUNDCHECK_SCREEN(x, y); size_t tile_x = x / 8; size_t tile_y = y / 8; size_t start_col = x % 8; size_t start_row = y % 8; size_t shift_left = start_col * 4; size_t shift_right = (8 - start_col) * 4; u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3)); u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ? 
dec_byte_flip_x : dec_byte; if (blending[4][clr]) { u32 mask = 0xFFFFFFFF; if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u32 color_1 = lut[ch1]; u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111; u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; u32 color_1 = lut[ch1]; u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111; u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2)); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } else { if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u32 color= lut[ch1]; u32 mask = color * 0xF; color *= clr & 3; dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; u32 color= lut[ch1]; u32 mask = color * 0xF; color *= clr & 3; dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } dirty_tiles[y >> 3] |= dirtyflag; dirty_tiles[(y + 7) >> 3] |= dirtyflag; } #endif #if NEW_PPU == 0 IWRAM_CODE void ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flip_x, u8 flip_y) { u8 sprline1, sprline2; u8 xrightedge = x < ((32 - 1) * 8); u16 v, h; u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); u32 
layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); u32 *layerptr = &layer[layerpos]; u32 shift = (x & 7) << 2; if (flip_y) flip_y = 7; BOUNDCHECK_SCREEN(x, y); if (color == 1) { u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte; u64 mask = ~((u64)0xFFFFFFFF << shift); for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= (24 * 8)) break; sprline1 = sprite[v ^ flip_y]; sprline2 = sprite[(v ^ flip_y) | 8]; u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); u64 data = ((u64) (data32 & 0x33333333)) << shift; layerptr[0] = (layerptr[0] & mask) | data; if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; } } else if (blending[4][color]) { u64 mask = ~((u64)0xFFFFFFFF << shift); for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= (24 * 8)) break; u8 ch1 = sprite[v ^ flip_y]; u8 ch2 = sprite[(v ^ flip_y) | 8]; u32 data32 = 0; if (!flip_x) { for (h = 0; h < 8; h++) { data32 <<= 4; u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); data32 |= blending[ch][color]; ch1 >>= 1; ch2 >>= 1; } } else { for (h = 0; h < 8; h++) { data32 <<= 4; u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); data32 |= blending[ch][color]; ch1 <<= 1; ch2 <<= 1; } } u64 data = ((u64) (data32 & 0x33333333)) << shift; layerptr[0] = (layerptr[0] & mask) | data; if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; } } else { for (v = 0; v < 8; v++, layerptr++) { if ((y + v) >= (24 * 8)) break; u8 ch1 = sprite[v ^ flip_y]; u8 ch2 = sprite[(v ^ flip_y) | 8]; u32 data32 = 0; u32 mask32 = 0; if (!flip_x) { for (h = 0; h < 8; h++) { data32 <<= 4; mask32 <<= 4; if ((ch1 | ch2) & 1) { u8 ch = (ch1 & 1) | ((ch2 & 1) << 1); data32 |= blending[ch][color]; mask32 |= 0xF; } ch1 >>= 1; ch2 >>= 1; } } else { for (h = 0; h < 8; h++) { data32 <<= 4; mask32 <<= 4; if ((ch1 | ch2) & 128) { u8 ch = (ch1 >> 7) | ((ch2 >> 7) << 1); data32 |= blending[ch][color]; mask32 |= 0xF; } ch1 
<<= 1; ch2 <<= 1; } } u64 data = ((u64) (data32 & 0x33333333)) << shift; u64 mask = ~(((u64) (mask32 & 0x33333333)) << shift); layerptr[0] = (layerptr[0] & mask) | data; if (xrightedge) layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); if (((y + v) & 7) == 7) layerptr += (32 - 1) * 8; } } dirty_tiles[y >> 3] |= dirtyflag; dirty_tiles[(y + 7) >> 3] |= dirtyflag; } #else IWRAM_CODE UNROLL_LOOPS void ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { BOUNDCHECK_SCREEN(x, y); size_t tile_x = x / 8; size_t tile_y = y / 8; size_t start_col = x % 8; size_t start_row = y % 8; size_t shift_left = start_col * 4; size_t shift_right = (8 - start_col) * 4; u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3)); u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8]; u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; if (clr == 1) { u32 mask = 0xFFFFFFFF; if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u8 ch2 = sprite[v | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; u8 ch2 = sprite[(7 - v) | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } else if (blending[4][clr]) { u32 mask = 0xFFFFFFFF; u8 clr0 = blending[0][clr]; u8 clr1 = blending[1][clr]; u8 clr2 = blending[2][clr]; u8 clr3 = blending[3][clr]; if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u8 ch2 = sprite[v | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); u32 col1mask = (color & 0x11111111); u32 
col2mask = (color & 0x22222222) >> 1; u32 col3mask = (col1mask & col2mask) * 0xF; col1mask &= ~col3mask; col2mask &= ~col3mask; col3mask = (color & col3mask) & 0x11111111; u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; color = (clr0 * col0mask) | (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; u8 ch2 = sprite[(7 - v) | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); u32 col1mask = (color & 0x11111111); u32 col2mask = (color & 0x22222222) >> 1; u32 col3mask = (col1mask & col2mask) * 0xF; col1mask &= ~col3mask; col2mask &= ~col3mask; col3mask = (color & col3mask) & 0x11111111; u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; color = (clr0 * col0mask) | (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } else { u8 clr1 = blending[1][clr]; u8 clr2 = blending[2][clr]; u8 clr3 = blending[3][clr]; if (!flip_y) { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[v]; u8 ch2 = sprite[v | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); u32 col1mask = (color & 0x11111111); u32 col2mask = (color & 0x22222222) >> 1; u32 col3mask = (col1mask & col2mask) * 0xF; col1mask &= ~col3mask; col2mask &= ~col3mask; col3mask = (color & col3mask) & 0x11111111; u32 mask = (col1mask | col2mask | col3mask) * 0xF; color = (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) 
dst += (32 - 1) * 8; } } else { for(size_t v = 0; v < 8; v++, dst++) { if ((y + v) >= SCREEN_HEIGHT) break; u8 ch1 = sprite[(7 - v)]; u8 ch2 = sprite[(7 - v) | 8]; u32 color = lut[ch1] | (lut[ch2] << 1); u32 col1mask = (color & 0x11111111); u32 col2mask = (color & 0x22222222) >> 1; u32 col3mask = (col1mask & col2mask) * 0xF; col1mask &= ~col3mask; col2mask &= ~col3mask; col3mask = (color & col3mask) & 0x11111111; u32 mask = (col1mask | col2mask | col3mask) * 0xF; color = (clr1 * col1mask) | (clr2 * col2mask) | (clr3 * col3mask); dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); if ((start_row + v) == 7) dst += (32 - 1) * 8; } } } dirty_tiles[y >> 3] |= dirtyflag; dirty_tiles[(y + 7) >> 3] |= dirtyflag; } #endif IWRAM_CODE void putfontchar(u32 *layer, u16 tile_x, u16 tile_y, u8 ch, u8 color) { u32 pos = (tile_x + tile_y * 32) * 8; u32 *tile_data = &layer[pos]; u32 *font_data = &FONT_DATA[8 * ch]; for (size_t i = 0; i < 8; ++i) { tile_data[i] = font_data[i] * color; } dirty_tiles[tile_y] |= 1 << tile_x; } IWRAM_CODE void flipbuf() { u32 *fg_back = FG_BACK; u32 *bg_back = BG_BACK; u32 *bg_front = BG_FRONT; u32 *fg_front = FG_FRONT; for (size_t j = 0; j < 20; ++j) { if (dirty_tiles[j] == 0) { continue; } #if FLIPBUF_DMA == 1 u32 offset = j * 32 * 8; dma_copy(fg_front + offset, fg_back + offset, 32 * 8 * 4, 3); dma_copy(bg_front + offset, bg_back + offset, 32 * 8 * 4, 3); #else size_t k = 1; for (size_t i = 0; i < 30; ++i, k <<= 1) { if (dirty_tiles[j] & k) { Tile *mem_fg = FG_FRONT; Tile *mem_bg = BG_FRONT; Tile *tile_fg = FG_BACK; Tile *tile_bg = BG_BACK; mem_fg[i + j * 32] = tile_fg[i + j * 32]; mem_bg[i + j * 32] = tile_bg[i + j * 32]; } } #endif dirty_tiles[j] = 0; } } typedef struct KeyboardChar { int x; int y; u8 symbol; } KeyboardChar; static u8 cursor_position = 0; #define KEYBOARD_ROW_SIZE 12 #define KEYBOARD_START_TILE_X (30 / 2 - KEYBOARD_ROW_SIZE / 2) #define 
KEYBOARD_START_TILE_Y (20 / 2 - 3) KeyboardChar keyboard[] = { {0, 0, '!'}, {0, 0, '?'}, {0, 0, '@'}, {0, 0, '#'}, {0, 0, '$'}, {0, 0, '%'}, {0, 0, '^'}, {0, 0, '&'}, {0, 0, '*'}, {0, 0, '"'}, {0, 0, '\''}, {0, 0, 0x7f}, {0, 0, '('}, {0, 0, ')'}, {0, 0, '['}, {0, 0, ']'}, {0, 0, '{'}, {0, 0, '}'}, {0, 0, '<'}, {0, 0, '>'}, {0, 0, '+'}, {0, 0, '-'}, {0, 0, '='}, {0, 0, 0x14}, {0, 0, '0'}, {0, 0, '1'}, {0, 0, '2'}, {0, 0, '3'}, {0, 0, '4'}, {0, 0, '5'}, {0, 0, '6'}, {0, 0, '7'}, {0, 0, '8'}, {0, 0, '9'}, {0, 0, '~'}, {0, 0, 0x18}, {0, 0, 'a'}, {0, 0, 'b'}, {0, 0, 'c'}, {0, 0, 'd'}, {0, 0, 'e'}, {0, 0, 'f'}, {0, 0, 'g'}, {0, 0, 'h'}, {0, 0, 'i'}, {0, 0, 'j'}, {0, 0, '/'}, {0, 0, 0x19}, {0, 0, 'k'}, {0, 0, 'l'}, {0, 0, 'm'}, {0, 0, 'n'}, {0, 0, 'o'}, {0, 0, 'p'}, {0, 0, 'q'}, {0, 0, 'r'}, {0, 0, 's'}, {0, 0, 't'}, {0, 0, '\\'}, {0, 0, 0x1b}, {0, 0, 'u'}, {0, 0, 'v'}, {0, 0, 'w'}, {0, 0, 'x'}, {0, 0, 'y'}, {0, 0, 'z'}, {0, 0, ','}, {0, 0, '.'}, {0, 0, ';'}, {0, 0, ':'}, {0, 0, ' '}, {0, 0, 0x1a}, }; bool keyboard_on = false; void toggle_keyboard(void) { DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ; for (size_t i = 0; i < LEN(keyboard); ++i) { OBJ_ATTR_0(i) ^= OBJ_HIDDEN; } OBJ_ATTR_0(127) ^= OBJ_HIDDEN; if (keyboard_on) { keyboard_on = false; DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; } else { keyboard_on = true; DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ; } } void update_cursor(u8 pos) { cursor_position = CLAMP(pos, 0, LEN(keyboard) - 1); OBJ_ATTR_0(127) = (OBJ_ATTR_0(127) & ~0xFF) | OBJ_Y_COORD(keyboard[cursor_position].y); OBJ_ATTR_1(127) = (OBJ_ATTR_0(127) & ~0x1FF) | OBJ_X_COORD(keyboard[cursor_position].x); } void video_init() { // Clear VRAM. dma_fill((u32*)MEM_VRAM, 0, KB(96), 3); // Initialize display mode and bg palette. DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; // Initialize backgrounds. 
u8 cb_fg = 0; u8 cb_bg = 1; u8 sb_fg = 20; u8 sb_bg = 21; BG_CTRL(0) = BG_CHARBLOCK(cb_fg) | BG_SCREENBLOCK(sb_fg) | BG_PRIORITY(1); BG_CTRL(1) = BG_CHARBLOCK(cb_bg) | BG_SCREENBLOCK(sb_bg) | BG_PRIORITY(2); // Initialize default palette. PAL_BUFFER_BG[0] = COLOR_BLACK; PAL_BUFFER_BG[1] = COLOR_WHITE; PAL_BUFFER_BG[2] = COLOR_RED; PAL_BUFFER_BG[3] = COLOR_BLUE; for (size_t i = 0; i < 16; ++i) { PAL_BUFFER_SPRITES[i] = COLOR_BLACK; PAL_BUFFER_SPRITES[i + 1 * 16] = COLOR_WHITE; PAL_BUFFER_SPRITES[i + 2 * 16] = COLOR_RED; PAL_BUFFER_SPRITES[i + 3 * 16] = COLOR_BLUE; } // Initialize background memory map. u16 *mem_map_fg = SCREENBLOCK_MEM[sb_fg]; u16 *mem_map_bg = SCREENBLOCK_MEM[sb_bg]; size_t k = 0; for (size_t i = 0; i < 32 * 20; ++i, ++k) { mem_map_fg[i] = k; mem_map_bg[i] = k + 32 * 4; } // Load font data into VRAM. unpack_tiles(&bd_font, FONT_DATA, 256); // Initialize keyboard sprites. int tile_x = KEYBOARD_START_TILE_X; int tile_y = KEYBOARD_START_TILE_Y; for (size_t i = 0; i < sizeof(keyboard) / sizeof(keyboard[0]); ++i) { keyboard[i].x = tile_x * 8; keyboard[i].y = tile_y * 8; OBJ_ATTR_0(i) = OBJ_SHAPE_SQUARE | OBJ_Y_COORD(keyboard[i].y) | OBJ_HIDDEN; OBJ_ATTR_1(i) = OBJ_SIZE_SMALL | OBJ_X_COORD(keyboard[i].x); OBJ_ATTR_2(i) = (SPRITE_START_IDX + keyboard[i].symbol) | OBJ_PAL_BANK(2); tile_x++; if (tile_x - KEYBOARD_START_TILE_X >= KEYBOARD_ROW_SIZE) { tile_x = KEYBOARD_START_TILE_X; tile_y++; } } OBJ_ATTR_0(127) = OBJ_SHAPE_SQUARE | OBJ_Y_COORD(keyboard[cursor_position].y) | OBJ_HIDDEN; OBJ_ATTR_1(127) = OBJ_SIZE_SMALL | OBJ_X_COORD(keyboard[cursor_position].x); OBJ_ATTR_2(127) = (SPRITE_START_IDX + 0xdb) | OBJ_PAL_BANK(3); }