aboutsummaryrefslogtreecommitdiffstats
path: root/src/ppu.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/ppu.c')
-rw-r--r--src/ppu.c524
1 files changed, 439 insertions, 85 deletions
diff --git a/src/ppu.c b/src/ppu.c
index 018555f..d22b3fd 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -15,6 +15,9 @@ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
15WITH REGARD TO THIS SOFTWARE. 15WITH REGARD TO THIS SOFTWARE.
16*/ 16*/
17 17
18#define NEW_PPU 1
19#define FLIPBUF_DMA 1
20
18#define FG_FRONT ((u32*)(MEM_VRAM)) 21#define FG_FRONT ((u32*)(MEM_VRAM))
19#define BG_FRONT ((u32*)(MEM_VRAM + KB(20))) 22#define BG_FRONT ((u32*)(MEM_VRAM + KB(20)))
20#define FG_BACK ((u32*)(MEM_VRAM + KB(44))) 23#define FG_BACK ((u32*)(MEM_VRAM + KB(44)))
@@ -22,10 +25,23 @@ WITH REGARD TO THIS SOFTWARE.
22#define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) 25#define TILE_MAP ((u32*)(MEM_VRAM + KB(40)))
23#define FONT_DATA ((u32*)(MEM_VRAM + KB(84))) 26#define FONT_DATA ((u32*)(MEM_VRAM + KB(84)))
24 27
// Early-exit guard for drawing routines: returns from the enclosing void
// function when (X, Y) lies outside the visible screen. Defining
// DISABLE_BOUNDCHECK_SCREEN compiles the check out entirely.
//
// Both variants expand to exactly one statement, so the call site must end in
// a semicolon and the macro is safe as the body of an unbraced if/else.
#ifdef DISABLE_BOUNDCHECK_SCREEN
#define BOUNDCHECK_SCREEN(X,Y) do { } while (0)
#else
#define BOUNDCHECK_SCREEN(X,Y) \
    do { \
        if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; \
    } while (0)
#endif

// Swap A and B values without a tmp variable (XOR swap). Both arguments must
// be distinct integer lvalues: swapping an object with itself zeroes it, and
// each argument is evaluated more than once.
#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))

// Swap A and B values to make sure A <= B. Expands to a single statement so it
// cannot capture a following `else`.
#define MAYBE_SWAP(A,B) do { if ((A) > (B)) { SWAP(A,B); } } while (0)
39
25// Keyboard. 40// Keyboard.
26#define SPRITE_START_IDX 640 41#define SPRITE_START_IDX 640
27 42
28static u32 lut_2bpp[256] = { 43// TODO: Can we put these tables on the VRAM for extra speed?
44static u32 dec_byte_flip_x[256] = {
29 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 45 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100,
30 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, 46 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001,
31 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 47 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110,
@@ -80,7 +96,7 @@ static u32 lut_2bpp[256] = {
80 0x11111111 96 0x11111111
81}; 97};
82 98
83static u32 lut2bpp_flipx[256] = { 99static u32 dec_byte[256] = {
84 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, 100 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000,
85 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000, 101 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000,
86 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, 102 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000,
@@ -135,54 +151,186 @@ static u32 lut2bpp_flipx[256] = {
135 0x11111111 151 0x11111111
136}; 152};
137 153
154// Blending table
155//
156// | BLEND BITS | COLOR
157// CLR | 0 0 0 0 | 0 1 2 3 T
158// ----+-------------+----------
159// 0x0 | 0 0 0 0 | 0 0 1 2 1
160// 0x1 | 0 0 0 1 | 0 1 2 3 1
161// 0x2 | 0 0 1 0 | 0 2 3 1 1
162// 0x3 | 0 0 1 1 | 0 3 1 2 1
163// 0x4 | 0 1 0 0 | 1 0 1 2 1
164// 0x5 | 0 1 0 1 | * 1 2 3 0
165// 0x6 | 0 1 1 0 | 1 2 3 1 1
166// 0x7 | 0 1 1 1 | 1 3 1 2 1
167// 0x8 | 1 0 0 0 | 2 0 1 2 1
168// 0x9 | 1 0 0 1 | 2 1 2 3 1
169// 0xA | 1 0 1 0 | * 2 3 1 0
170// 0xB | 1 0 1 1 | 2 3 1 2 1
171// 0xC | 1 1 0 0 | 3 0 1 2 1
172// 0xD | 1 1 0 1 | 3 1 2 3 1
173// 0xE | 1 1 1 0 | 3 2 3 1 1
174// 0xF | 1 1 1 1 | * 3 1 2 0
175// ----+-------------+----------
176//
177// Colors 0x5, 0xA and 0xF have transparent background and must be dealt
178// with separately, blending color 0 with existing data in that pixel.
179//
180// We need to do the following:
181//
182// 1. Extract the color row as u32 (4bpp).
183// 2. Split the row into each of its colors.
184// 3. Multiply based on the table, for example for color blend 2: 0123 -> 0231
185// 4. Obtain final color by ORing the colors from each channel.
186//
187// clr0 = blending[0][clr];
188// clr1 = blending[1][clr];
189// clr2 = blending[2][clr];
190// clr3 = blending[3][clr];
191// color = 0x00112233; 0b 0000 0000 0001 0001 0010 0010 0011 0011 0x00112233
192// col1mask = (color & 0x11111111); 0b 0000 0000 0001 0001 0000 0000 0001 0001 0x00110011
193// col2mask = (color & 0x22222222) >> 1; 0b 0000 0000 0000 0000 0001 0001 0001 0001 0x00001111
194// col3mask = (col1mask & col2mask) * 0xF; 0b 0000 0000 0000 0000 0000 0000 1111 1111 0x000000FF
195// col1mask &= ~col3mask; 0b 0000 0000 0001 0001 0000 0000 0000 0000 0x00110000
196// col2mask &= ~col3mask; 0b 0000 0000 0000 0000 0001 0001 0000 0000 0x00001100
197// col3mask = (color & col3mask) & 0x11111111; 0b 0000 0000 0000 0000 0000 0000 0001 0001 0x00000011
198// col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111; 0b 0001 0001 0000 0000 0000 0000 0000 0000 0x11000000
199// color = (clr0 * col0mask) |
200// (clr1 * col1mask) |
201// (clr2 * col2mask) |
202// (clr3 * col3mask);
203//
204// Note that for the transparent blend modes col0mask can be used to mask off the
205// bits we want to pull from the existing framebuffer.
206//
138static u8 blending[5][16] = { 207static u8 blending[5][16] = {
139 {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, 208 {0, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 2, 3, 3, 3, 0}, // Color 0 map.
140 {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, 209 {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, // Color 1 map.
141 {1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1}, 210 {1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1}, // Color 2 map.
142 {2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2}, 211 {2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2, 2, 3, 1, 2}, // Color 3 map.
143 {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0} 212 {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0}, // Transparency marker.
144}; 213};
145 214
146static u32 dirty_tiles[21] = {0}; 215static u32 dirty_tiles[21] = {0};
147 216
148void 217void
149putcolors(u8 *addr) { 218putcolors(u8 *addr) {
150 int i; 219 for(size_t i = 0; i < 4; ++i) {
151 for(i = 0; i < 4; ++i) { 220 u8 r = (*(addr + 0 + i / 2) >> (!(i % 2) << 2)) & 0x0f;
152 u8 221 u8 g = (*(addr + 2 + i / 2) >> (!(i % 2) << 2)) & 0x0f;
153 r = (*(addr + i / 2) >> (!(i % 2) << 2)) & 0x0f, 222 u8 b = (*(addr + 4 + i / 2) >> (!(i % 2) << 2)) & 0x0f;
154 g = (*(addr + 2 + i / 2) >> (!(i % 2) << 2)) & 0x0f, 223 Color color = rgb15(
155 b = (*(addr + 4 + i / 2) >> (!(i % 2) << 2)) & 0x0f;
156 PAL_BUFFER_BG[i] = rgb15(
157 (r << 1) | (r >> 3), 224 (r << 1) | (r >> 3),
158 (g << 1) | (g >> 3), 225 (g << 1) | (g >> 3),
159 (b << 1) | (b >> 3)); 226 (b << 1) | (b >> 3));
160 for (size_t j = 0; j < 16; ++j) { 227 PAL_BUFFER_BG[i] = color;
161 PAL_BUFFER_SPRITES[i * 16 + j] = rgb15(
162 (r << 1) | (r >> 3),
163 (g << 1) | (g >> 3),
164 (b << 1) | (b >> 3));
165 }
166 } 228 }
167} 229}
168 230
169IWRAM_CODE 231IWRAM_CODE
170void 232void
171ppu_pixel(u32 *layer, u16 x, u16 y, u8 color) { 233ppu_pixel(u32 *layer, u16 x, u16 y, u8 clr) {
172 if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; 234 if (x > SCREEN_WIDTH || y > SCREEN_HEIGHT) return;
173 size_t tile_x = x / 8; 235 size_t tile_x = x / 8;
174 size_t tile_y = y / 8; 236 size_t tile_y = y / 8;
175 size_t start_col = x % 8; 237 size_t start_col = x % 8;
176 size_t start_row = y % 8; 238 size_t start_row = y % 8;
177 size_t pos = (start_row + ((tile_x + tile_y * 32) * 8)); 239 size_t shift_left = start_col * 4;
178 size_t shift = start_col * 4; 240 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
179 layer[pos] = (layer[pos] & (~(0xF << shift))) | (color << shift); 241 u32 mask = 0xF << shift_left;
242 *dst = (*dst & ~mask) | (clr << shift_left);
180 dirty_tiles[tile_y] |= 1 << tile_x; 243 dirty_tiles[tile_y] |= 1 << tile_x;
181} 244}
182 245
246static inline
247void
248redraw(void) {
249 for (size_t i = 0; i < 21; i++) {
250 dirty_tiles[i] = 0xFFFFFFFF;
251 }
252}
253
254IWRAM_CODE
255void clear_screen(u32 *layer, u8 clr) {
256 // We have to make sure we leave the last tile blank to use as alpha channel
257 // when moving the BG during double buffering in case we are using that.
258 dma_fill(layer, 0x11111111 * clr, KB(20) - 32, 3);
259 redraw();
260}
261
262IWRAM_CODE
263static inline
264void
265draw_hline(u32 *layer, size_t x0, size_t x1, size_t y0, u8 clr) {
266 BOUNDCHECK_SCREEN(x0, y0);
267 BOUNDCHECK_SCREEN(x1, y0);
268 // Find row positions for the given x/y coordinates.
269 size_t tile_x0 = x0 / 8;
270 size_t tile_x1 = x1 / 8;
271 size_t tile_y = y0 / 8;
272 size_t start_col = x0 % 8;
273 size_t end_col = x1 % 8;
274 size_t start_row = y0 % 8;
275 u32 dirtyflag = (1 << tile_x0) | (1 << tile_x1);
276
277 // Horizontal line. There are 3 cases:
278 // 1. Lines fit on a single tile.
279 // 2. Lines go through 2 tiles, both require partial row updates.
280 // 3. Lines go through 3 or more tiles, first and last tiles use
281 // partial row updates, rows in the middle can write the entire
282 // row.
283 size_t dtx = tile_x1 - tile_x0;
284 u32 *dst = &layer[start_row + (tile_x0 + tile_y * 32) * 8];
285 if (dtx < 1) {
286 size_t shift_left = start_col * 4;
287 size_t shift_right = (7 - end_col) * 4;
288 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left);
289 u32 row = (0x11111111 * clr) & mask;
290 *dst = (*dst & ~mask) | row;
291 } else {
292 size_t shift_left = start_col * 4;
293 size_t shift_right = (7 - end_col) * 4;
294 u32 mask = 0xFFFFFFFF;
295 u32 row = 0x11111111 * clr;
296 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
297 dst += 8;
298 for (size_t i = 1; i < dtx; i++) {
299 dirtyflag |= (1 << (tile_x0 + i));
300 *dst = row;
301 dst += 8;
302 }
303 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
304 }
305
306 dirty_tiles[tile_y] |= dirtyflag;
307}
308
183IWRAM_CODE 309IWRAM_CODE
184void 310void
185ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) { 311screen_fill(u32 *layer, u16 x0, u16 y0, u16 x1, u16 y1, u8 clr) {
312 MAYBE_SWAP(x0, x1);
313 MAYBE_SWAP(y0, y1);
314
315 // Special condition. If the screen is to be completely filled, use the DMA
316 // instead.
317 u16 max_width = SCREEN_WIDTH - 1;
318 u16 max_height = SCREEN_HEIGHT - 1;
319 if (x0 == 0 && x1 >= max_width && y0 == 0 && y1 >= max_height) {
320 clear_screen(layer, clr);
321 return;
322 }
323
324 // Drawline implementation.
325 for (size_t y = y0; y <= y1; y++) {
326 draw_hline(layer, x0, x1, y, clr);
327 }
328}
329
330#if NEW_PPU == 0
331IWRAM_CODE
332void
333ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
186 u8 sprline; 334 u8 sprline;
187 u16 v; 335 u16 v;
188 u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3)); 336 u32 dirtyflag = (1 << (x >> 3)) | (1 << ((x + 7) >> 3));
@@ -190,21 +338,21 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
190 u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8)); 338 u32 layerpos = ((y & 7) + (((x >> 3) + (y >> 3) * 32) * 8));
191 u32 *layerptr = &layer[layerpos]; 339 u32 *layerptr = &layer[layerpos];
192 u32 shift = (x & 7) << 2; 340 u32 shift = (x & 7) << 2;
193 u32 *lut_expand = flipx ? lut_2bpp : lut2bpp_flipx; 341 u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte;
194 342
195 if (flipy) flipy = 7; 343 if (flip_y) flip_y = 7;
196 344
197 if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; 345 BOUNDCHECK_SCREEN(x, y);
198 346
199 if (blending[4][color]) { 347 if (blending[4][clr]) {
200 u64 mask = ~((u64)0xFFFFFFFF << shift); 348 u64 mask = ~((u64)0xFFFFFFFF << shift);
201 349
202 for (v = 0; v < 8; v++, layerptr++) { 350 for (v = 0; v < 8; v++, layerptr++) {
203 if ((y + v) >= SCREEN_HEIGHT) break; 351 if ((y + v) >= SCREEN_HEIGHT) break;
204 352
205 sprline = sprite[v ^ flipy]; 353 sprline = sprite[v ^ flip_y];
206 u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; 354 u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift;
207 data |= (u64)(lut_expand[sprline ^ 0xFF] * (color >> 2)) << shift; 355 data |= (u64)(lut_expand[sprline ^ 0xFF] * (clr >> 2)) << shift;
208 356
209 layerptr[0] = (layerptr[0] & mask) | data; 357 layerptr[0] = (layerptr[0] & mask) | data;
210 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); 358 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
@@ -215,9 +363,9 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
215 for (v = 0; v < 8; v++, layerptr++) { 363 for (v = 0; v < 8; v++, layerptr++) {
216 if ((y + v) >= SCREEN_HEIGHT) break; 364 if ((y + v) >= SCREEN_HEIGHT) break;
217 365
218 sprline = sprite[v ^ flipy]; 366 sprline = sprite[v ^ flip_y];
219 u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift); 367 u64 mask = ~((u64)(lut_expand[sprline] * 0xF) << shift);
220 u64 data = (u64)(lut_expand[sprline] * (color & 3)) << shift; 368 u64 data = (u64)(lut_expand[sprline] * (clr & 3)) << shift;
221 369
222 layerptr[0] = (layerptr[0] & mask) | data; 370 layerptr[0] = (layerptr[0] & mask) | data;
223 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32); 371 layerptr[8] = (layerptr[8] & (mask >> 32)) | (data >> 32);
@@ -230,10 +378,81 @@ ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flipx, u8 flipy) {
230 dirty_tiles[(y + 7) >> 3] |= dirtyflag; 378 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
231} 379}
232 380
381#else
233IWRAM_CODE 382IWRAM_CODE
383UNROLL_LOOPS
234void 384void
235ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, 385ppu_1bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
236 u8 flipx, u8 flipy) { 386 BOUNDCHECK_SCREEN(x, y);
387 size_t tile_x = x / 8;
388 size_t tile_y = y / 8;
389 size_t start_col = x % 8;
390 size_t start_row = y % 8;
391 size_t shift_left = start_col * 4;
392 size_t shift_right = (8 - start_col) * 4;
393 u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3));
394
395 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
396 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
397 if (blending[4][clr]) {
398 u32 mask = 0xFFFFFFFF;
399 if (!flip_y) {
400 for(size_t v = 0; v < 8; v++, dst++) {
401 if ((y + v) >= SCREEN_HEIGHT) break;
402 u8 ch1 = sprite[v];
403 u32 color_1 = lut[ch1];
404 u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111;
405 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2));
406 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
407 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
408 if ((start_row + v) == 7) dst += (32 - 1) * 8;
409 }
410 } else {
411 for(size_t v = 0; v < 8; v++, dst++) {
412 if ((y + v) >= SCREEN_HEIGHT) break;
413 u8 ch1 = sprite[(7 - v)];
414 u32 color_1 = lut[ch1];
415 u32 color_2 = (color_1 ^ 0xFFFFFFFF) & 0x11111111;
416 u32 color = (color_1 * (clr & 3)) | (color_2 * (clr >> 2));
417 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
418 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
419 if ((start_row + v) == 7) dst += (32 - 1) * 8;
420 }
421 }
422 } else {
423 if (!flip_y) {
424 for(size_t v = 0; v < 8; v++, dst++) {
425 if ((y + v) >= SCREEN_HEIGHT) break;
426 u8 ch1 = sprite[v];
427 u32 color= lut[ch1];
428 u32 mask = color * 0xF;
429 color *= clr & 3;
430 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
431 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
432 if ((start_row + v) == 7) dst += (32 - 1) * 8;
433 }
434 } else {
435 for(size_t v = 0; v < 8; v++, dst++) {
436 if ((y + v) >= SCREEN_HEIGHT) break;
437 u8 ch1 = sprite[(7 - v)];
438 u32 color= lut[ch1];
439 u32 mask = color * 0xF;
440 color *= clr & 3;
441 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
442 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
443 if ((start_row + v) == 7) dst += (32 - 1) * 8;
444 }
445 }
446 }
447 dirty_tiles[y >> 3] |= dirtyflag;
448 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
449}
450#endif
451
452#if NEW_PPU == 0
453IWRAM_CODE
454void
455ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color, u8 flip_x, u8 flip_y) {
237 u8 sprline1, sprline2; 456 u8 sprline1, sprline2;
238 u8 xrightedge = x < ((32 - 1) * 8); 457 u8 xrightedge = x < ((32 - 1) * 8);
239 u16 v, h; 458 u16 v, h;
@@ -243,20 +462,19 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color,
243 u32 *layerptr = &layer[layerpos]; 462 u32 *layerptr = &layer[layerpos];
244 u32 shift = (x & 7) << 2; 463 u32 shift = (x & 7) << 2;
245 464
246 if (flipy) flipy = 7; 465 if (flip_y) flip_y = 7;
247 466
248 if (x >= SCREEN_WIDTH || y >= SCREEN_HEIGHT) return; 467 BOUNDCHECK_SCREEN(x, y);
249 468
250 if (color == 1) { 469 if (color == 1) {
251 u32 *lut_expand = flipx ? lut_2bpp : lut2bpp_flipx; 470 u32 *lut_expand = flip_x ? dec_byte_flip_x : dec_byte;
252 u64 mask = ~((u64)0xFFFFFFFF << shift); 471 u64 mask = ~((u64)0xFFFFFFFF << shift);
253 472
254 for (v = 0; v < 8; v++, layerptr++) { 473 for (v = 0; v < 8; v++, layerptr++) {
255 if ((y + v) >= (24 * 8)) break; 474 if ((y + v) >= (24 * 8)) break;
256 475
257 sprline1 = sprite[v ^ flipy]; 476 sprline1 = sprite[v ^ flip_y];
258 sprline2 = sprite[(v ^ flipy) | 8]; 477 sprline2 = sprite[(v ^ flip_y) | 8];
259
260 u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1); 478 u32 data32 = (lut_expand[sprline1]) | (lut_expand[sprline2] << 1);
261 u64 data = ((u64) (data32 & 0x33333333)) << shift; 479 u64 data = ((u64) (data32 & 0x33333333)) << shift;
262 480
@@ -271,11 +489,11 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color,
271 for (v = 0; v < 8; v++, layerptr++) { 489 for (v = 0; v < 8; v++, layerptr++) {
272 if ((y + v) >= (24 * 8)) break; 490 if ((y + v) >= (24 * 8)) break;
273 491
274 u8 ch1 = sprite[v ^ flipy]; 492 u8 ch1 = sprite[v ^ flip_y];
275 u8 ch2 = sprite[(v ^ flipy) | 8]; 493 u8 ch2 = sprite[(v ^ flip_y) | 8];
276 u32 data32 = 0; 494 u32 data32 = 0;
277 495
278 if (!flipx) { 496 if (!flip_x) {
279 for (h = 0; h < 8; h++) { 497 for (h = 0; h < 8; h++) {
280 data32 <<= 4; 498 data32 <<= 4;
281 499
@@ -306,12 +524,12 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color,
306 for (v = 0; v < 8; v++, layerptr++) { 524 for (v = 0; v < 8; v++, layerptr++) {
307 if ((y + v) >= (24 * 8)) break; 525 if ((y + v) >= (24 * 8)) break;
308 526
309 u8 ch1 = sprite[v ^ flipy]; 527 u8 ch1 = sprite[v ^ flip_y];
310 u8 ch2 = sprite[(v ^ flipy) | 8]; 528 u8 ch2 = sprite[(v ^ flip_y) | 8];
311 u32 data32 = 0; 529 u32 data32 = 0;
312 u32 mask32 = 0; 530 u32 mask32 = 0;
313 531
314 if (!flipx) { 532 if (!flip_x) {
315 for (h = 0; h < 8; h++) { 533 for (h = 0; h < 8; h++) {
316 data32 <<= 4; mask32 <<= 4; 534 data32 <<= 4; mask32 <<= 4;
317 535
@@ -350,6 +568,143 @@ ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 color,
350 dirty_tiles[y >> 3] |= dirtyflag; 568 dirty_tiles[y >> 3] |= dirtyflag;
351 dirty_tiles[(y + 7) >> 3] |= dirtyflag; 569 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
352} 570}
571#else
572IWRAM_CODE
573UNROLL_LOOPS
574void
575ppu_2bpp(u32 *layer, u16 x, u16 y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
576 BOUNDCHECK_SCREEN(x, y);
577 size_t tile_x = x / 8;
578 size_t tile_y = y / 8;
579 size_t start_col = x % 8;
580 size_t start_row = y % 8;
581 size_t shift_left = start_col * 4;
582 size_t shift_right = (8 - start_col) * 4;
583 u32 dirtyflag = (1 << tile_x) | (1 << ((x + 7) >> 3));
584 u32 *dst = &layer[start_row + (tile_x + tile_y * 32) * 8];
585 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte;
586 if (clr == 1) {
587 u32 mask = 0xFFFFFFFF;
588 if (!flip_y) {
589 for(size_t v = 0; v < 8; v++, dst++) {
590 if ((y + v) >= SCREEN_HEIGHT) break;
591 u8 ch1 = sprite[v];
592 u8 ch2 = sprite[v | 8];
593 u32 color = lut[ch1] | (lut[ch2] << 1);
594 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
595 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
596 if ((start_row + v) == 7) dst += (32 - 1) * 8;
597 }
598 } else {
599 for(size_t v = 0; v < 8; v++, dst++) {
600 if ((y + v) >= SCREEN_HEIGHT) break;
601 u8 ch1 = sprite[(7 - v)];
602 u8 ch2 = sprite[(7 - v) | 8];
603 u32 color = lut[ch1] | (lut[ch2] << 1);
604 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
605 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
606 if ((start_row + v) == 7) dst += (32 - 1) * 8;
607 }
608 }
609 } else if (blending[4][clr]) {
610 u32 mask = 0xFFFFFFFF;
611 u8 clr0 = blending[0][clr];
612 u8 clr1 = blending[1][clr];
613 u8 clr2 = blending[2][clr];
614 u8 clr3 = blending[3][clr];
615 if (!flip_y) {
616 for(size_t v = 0; v < 8; v++, dst++) {
617 if ((y + v) >= SCREEN_HEIGHT) break;
618 u8 ch1 = sprite[v];
619 u8 ch2 = sprite[v | 8];
620 u32 color = lut[ch1] | (lut[ch2] << 1);
621 u32 col1mask = (color & 0x11111111);
622 u32 col2mask = (color & 0x22222222) >> 1;
623 u32 col3mask = (col1mask & col2mask) * 0xF;
624 col1mask &= ~col3mask;
625 col2mask &= ~col3mask;
626 col3mask = (color & col3mask) & 0x11111111;
627 u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111;
628 color = (clr0 * col0mask) |
629 (clr1 * col1mask) |
630 (clr2 * col2mask) |
631 (clr3 * col3mask);
632 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
633 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
634 if ((start_row + v) == 7) dst += (32 - 1) * 8;
635 }
636 } else {
637 for(size_t v = 0; v < 8; v++, dst++) {
638 if ((y + v) >= SCREEN_HEIGHT) break;
639 u8 ch1 = sprite[(7 - v)];
640 u8 ch2 = sprite[(7 - v) | 8];
641 u32 color = lut[ch1] | (lut[ch2] << 1);
642 u32 col1mask = (color & 0x11111111);
643 u32 col2mask = (color & 0x22222222) >> 1;
644 u32 col3mask = (col1mask & col2mask) * 0xF;
645 col1mask &= ~col3mask;
646 col2mask &= ~col3mask;
647 col3mask = (color & col3mask) & 0x11111111;
648 u32 col0mask = ~(col1mask | col2mask | col3mask) & 0x11111111;
649 color = (clr0 * col0mask) |
650 (clr1 * col1mask) |
651 (clr2 * col2mask) |
652 (clr3 * col3mask);
653 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
654 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
655 if ((start_row + v) == 7) dst += (32 - 1) * 8;
656 }
657 }
658 } else {
659 u8 clr1 = blending[1][clr];
660 u8 clr2 = blending[2][clr];
661 u8 clr3 = blending[3][clr];
662 if (!flip_y) {
663 for(size_t v = 0; v < 8; v++, dst++) {
664 if ((y + v) >= SCREEN_HEIGHT) break;
665 u8 ch1 = sprite[v];
666 u8 ch2 = sprite[v | 8];
667 u32 color = lut[ch1] | (lut[ch2] << 1);
668 u32 col1mask = (color & 0x11111111);
669 u32 col2mask = (color & 0x22222222) >> 1;
670 u32 col3mask = (col1mask & col2mask) * 0xF;
671 col1mask &= ~col3mask;
672 col2mask &= ~col3mask;
673 col3mask = (color & col3mask) & 0x11111111;
674 u32 mask = (col1mask | col2mask | col3mask) * 0xF;
675 color = (clr1 * col1mask) |
676 (clr2 * col2mask) |
677 (clr3 * col3mask);
678 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
679 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
680 if ((start_row + v) == 7) dst += (32 - 1) * 8;
681 }
682 } else {
683 for(size_t v = 0; v < 8; v++, dst++) {
684 if ((y + v) >= SCREEN_HEIGHT) break;
685 u8 ch1 = sprite[(7 - v)];
686 u8 ch2 = sprite[(7 - v) | 8];
687 u32 color = lut[ch1] | (lut[ch2] << 1);
688 u32 col1mask = (color & 0x11111111);
689 u32 col2mask = (color & 0x22222222) >> 1;
690 u32 col3mask = (col1mask & col2mask) * 0xF;
691 col1mask &= ~col3mask;
692 col2mask &= ~col3mask;
693 col3mask = (color & col3mask) & 0x11111111;
694 u32 mask = (col1mask | col2mask | col3mask) * 0xF;
695 color = (clr1 * col1mask) |
696 (clr2 * col2mask) |
697 (clr3 * col3mask);
698 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
699 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
700 if ((start_row + v) == 7) dst += (32 - 1) * 8;
701 }
702 }
703 }
704 dirty_tiles[y >> 3] |= dirtyflag;
705 dirty_tiles[(y + 7) >> 3] |= dirtyflag;
706}
707#endif
353 708
354IWRAM_CODE 709IWRAM_CODE
355void 710void
@@ -365,23 +720,32 @@ putfontchar(u32 *layer, u16 tile_x, u16 tile_y, u8 ch, u8 color) {
365 720
366IWRAM_CODE 721IWRAM_CODE
367void 722void
368flipbuf(Ppu *p) { 723flipbuf() {
369 Tile *mem_fg = FG_FRONT; 724 u32 *fg_back = FG_BACK;
370 Tile *mem_bg = BG_FRONT; 725 u32 *bg_back = BG_BACK;
726 u32 *bg_front = BG_FRONT;
727 u32 *fg_front = FG_FRONT;
371 for (size_t j = 0; j < 20; ++j) { 728 for (size_t j = 0; j < 20; ++j) {
372 if (dirty_tiles[j] == 0) { 729 if (dirty_tiles[j] == 0) {
373 continue; 730 continue;
374 } 731 }
375 732#if FLIPBUF_DMA == 1
733 u32 offset = j * 32 * 8;
734 dma_copy(fg_front + offset, fg_back + offset, 32 * 8 * 4, 3);
735 dma_copy(bg_front + offset, bg_back + offset, 32 * 8 * 4, 3);
736#else
376 size_t k = 1; 737 size_t k = 1;
377 for (size_t i = 0; i < 30; ++i, k <<= 1) { 738 for (size_t i = 0; i < 30; ++i, k <<= 1) {
378 if (dirty_tiles[j] & k) { 739 if (dirty_tiles[j] & k) {
379 Tile *tile_fg = p->fg; 740 Tile *mem_fg = FG_FRONT;
380 Tile *tile_bg = p->bg; 741 Tile *mem_bg = BG_FRONT;
742 Tile *tile_fg = FG_BACK;
743 Tile *tile_bg = BG_BACK;
381 mem_fg[i + j * 32] = tile_fg[i + j * 32]; 744 mem_fg[i + j * 32] = tile_fg[i + j * 32];
382 mem_bg[i + j * 32] = tile_bg[i + j * 32]; 745 mem_bg[i + j * 32] = tile_bg[i + j * 32];
383 } 746 }
384 } 747 }
748#endif
385 dirty_tiles[j] = 0; 749 dirty_tiles[j] = 0;
386 } 750 }
387} 751}
@@ -404,15 +768,25 @@ KeyboardChar keyboard[] = {
404 {0, 0, '0'}, {0, 0, '1'}, {0, 0, '2'}, {0, 0, '3'}, {0, 0, '4'}, {0, 0, '5'}, {0, 0, '6'}, {0, 0, '7'}, {0, 0, '8'}, {0, 0, '9'}, {0, 0, '~'}, {0, 0, 0x18}, 768 {0, 0, '0'}, {0, 0, '1'}, {0, 0, '2'}, {0, 0, '3'}, {0, 0, '4'}, {0, 0, '5'}, {0, 0, '6'}, {0, 0, '7'}, {0, 0, '8'}, {0, 0, '9'}, {0, 0, '~'}, {0, 0, 0x18},
405 {0, 0, 'a'}, {0, 0, 'b'}, {0, 0, 'c'}, {0, 0, 'd'}, {0, 0, 'e'}, {0, 0, 'f'}, {0, 0, 'g'}, {0, 0, 'h'}, {0, 0, 'i'}, {0, 0, 'j'}, {0, 0, '/'}, {0, 0, 0x19}, 769 {0, 0, 'a'}, {0, 0, 'b'}, {0, 0, 'c'}, {0, 0, 'd'}, {0, 0, 'e'}, {0, 0, 'f'}, {0, 0, 'g'}, {0, 0, 'h'}, {0, 0, 'i'}, {0, 0, 'j'}, {0, 0, '/'}, {0, 0, 0x19},
406 {0, 0, 'k'}, {0, 0, 'l'}, {0, 0, 'm'}, {0, 0, 'n'}, {0, 0, 'o'}, {0, 0, 'p'}, {0, 0, 'q'}, {0, 0, 'r'}, {0, 0, 's'}, {0, 0, 't'}, {0, 0, '\\'}, {0, 0, 0x1b}, 770 {0, 0, 'k'}, {0, 0, 'l'}, {0, 0, 'm'}, {0, 0, 'n'}, {0, 0, 'o'}, {0, 0, 'p'}, {0, 0, 'q'}, {0, 0, 'r'}, {0, 0, 's'}, {0, 0, 't'}, {0, 0, '\\'}, {0, 0, 0x1b},
407 {0, 0, 'u'}, {0, 0, 'v'}, {0, 0, 'w'}, {0, 0, 'x'}, {0, 0, 'y'}, {0, 0, 'z'}, {0, 0, ','}, {0, 0, '.'}, {0, 0, ';'}, {0, 0, ':'}, {0, 0, '_'}, {0, 0, 0x1a}, 771 {0, 0, 'u'}, {0, 0, 'v'}, {0, 0, 'w'}, {0, 0, 'x'}, {0, 0, 'y'}, {0, 0, 'z'}, {0, 0, ','}, {0, 0, '.'}, {0, 0, ';'}, {0, 0, ':'}, {0, 0, ' '}, {0, 0, 0x1a},
408}; 772};
409 773
774bool keyboard_on = false;
775
410void 776void
411toggle_keyboard(void) { 777toggle_keyboard(void) {
778 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ;
412 for (size_t i = 0; i < LEN(keyboard); ++i) { 779 for (size_t i = 0; i < LEN(keyboard); ++i) {
413 OBJ_ATTR_0(i) ^= OBJ_HIDDEN; 780 OBJ_ATTR_0(i) ^= OBJ_HIDDEN;
414 } 781 }
415 OBJ_ATTR_0(127) ^= OBJ_HIDDEN; 782 OBJ_ATTR_0(127) ^= OBJ_HIDDEN;
783 if (keyboard_on) {
784 keyboard_on = false;
785 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1;
786 } else {
787 keyboard_on = true;
788 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ;
789 }
416} 790}
417 791
418void 792void
@@ -424,16 +798,13 @@ update_cursor(u8 pos) {
424 | OBJ_X_COORD(keyboard[cursor_position].x); 798 | OBJ_X_COORD(keyboard[cursor_position].x);
425} 799}
426 800
427int 801void
428initppu(Ppu *p, u8 hor, u8 ver, u8 pad) { 802video_init() {
429 p->hor = hor; 803 // Clear VRAM.
430 p->ver = ver; 804 dma_fill((u32*)MEM_VRAM, 0, KB(96), 3);
431 p->pad = pad;
432 p->width = (8 * p->hor + p->pad * 2);
433 p->height = (8 * p->ver + p->pad * 2);
434 805
435 // Initialize display mode and bg palette. 806 // Initialize display mode and bg palette.
436 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ; 807 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1;
437 808
438 // Initialize backgrounds. 809 // Initialize backgrounds.
439 u8 cb_fg = 0; 810 u8 cb_fg = 0;
@@ -443,21 +814,6 @@ initppu(Ppu *p, u8 hor, u8 ver, u8 pad) {
443 BG_CTRL(0) = BG_CHARBLOCK(cb_fg) | BG_SCREENBLOCK(sb_fg) | BG_PRIORITY(1); 814 BG_CTRL(0) = BG_CHARBLOCK(cb_fg) | BG_SCREENBLOCK(sb_fg) | BG_PRIORITY(1);
444 BG_CTRL(1) = BG_CHARBLOCK(cb_bg) | BG_SCREENBLOCK(sb_bg) | BG_PRIORITY(2); 815 BG_CTRL(1) = BG_CHARBLOCK(cb_bg) | BG_SCREENBLOCK(sb_bg) | BG_PRIORITY(2);
445 816
446 // Clear front buffer.
447 p->fg = FG_FRONT;
448 p->bg = BG_FRONT;
449
450 // Use DMA to clear VRAM.
451 u32 fill = 0;
452 dma_fill(p->fg, fill, KB(20), 3);
453 dma_fill(p->bg, fill, KB(20), 3);
454
455 // Clear back buffer.
456 p->fg = FG_BACK;
457 p->bg = BG_BACK;
458 dma_fill(p->fg, fill, KB(20), 3);
459 dma_fill(p->bg, fill, KB(20), 3);
460
461 // Initialize default palette. 817 // Initialize default palette.
462 PAL_BUFFER_BG[0] = COLOR_BLACK; 818 PAL_BUFFER_BG[0] = COLOR_BLACK;
463 PAL_BUFFER_BG[1] = COLOR_WHITE; 819 PAL_BUFFER_BG[1] = COLOR_WHITE;
@@ -465,9 +821,9 @@ initppu(Ppu *p, u8 hor, u8 ver, u8 pad) {
465 PAL_BUFFER_BG[3] = COLOR_BLUE; 821 PAL_BUFFER_BG[3] = COLOR_BLUE;
466 for (size_t i = 0; i < 16; ++i) { 822 for (size_t i = 0; i < 16; ++i) {
467 PAL_BUFFER_SPRITES[i] = COLOR_BLACK; 823 PAL_BUFFER_SPRITES[i] = COLOR_BLACK;
468 PAL_BUFFER_SPRITES[1 * 16] = COLOR_WHITE; 824 PAL_BUFFER_SPRITES[i + 1 * 16] = COLOR_WHITE;
469 PAL_BUFFER_SPRITES[2 * 16] = COLOR_RED; 825 PAL_BUFFER_SPRITES[i + 2 * 16] = COLOR_RED;
470 PAL_BUFFER_SPRITES[3 * 16] = COLOR_BLUE; 826 PAL_BUFFER_SPRITES[i + 3 * 16] = COLOR_BLUE;
471 } 827 }
472 828
473 // Initialize background memory map. 829 // Initialize background memory map.
@@ -490,7 +846,7 @@ initppu(Ppu *p, u8 hor, u8 ver, u8 pad) {
490 keyboard[i].y = tile_y * 8; 846 keyboard[i].y = tile_y * 8;
491 OBJ_ATTR_0(i) = OBJ_SHAPE_SQUARE | OBJ_Y_COORD(keyboard[i].y) | OBJ_HIDDEN; 847 OBJ_ATTR_0(i) = OBJ_SHAPE_SQUARE | OBJ_Y_COORD(keyboard[i].y) | OBJ_HIDDEN;
492 OBJ_ATTR_1(i) = OBJ_SIZE_SMALL | OBJ_X_COORD(keyboard[i].x); 848 OBJ_ATTR_1(i) = OBJ_SIZE_SMALL | OBJ_X_COORD(keyboard[i].x);
493 OBJ_ATTR_2(i) = (SPRITE_START_IDX + keyboard[i].symbol) | OBJ_PAL_BANK(0); 849 OBJ_ATTR_2(i) = (SPRITE_START_IDX + keyboard[i].symbol) | OBJ_PAL_BANK(2);
494 tile_x++; 850 tile_x++;
495 if (tile_x - KEYBOARD_START_TILE_X >= KEYBOARD_ROW_SIZE) { 851 if (tile_x - KEYBOARD_START_TILE_X >= KEYBOARD_ROW_SIZE) {
496 tile_x = KEYBOARD_START_TILE_X; 852 tile_x = KEYBOARD_START_TILE_X;
@@ -500,6 +856,4 @@ initppu(Ppu *p, u8 hor, u8 ver, u8 pad) {
500 OBJ_ATTR_0(127) = OBJ_SHAPE_SQUARE | OBJ_Y_COORD(keyboard[cursor_position].y) | OBJ_HIDDEN; 856 OBJ_ATTR_0(127) = OBJ_SHAPE_SQUARE | OBJ_Y_COORD(keyboard[cursor_position].y) | OBJ_HIDDEN;
501 OBJ_ATTR_1(127) = OBJ_SIZE_SMALL | OBJ_X_COORD(keyboard[cursor_position].x); 857 OBJ_ATTR_1(127) = OBJ_SIZE_SMALL | OBJ_X_COORD(keyboard[cursor_position].x);
502 OBJ_ATTR_2(127) = (SPRITE_START_IDX + 0xdb) | OBJ_PAL_BANK(3); 858 OBJ_ATTR_2(127) = (SPRITE_START_IDX + 0xdb) | OBJ_PAL_BANK(3);
503
504 return 1;
505} 859}