diff options
Diffstat (limited to 'src/renderer_m0.c')
-rw-r--r-- | src/renderer_m0.c | 811 |
1 files changed, 811 insertions, 0 deletions
diff --git a/src/renderer_m0.c b/src/renderer_m0.c new file mode 100644 index 0000000..8bd4263 --- /dev/null +++ b/src/renderer_m0.c | |||
@@ -0,0 +1,811 @@ | |||
1 | // | ||
2 | // This Mode 0 renderer provides a way of drawing directly to a framebuffer | ||
3 | // (similar to Mode 3 and 4) while retaining the flexibility of using other | ||
4 | // backgrounds if needed. It also performs double buffering to avoid tearing | ||
5 | // artifacts and tries to only draw tiles that changed on each frame. | ||
6 | // | ||
7 | |||
8 | #include "renderer.h" | ||
9 | #include "text.h" | ||
10 | |||
11 | // | ||
12 | // Parameters. | ||
13 | // | ||
14 | |||
// Line algorithm used by draw_line(): 1 selects the fixed-point variant that
// emits whole horizontal/vertical runs, any other value selects the
// pixel-by-pixel loop.
#define SUBPIXEL_LINES 1

// 1bpp row decoding: 1 uses the two 256-entry per-byte lookup tables, 0 uses
// the smaller 16-entry per-nibble tables.
#define DEC_BIG_LUT    1

// Strategy used by flip_buffer(); values 0 to 4 are implemented there.
#define FLIP_TYPE      3
18 | |||
19 | // Front/back buffers for double buffering. | ||
20 | #define BUF_0 ((u32*)(MEM_VRAM)) | ||
21 | #define BUF_1 ((u32*)(MEM_VRAM + KB(20))) | ||
22 | |||
23 | // Pointer to the backbuffer. | ||
24 | static u32 *backbuf = BUF_1; | ||
25 | |||
26 | // Tracking which tiles are "dirty" and need refreshing. | ||
27 | static u32 dirty_tiles[21] = {0}; | ||
28 | |||
29 | // Position of the tilemap. | ||
30 | #define TILE_MAP ((u32*)(MEM_VRAM + KB(40))) | ||
31 | |||
32 | // Charblock and screenblock for both render buffers. | ||
33 | #define CB_0 0 | ||
34 | #define CB_1 1 | ||
35 | #define SB_0 20 | ||
36 | #define SB_1 22 | ||
37 | |||
// Bound checks can be disabled at compile time (define
// DISABLE_BOUNDCHECK_SCREEN), but this will not always improve performance and
// can in fact make it worse. It is possible that this is due to some aliasing
// optimizations, but this has not been confirmed.
//
// When enabled, the macro returns from the *calling* function if the given
// coordinate lies outside the screen, so it can only be used inside functions
// returning void. Both variants are wrapped in do { } while (0) so that the
// macro behaves as a single statement (safe inside unbraced if/else).
#ifdef DISABLE_BOUNDCHECK_SCREEN
#define BOUNDCHECK_SCREEN(X,Y) do { } while (0)
#else
#define BOUNDCHECK_SCREEN(X,Y) \
    do { \
        if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; \
    } while (0)
#endif
46 | |||
// Swap A and B values without a tmp variable (XOR swap). Both arguments must
// be integer lvalues and must refer to *different* objects: swapping an object
// with itself would zero it.
#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))

// Swap A and B values to make sure A <= B. Wrapped in do { } while (0) so the
// macro expands to exactly one statement and cannot capture a following else.
#define MAYBE_SWAP(A,B) do { if ((A) > (B)) { SWAP(A,B); } } while (0)
52 | |||
53 | // | ||
54 | // Basic primitives. | ||
55 | // | ||
56 | |||
57 | static inline | ||
58 | void | ||
59 | redraw(void) { | ||
60 | for (size_t i = 0; i < 21; i++) { | ||
61 | dirty_tiles[i] = 0xFFFFFFFF; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | IWRAM_CODE | ||
66 | void screen_fill(u8 clr) { | ||
67 | // We have to make sure we leave the last tile blank to use as alpha channel | ||
68 | // when moving the BG during double buffering. | ||
69 | dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3); | ||
70 | redraw(); | ||
71 | } | ||
72 | |||
73 | IWRAM_CODE | ||
74 | void | ||
75 | draw_pixel(size_t x, size_t y, u8 clr) { | ||
76 | BOUNDCHECK_SCREEN(x, y); | ||
77 | |||
78 | // Find row position for the given x/y coordinates. | ||
79 | size_t tile_x = x / 8; | ||
80 | size_t tile_y = y / 8; | ||
81 | size_t start_col = x % 8; | ||
82 | size_t start_row = y % 8; | ||
83 | u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; | ||
84 | |||
85 | // Update backbuffer. | ||
86 | size_t shift = start_col * sizeof(u32); | ||
87 | u32 mask = 0xF << shift; | ||
88 | u32 row = clr << shift; | ||
89 | *dst = (*dst & ~mask) | row; | ||
90 | dirty_tiles[tile_y] |= 1 << tile_x; | ||
91 | } | ||
92 | |||
93 | IWRAM_CODE | ||
94 | static inline | ||
95 | void | ||
96 | draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { | ||
97 | BOUNDCHECK_SCREEN(x0, y0); | ||
98 | BOUNDCHECK_SCREEN(x1, y0); | ||
99 | // Find row positions for the given x/y coordinates. | ||
100 | size_t tile_x0 = x0 / 8; | ||
101 | size_t tile_x1 = x1 / 8; | ||
102 | size_t tile_y = y0 / 8; | ||
103 | size_t start_col = x0 % 8; | ||
104 | size_t end_col = x1 % 8; | ||
105 | size_t start_row = y0 % 8; | ||
106 | u32 dirty = (1 << tile_x0) | (1 << tile_x1); | ||
107 | |||
108 | // Horizontal line. There are 3 cases: | ||
109 | // 1. Lines fit on a single tile. | ||
110 | // 2. Lines go through 2 tiles, both require partial row updates. | ||
111 | // 3. Lines go through 3 or more tiles, first and last tiles use | ||
112 | // partial row updates, rows in the middle can write the entire | ||
113 | // row. | ||
114 | size_t dtx = tile_x1 - tile_x0; | ||
115 | u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; | ||
116 | if (dtx < 1) { | ||
117 | size_t shift_left = start_col * 4; | ||
118 | size_t shift_right = (7 - end_col) * 4; | ||
119 | u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); | ||
120 | u32 row = (0x11111111 * clr) & mask; | ||
121 | *dst = (*dst & ~mask) | row; | ||
122 | } else { | ||
123 | size_t shift_left = start_col * 4; | ||
124 | size_t shift_right = (7 - end_col) * 4; | ||
125 | u32 mask = 0xFFFFFFFF; | ||
126 | u32 row = 0x11111111 * clr; | ||
127 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); | ||
128 | dst += 8; | ||
129 | for (size_t i = 1; i < dtx; i++) { | ||
130 | dirty |= (1 << (tile_x0 + i)); | ||
131 | *dst = row; | ||
132 | dst += 8; | ||
133 | } | ||
134 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); | ||
135 | } | ||
136 | dirty_tiles[tile_y] |= dirty; | ||
137 | } | ||
138 | |||
139 | IWRAM_CODE | ||
140 | UNROLL_LOOPS | ||
141 | static inline | ||
142 | void | ||
143 | draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { | ||
144 | BOUNDCHECK_SCREEN(x0, y0); | ||
145 | BOUNDCHECK_SCREEN(x0, y1); | ||
146 | size_t tile_x = x0 / 8; | ||
147 | size_t tile_y = y0 / 8; | ||
148 | size_t tile_y0 = y0 / 8; | ||
149 | size_t tile_y1 = y1 / 8; | ||
150 | size_t start_col = x0 % 8; | ||
151 | size_t start_row0 = y0 % 8; | ||
152 | size_t start_row1 = y1 % 8; | ||
153 | |||
154 | size_t shift_left = start_col * 4; | ||
155 | u32 dirty = (1 << tile_x); | ||
156 | |||
157 | u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; | ||
158 | u32 mask = 0x0000000F << shift_left; | ||
159 | u32 row = (0x11111111 * clr) & mask; | ||
160 | u32 dty = tile_y1 - tile_y0; | ||
161 | if (dty < 1) { | ||
162 | for (size_t i = 0; i <= (y1 - y0); i++, dst++) { | ||
163 | dst[0] = (dst[0] & ~mask) | row; | ||
164 | } | ||
165 | } else { | ||
166 | for (size_t i = 0; i < (8 - start_row0); i++, dst++) { | ||
167 | dst[0] = (dst[0] & ~mask) | row; | ||
168 | } | ||
169 | dst += 8 * 31; | ||
170 | for (size_t j = 1; j < dty; j++) { | ||
171 | dirty_tiles[tile_y0 + j] |= dirty; | ||
172 | for (size_t i = 0; i < 8; i++, dst++) { | ||
173 | dst[0] = (dst[0] & ~mask) | row; | ||
174 | } | ||
175 | dst += 8 * 31; | ||
176 | } | ||
177 | for (size_t i = 0; i <= start_row1; i++, dst++) { | ||
178 | dst[0] = (dst[0] & ~mask) | row; | ||
179 | } | ||
180 | } | ||
181 | dirty_tiles[tile_y0] |= dirty; | ||
182 | dirty_tiles[tile_y1] |= dirty; | ||
183 | } | ||
184 | |||
185 | IWRAM_CODE | ||
186 | void | ||
187 | draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | ||
188 | BOUNDCHECK_SCREEN(x0, y0); | ||
189 | BOUNDCHECK_SCREEN(x1, y1); | ||
190 | if (y0 == y1) { | ||
191 | MAYBE_SWAP(x0, x1); | ||
192 | draw_hline(x0, x1, y0, clr); | ||
193 | } else if (x0 == x1) { | ||
194 | MAYBE_SWAP(y0, y1); | ||
195 | draw_vline(x0, y0, y1, clr); | ||
196 | } else { | ||
197 | // Fixed Precision constants. | ||
198 | const int fp_bit = 6; | ||
199 | const int fp_one = FP_NUM(1, fp_bit); | ||
200 | const int fp_half = fp_one >> 1; | ||
201 | |||
202 | int dx = x0 > x1 ? x0 - x1 : x1 - x0; | ||
203 | int dy = y0 > y1 ? y0 - y1 : y1 - y0; | ||
204 | |||
205 | if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) { | ||
206 | SWAP(x0, x1); | ||
207 | SWAP(y0, y1); | ||
208 | } | ||
209 | |||
210 | #if SUBPIXEL_LINES == 1 | ||
211 | int dxf = (dx << fp_bit); | ||
212 | int dyf = (dy << fp_bit); | ||
213 | int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit); | ||
214 | int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit); | ||
215 | int x_step = x0 > x1 ? -1 : 1; | ||
216 | int y_step = y0 > y1 ? -1 : 1; | ||
217 | int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy; | ||
218 | if (dx >= dy) { | ||
219 | int step = dxf / dyf; | ||
220 | int remaining = dx; | ||
221 | while (remaining > (step - 1)) { | ||
222 | distance += step * 2 * dyf; | ||
223 | if (distance >= 0) { | ||
224 | draw_hline(x0, x0 + step - 1, y0, clr); | ||
225 | x0 += x_step * step; | ||
226 | remaining -= step; | ||
227 | } else { | ||
228 | if (remaining < step) { | ||
229 | break; | ||
230 | } | ||
231 | draw_hline(x0, x0 + step, y0, clr); | ||
232 | distance += 2 * dyf; | ||
233 | x0 += x_step * (step + 1); | ||
234 | remaining -= step + 1; | ||
235 | } | ||
236 | distance -= 2 * dxf; | ||
237 | y0 += y_step; | ||
238 | } | ||
239 | if (remaining >= 0) { | ||
240 | draw_hline(x0, x0 + remaining, y0, clr); | ||
241 | } | ||
242 | } else { | ||
243 | int step = dyf / dxf; | ||
244 | int remaining = dy; | ||
245 | while (remaining > (step - 1)) { | ||
246 | distance += step * 2 * dxf; | ||
247 | if (distance >= 0) { | ||
248 | draw_vline(x0, y0, y0 + step - 1, clr); | ||
249 | y0 += y_step * step; | ||
250 | remaining -= step; | ||
251 | } else { | ||
252 | draw_vline(x0, y0, y0 + step, clr); | ||
253 | distance += 2 * dxf; | ||
254 | y0 += y_step * (step + 1); | ||
255 | remaining -= step + 1; | ||
256 | } | ||
257 | distance -= 2 * dyf; | ||
258 | x0 += x_step; | ||
259 | } | ||
260 | if (remaining >= 0) { | ||
261 | draw_vline(x0, y0, y0 + remaining, clr); | ||
262 | } | ||
263 | } | ||
264 | #else | ||
265 | int x_step = x0 > x1 ? -1 : 1; | ||
266 | int y_step = y0 > y1 ? -1 : 1; | ||
267 | if (dx >= dy) { | ||
268 | int diff = 2 * dy - dx; | ||
269 | for (int i = 0; i < dx + 1; i++) { | ||
270 | draw_pixel(x0, y0, clr); | ||
271 | if (diff >= 0) { | ||
272 | diff -= 2 * dx; | ||
273 | y0 += y_step; | ||
274 | } | ||
275 | diff += 2 * dy; | ||
276 | x0 += x_step; | ||
277 | } | ||
278 | } else { | ||
279 | int diff = 2 * dx - dy; | ||
280 | for (int i = 0; i < dy + 1; i++) { | ||
281 | draw_pixel(x0, y0, clr); | ||
282 | if (diff >= 0) { | ||
283 | diff -= 2 * dy; | ||
284 | x0 += x_step; | ||
285 | } | ||
286 | diff += 2 * dx; | ||
287 | y0 += y_step; | ||
288 | } | ||
289 | } | ||
290 | #endif | ||
291 | } | ||
292 | } | ||
293 | |||
294 | IWRAM_CODE | ||
295 | void | ||
296 | draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | ||
297 | BOUNDCHECK_SCREEN(x0, y0); | ||
298 | BOUNDCHECK_SCREEN(x1, y1); | ||
299 | MAYBE_SWAP(x0, x1); | ||
300 | MAYBE_SWAP(y0, y1); | ||
301 | |||
302 | draw_hline(x0, x1, y0, clr); | ||
303 | draw_hline(x0, x1, y1, clr); | ||
304 | draw_vline(x0, y0, y1, clr); | ||
305 | draw_vline(x1, y0, y1, clr); | ||
306 | } | ||
307 | |||
308 | IWRAM_CODE | ||
309 | void | ||
310 | draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | ||
311 | BOUNDCHECK_SCREEN(x0, y0); | ||
312 | BOUNDCHECK_SCREEN(x1, y1); | ||
313 | MAYBE_SWAP(x0, x1); | ||
314 | MAYBE_SWAP(y0, y1); | ||
315 | |||
316 | // Special condition. If the screen is to be completely filled, use the DMA | ||
317 | // instead. | ||
318 | if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { | ||
319 | screen_fill(clr); | ||
320 | return; | ||
321 | } | ||
322 | |||
323 | for (size_t y = y0; y <= y1; y++) { | ||
324 | draw_hline(x0, x1, y, clr); | ||
325 | } | ||
326 | } | ||
327 | |||
328 | // | ||
329 | // Sprites (chr/icn). | ||
330 | // | ||
331 | |||
332 | #if DEC_BIG_LUT == 1 | ||
333 | static u32 dec_byte_flip_x[256] = { | ||
334 | 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, | ||
335 | 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, | ||
336 | 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, | ||
337 | 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011, | ||
338 | 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000, | ||
339 | 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, | ||
340 | 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010, | ||
341 | 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, | ||
342 | 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, | ||
343 | 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001, | ||
344 | 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, | ||
345 | 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011, | ||
346 | 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000, | ||
347 | 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, | ||
348 | 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010, | ||
349 | 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, | ||
350 | 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, | ||
351 | 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001, | ||
352 | 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, | ||
353 | 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011, | ||
354 | 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000, | ||
355 | 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, | ||
356 | 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010, | ||
357 | 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, | ||
358 | 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, | ||
359 | 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001, | ||
360 | 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, | ||
361 | 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011, | ||
362 | 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000, | ||
363 | 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, | ||
364 | 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010, | ||
365 | 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, | ||
366 | 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, | ||
367 | 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001, | ||
368 | 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, | ||
369 | 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011, | ||
370 | 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000, | ||
371 | 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, | ||
372 | 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010, | ||
373 | 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, | ||
374 | 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, | ||
375 | 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001, | ||
376 | 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, | ||
377 | 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011, | ||
378 | 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000, | ||
379 | 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, | ||
380 | 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010, | ||
381 | 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, | ||
382 | 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, | ||
383 | 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001, | ||
384 | 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, | ||
385 | 0x11111111 | ||
386 | }; | ||
387 | |||
388 | static u32 dec_byte[256] = { | ||
389 | 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, | ||
390 | 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000, | ||
391 | 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, | ||
392 | 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000, | ||
393 | 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000, | ||
394 | 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000, | ||
395 | 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100, | ||
396 | 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100, | ||
397 | 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100, | ||
398 | 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100, | ||
399 | 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100, | ||
400 | 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100, | ||
401 | 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010, | ||
402 | 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010, | ||
403 | 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010, | ||
404 | 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010, | ||
405 | 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010, | ||
406 | 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010, | ||
407 | 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010, | ||
408 | 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110, | ||
409 | 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110, | ||
410 | 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110, | ||
411 | 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110, | ||
412 | 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110, | ||
413 | 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110, | ||
414 | 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001, | ||
415 | 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001, | ||
416 | 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001, | ||
417 | 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001, | ||
418 | 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001, | ||
419 | 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001, | ||
420 | 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001, | ||
421 | 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101, | ||
422 | 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101, | ||
423 | 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101, | ||
424 | 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101, | ||
425 | 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101, | ||
426 | 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101, | ||
427 | 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011, | ||
428 | 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011, | ||
429 | 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011, | ||
430 | 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011, | ||
431 | 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011, | ||
432 | 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011, | ||
433 | 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111, | ||
434 | 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111, | ||
435 | 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111, | ||
436 | 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111, | ||
437 | 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111, | ||
438 | 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111, | ||
439 | 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111, | ||
440 | 0x11111111 | ||
441 | }; | ||
442 | |||
443 | IWRAM_CODE | ||
444 | static inline | ||
445 | u32 | ||
446 | decode_1bpp(u8 row, u8 flip_x) { | ||
447 | if (flip_x) { | ||
448 | return dec_byte_flip_x[row]; | ||
449 | } | ||
450 | return dec_byte[row]; | ||
451 | } | ||
452 | #else | ||
453 | static u16 dec_nibble[] = { | ||
454 | 0x0000, 0x1000, 0x0100, 0x1100, | ||
455 | 0x0010, 0x1010, 0x0110, 0x1110, | ||
456 | 0x0001, 0x1001, 0x0101, 0x1101, | ||
457 | 0x0011, 0x1011, 0x0111, 0x1111, | ||
458 | }; | ||
459 | |||
460 | static u16 dec_nibble_flip_x[] = { | ||
461 | 0x0000, 0x0001, 0x0010, 0x0011, | ||
462 | 0x0100, 0x0101, 0x0110, 0x0111, | ||
463 | 0x1000, 0x1001, 0x1010, 0x1011, | ||
464 | 0x1100, 0x1101, 0x1110, 0x1111, | ||
465 | }; | ||
466 | |||
467 | IWRAM_CODE | ||
468 | static inline | ||
469 | u32 | ||
470 | decode_1bpp(u8 row, u8 flip_x) { | ||
471 | if (flip_x) { | ||
472 | u16 *lut = dec_nibble_flip_x; | ||
473 | return (u32)lut[(row >> 4) & 0xF] << 16 | (u32)lut[(row >> 0) & 0xF]; | ||
474 | } | ||
475 | u16 *lut = dec_nibble; | ||
476 | return (u32)lut[(row >> 0) & 0xF] << 16 | (u32)lut[(row >> 4) & 0xF]; | ||
477 | } | ||
478 | #endif | ||
479 | |||
480 | IWRAM_CODE | ||
481 | UNROLL_LOOPS | ||
482 | void | ||
483 | draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | ||
484 | BOUNDCHECK_SCREEN(x, y); | ||
485 | size_t tile_x0 = x / 8; | ||
486 | size_t tile_x1 = (x + 7) / 8; | ||
487 | size_t tile_y = y / 8; | ||
488 | size_t start_col = x % 8; | ||
489 | size_t start_row = y % 8; | ||
490 | size_t shift_left = start_col * 4; | ||
491 | size_t shift_right = (8 - start_col) * 4; | ||
492 | u32 dirty = (1 << tile_x0) | (1 << tile_x1); | ||
493 | u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; | ||
494 | #if DEC_BIG_LUT | ||
495 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | ||
496 | #endif | ||
497 | if (!flip_y) { | ||
498 | for(size_t v = 0; v < 8; v++, dst++) { | ||
499 | if ((y + v) >= SCREEN_HEIGHT) break; | ||
500 | u8 ch1 = sprite[v + 0]; | ||
501 | u8 ch2 = sprite[v + 8]; | ||
502 | #if DEC_BIG_LUT | ||
503 | u32 clr_a = lut[ch1]; | ||
504 | u32 clr_b = lut[ch2]; | ||
505 | #else | ||
506 | u32 clr_a = decode_1bpp(ch1, flip_x); | ||
507 | u32 clr_b = decode_1bpp(ch2, flip_x); | ||
508 | #endif | ||
509 | u32 mask_a = (clr_a * 0xF); | ||
510 | u32 mask_b = (clr_b * 0xF); | ||
511 | u32 mask = (mask_a | mask_b); | ||
512 | u32 color; | ||
513 | if (clr == 0) { | ||
514 | color = clr_a + (clr_b << 1); | ||
515 | } else if (clr == 15) { | ||
516 | color = 0; | ||
517 | } else { | ||
518 | color = (clr_a | clr_b) * clr; | ||
519 | } | ||
520 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
521 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
522 | if ((start_row + v) == 7) { | ||
523 | dirty_tiles[tile_y + 1] |= dirty; | ||
524 | dst += (32 - 1) * 8; | ||
525 | } | ||
526 | } | ||
527 | } else { | ||
528 | for(size_t v = 0; v < 8; v++, dst++) { | ||
529 | if ((y + v) >= SCREEN_HEIGHT) break; | ||
530 | u8 ch1 = sprite[(7 - v) + 0]; | ||
531 | u8 ch2 = sprite[(7 - v) + 8]; | ||
532 | #if DEC_BIG_LUT | ||
533 | u32 clr_a = lut[ch1]; | ||
534 | u32 clr_b = lut[ch2]; | ||
535 | #else | ||
536 | u32 clr_a = decode_1bpp(ch1, flip_x); | ||
537 | u32 clr_b = decode_1bpp(ch2, flip_x); | ||
538 | #endif | ||
539 | u32 mask_a = (clr_a * 0xF); | ||
540 | u32 mask_b = (clr_b * 0xF); | ||
541 | u32 mask = (mask_a | mask_b); | ||
542 | u32 color; | ||
543 | if (clr == 0) { | ||
544 | color = clr_a + (clr_b << 1); | ||
545 | } else if (clr == 15) { | ||
546 | color = 0; | ||
547 | } else { | ||
548 | color = (clr_a | clr_b) * clr; | ||
549 | } | ||
550 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
551 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
552 | if ((start_row + v) == 7) { | ||
553 | dirty_tiles[tile_y + 1] |= dirty; | ||
554 | dst += (32 - 1) * 8; | ||
555 | } | ||
556 | } | ||
557 | } | ||
558 | dirty_tiles[tile_y] |= dirty; | ||
559 | } | ||
560 | |||
561 | IWRAM_CODE | ||
562 | UNROLL_LOOPS | ||
563 | void | ||
564 | draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { | ||
565 | BOUNDCHECK_SCREEN(x, y); | ||
566 | size_t tile_x0 = x / 8; | ||
567 | size_t tile_x1 = (x + 7) / 8; | ||
568 | size_t tile_y = y / 8; | ||
569 | size_t start_col = x % 8; | ||
570 | size_t start_row = y % 8; | ||
571 | size_t shift_left = start_col * 4; | ||
572 | size_t shift_right = (8 - start_col) * 4; | ||
573 | u32 dirty = (1 << tile_x0) | (1 << tile_x1); | ||
574 | u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; | ||
575 | #if DEC_BIG_LUT | ||
576 | u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; | ||
577 | #endif | ||
578 | if (!flip_y) { | ||
579 | for(size_t v = 0; v < 8; v++, dst++) { | ||
580 | if ((y + v) >= SCREEN_HEIGHT) break; | ||
581 | u8 ch1 = sprite[v + 0]; | ||
582 | #if DEC_BIG_LUT | ||
583 | u32 color = lut[ch1]; | ||
584 | #else | ||
585 | u32 color = decode_1bpp(ch1, flip_x); | ||
586 | #endif | ||
587 | u32 mask = (color * 0xF); | ||
588 | color *= clr; | ||
589 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
590 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
591 | if ((start_row + v) == 7) { | ||
592 | dirty_tiles[tile_y + 1] |= dirty; | ||
593 | dst += (32 - 1) * 8; | ||
594 | } | ||
595 | } | ||
596 | } else { | ||
597 | for(size_t v = 0; v < 8; v++, dst++) { | ||
598 | if ((y + v) >= SCREEN_HEIGHT) break; | ||
599 | u8 ch1 = sprite[(7 - v) + 0]; | ||
600 | #if DEC_BIG_LUT | ||
601 | u32 color = lut[ch1]; | ||
602 | #else | ||
603 | u32 color = decode_1bpp(ch1, flip_x); | ||
604 | #endif | ||
605 | u32 mask = (color * 0xF); | ||
606 | color *= clr; | ||
607 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | ||
608 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | ||
609 | if ((start_row + v) == 7) { | ||
610 | dirty_tiles[tile_y + 1] |= dirty; | ||
611 | dst += (32 - 1) * 8; | ||
612 | } | ||
613 | } | ||
614 | } | ||
615 | dirty_tiles[tile_y] |= dirty; | ||
616 | } | ||
617 | |||
618 | // | ||
619 | // Flipping buffers/copying memory. | ||
620 | // | ||
621 | |||
622 | IWRAM_CODE | ||
623 | void | ||
624 | flip_buffer(void) { | ||
625 | // Mode 0: double buffering without dirty tiles. | ||
626 | #if FLIP_TYPE == 0 | ||
627 | if (backbuf == BUF_0) { | ||
628 | backbuf = BUF_1; | ||
629 | BG_H_SCROLL_0 = 0; | ||
630 | BG_H_SCROLL_1 = -240; | ||
631 | } else { | ||
632 | backbuf = BUF_0; | ||
633 | BG_H_SCROLL_0 = -240; | ||
634 | BG_H_SCROLL_1 = 0; | ||
635 | } | ||
636 | |||
637 | // Mode 1: single buffer, copy the dirty lines from backbuffer (BUF_1) to | ||
638 | // frontbuffer (BUF_0) using the DMA. | ||
639 | #elif FLIP_TYPE == 1 | ||
640 | u32 *front = BUF_0; | ||
641 | u32 *back = BUF_1; | ||
642 | BG_H_SCROLL_0 = 0; | ||
643 | BG_H_SCROLL_1 = -240; | ||
644 | for (size_t j = 0; j < 20; ++j) { | ||
645 | if (dirty_tiles[j] == 0) { | ||
646 | continue; | ||
647 | } | ||
648 | u32 offset = j * 32 * 8; | ||
649 | dma_copy(front + offset, back + offset, (30 * 8 * 4), 3); | ||
650 | dirty_tiles[j] = 0; | ||
651 | } | ||
652 | |||
653 | // Mode 2: single buffer, copy the dirty tiles from backbuffer (BUF_1) to | ||
654 | // frontbuffer (BUF_0). | ||
655 | #elif FLIP_TYPE == 2 | ||
656 | u32 *front = BUF_0; | ||
657 | u32 *back = BUF_1; | ||
658 | BG_H_SCROLL_0 = 0; | ||
659 | BG_H_SCROLL_1 = -240; | ||
660 | for (size_t j = 0; j < 20; ++j) { | ||
661 | if (dirty_tiles[j] == 0) { | ||
662 | continue; | ||
663 | } | ||
664 | size_t k = 1; | ||
665 | for (size_t i = 0; i < 30; ++i, k <<= 1) { | ||
666 | if (dirty_tiles[j] & k) { | ||
667 | Tile *mem_front = front; | ||
668 | Tile *mem_back = back; | ||
669 | mem_front[i + j * 32] = mem_back[i + j * 32]; | ||
670 | } | ||
671 | } | ||
672 | dirty_tiles[j] = 0; | ||
673 | } | ||
674 | |||
675 | // Mode 3: Double buffering with dirty line, copying the dirty lines if needed | ||
676 | // after flipping buffers with the DMA. | ||
677 | #elif FLIP_TYPE == 3 | ||
678 | bool should_flip = false; | ||
679 | for (size_t j = 0; j < 20; ++j) { | ||
680 | if (dirty_tiles[j] == 0) { | ||
681 | continue; | ||
682 | } | ||
683 | should_flip = true; | ||
684 | break; | ||
685 | } | ||
686 | if (!should_flip) { | ||
687 | return; | ||
688 | } | ||
689 | u32 *frontbuf = backbuf; | ||
690 | if (backbuf == BUF_0) { | ||
691 | backbuf = BUF_1; | ||
692 | BG_H_SCROLL_0 = 0; | ||
693 | BG_H_SCROLL_1 = -240; | ||
694 | } else { | ||
695 | backbuf = BUF_0; | ||
696 | BG_H_SCROLL_0 = -240; | ||
697 | BG_H_SCROLL_1 = 0; | ||
698 | } | ||
699 | for (size_t j = 0; j < 20; ++j) { | ||
700 | if (dirty_tiles[j] == 0) { | ||
701 | continue; | ||
702 | } | ||
703 | u32 offset = j * 32 * 8; | ||
704 | dma_copy(backbuf + offset, frontbuf + offset, (30 * 8 * 4), 3); | ||
705 | dirty_tiles[j] = 0; | ||
706 | } | ||
707 | |||
708 | // Mode 4: Double buffering with dirty tiles, copying the dirty tiles if needed | ||
709 | // after flipping buffers. | ||
710 | #elif FLIP_TYPE == 4 | ||
711 | bool should_flip = false; | ||
712 | for (size_t j = 0; j < 20; ++j) { | ||
713 | if (dirty_tiles[j] == 0) { | ||
714 | continue; | ||
715 | } | ||
716 | should_flip = true; | ||
717 | break; | ||
718 | } | ||
719 | if (!should_flip) { | ||
720 | return; | ||
721 | } | ||
722 | u32 *frontbuf = backbuf; | ||
723 | if (backbuf == BUF_0) { | ||
724 | backbuf = BUF_1; | ||
725 | BG_H_SCROLL_0 = 0; | ||
726 | BG_H_SCROLL_1 = -240; | ||
727 | } else { | ||
728 | backbuf = BUF_0; | ||
729 | BG_H_SCROLL_0 = -240; | ||
730 | BG_H_SCROLL_1 = 0; | ||
731 | } | ||
732 | for (size_t j = 0; j < 20; ++j) { | ||
733 | if (dirty_tiles[j] == 0) { | ||
734 | continue; | ||
735 | } | ||
736 | size_t k = 1; | ||
737 | for (size_t i = 0; i < 30; ++i, k <<= 1) { | ||
738 | if (dirty_tiles[j] & k) { | ||
739 | Tile *mem_front = frontbuf; | ||
740 | Tile *mem_back = backbuf; | ||
741 | mem_back[i + j * 32] = mem_front[i + j * 32]; | ||
742 | } | ||
743 | } | ||
744 | dirty_tiles[j] = 0; | ||
745 | } | ||
746 | #endif | ||
747 | } | ||
748 | |||
749 | // | ||
750 | // Text rendering. | ||
751 | // | ||
752 | |||
753 | #include "font.h" | ||
754 | |||
755 | // Font rendering function for the text engine. | ||
756 | void | ||
757 | txt_drawc(char c, size_t x, size_t y, u8 clr) { | ||
758 | u8 *tile = font_icn; | ||
759 | draw_icn(x, y, tile + 8 * c, clr, 1, 0); | ||
760 | } | ||
761 | |||
762 | // | ||
763 | // Initialization. | ||
764 | // | ||
765 | |||
766 | void | ||
767 | renderer_init(void) { | ||
768 | // Initialize display mode and bg palette. | ||
769 | DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; | ||
770 | |||
771 | // Clear VRAM. | ||
772 | dma_fill((u32*)MEM_VRAM, 0, KB(96), 3); | ||
773 | |||
774 | // Initialize backgrounds. | ||
775 | BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); | ||
776 | BG_CTRL(1) = BG_CHARBLOCK(CB_1) | BG_SCREENBLOCK(SB_1) | BG_PRIORITY(1) | BG_SIZE(1); | ||
777 | |||
778 | // Initialize background memory map for the render buffers. The backgrounds | ||
779 | // are 64x32 each, with the second screenblock pointing to a zeroed tile. | ||
780 | // This makes it so while scrolling the backgrounds to the second screen we | ||
781 | // effectively disabling them. Thanks to this we can perform double | ||
782 | // buffering with mode 0 rendering. | ||
783 | u16 *mem_map_fg = SCREENBLOCK_MEM[SB_0]; | ||
784 | u16 *mem_map_fg_blank = SCREENBLOCK_MEM[SB_0 + 1]; | ||
785 | u16 *mem_map_bg = SCREENBLOCK_MEM[SB_1]; | ||
786 | u16 *mem_map_bg_blank = SCREENBLOCK_MEM[SB_1 + 1]; | ||
787 | for (size_t i = 0; i < 32 * 20; ++i) { | ||
788 | mem_map_fg[i] = i; | ||
789 | mem_map_fg_blank[i] = 32 * 20 - 1; | ||
790 | mem_map_bg[i] = i + 32 * 4; | ||
791 | mem_map_bg_blank[i] = (32 * 20 - 1) + 32 * 4; | ||
792 | } | ||
793 | |||
794 | // Setup initial background state. | ||
795 | BG_H_SCROLL_0 = -240; | ||
796 | BG_H_SCROLL_1 = -240; | ||
797 | |||
798 | // Initialize default palette. | ||
799 | PAL_BUFFER_BG[0] = COLOR_BLACK; | ||
800 | PAL_BUFFER_BG[1] = COLOR_WHITE; | ||
801 | PAL_BUFFER_BG[2] = COLOR_RED; | ||
802 | PAL_BUFFER_BG[3] = COLOR_BLUE; | ||
803 | PAL_BUFFER_BG[4] = COLOR_CYAN; | ||
804 | PAL_BUFFER_BG[5] = COLOR_GREY; | ||
805 | PAL_BUFFER_BG[6] = COLOR_WHITE; | ||
806 | PAL_BUFFER_BG[7] = COLOR_GREEN; | ||
807 | PAL_BUFFER_BG[8] = COLOR_PURPLE; | ||
808 | |||
809 | // Initialize text engine. | ||
810 | txt_init(txt_drawc); | ||
811 | } | ||