diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-18 09:00:52 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-18 09:00:52 +0200 |
commit | 4d1fc4013ec24b37bd2815c4143aaa46b17ef574 (patch) | |
tree | c0432487a20ca16e7fdba123b2cef2b4501dcf1c | |
parent | 45b083b8b0aca089df7dde2b194f942ae50d8e21 (diff) | |
download | gba-link-cable-tester-4d1fc4013ec24b37bd2815c4143aaa46b17ef574.tar.gz gba-link-cable-tester-4d1fc4013ec24b37bd2815c4143aaa46b17ef574.zip |
Add biglut 1bpp row decoding for m4
-rw-r--r-- | src/main.c | 14 | ||||
-rw-r--r-- | src/renderer_m0.c | 231 | ||||
-rw-r--r-- | src/renderer_m4.c | 151 |
3 files changed, 159 insertions, 237 deletions
@@ -11,7 +11,7 @@ WITH REGARD TO THIS SOFTWARE. | |||
11 | 11 | ||
12 | #include "gba/gba.h" | 12 | #include "gba/gba.h" |
13 | 13 | ||
14 | #include "renderer_m0.c" | 14 | #include "renderer_m4.c" |
15 | 15 | ||
16 | // | 16 | // |
17 | // Config parameters. | 17 | // Config parameters. |
@@ -143,8 +143,8 @@ int main(void) { | |||
143 | //draw_line(239, 81, 0, 129, 3); | 143 | //draw_line(239, 81, 0, 129, 3); |
144 | ////// right -> left && bot -> top | 144 | ////// right -> left && bot -> top |
145 | //draw_line(239, 129, 0, 40, 5); | 145 | //draw_line(239, 129, 0, 40, 5); |
146 | txt_render(); | 146 | // txt_render(); |
147 | txt_clear(); | 147 | // txt_clear(); |
148 | 148 | ||
149 | // draw_line(239, 149, 0, 10, 3); | 149 | // draw_line(239, 149, 0, 10, 3); |
150 | 150 | ||
@@ -159,10 +159,10 @@ int main(void) { | |||
159 | // txt_render(); | 159 | // txt_render(); |
160 | // txt_clear(); | 160 | // txt_clear(); |
161 | PROF(test_lines(), test_lines_cycles); | 161 | PROF(test_lines(), test_lines_cycles); |
162 | // PROF(test_rect(), test_rect_cycles); | 162 | PROF(test_rect(), test_rect_cycles); |
163 | // PROF(test_fill_rect(), test_fill_rect_cycles); | 163 | PROF(test_fill_rect(), test_fill_rect_cycles); |
164 | // PROF(test_chr(), test_chr_cycles); | 164 | PROF(test_chr(), test_chr_cycles); |
165 | // PROF(test_icn(), test_icn_cycles); | 165 | PROF(test_icn(), test_icn_cycles); |
166 | draw_filled_rect(0, 0, 140, 60, 0); | 166 | draw_filled_rect(0, 0, 140, 60, 0); |
167 | PROF_SHOW(); | 167 | PROF_SHOW(); |
168 | PROF(flip_buffer(), flip_cycles); | 168 | PROF(flip_buffer(), flip_cycles); |
diff --git a/src/renderer_m0.c b/src/renderer_m0.c index ece7dbb..7dd5b22 100644 --- a/src/renderer_m0.c +++ b/src/renderer_m0.c | |||
@@ -178,193 +178,6 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
178 | MAYBE_SWAP(y0, y1); | 178 | MAYBE_SWAP(y0, y1); |
179 | draw_vline(x0, y0, y1, clr); | 179 | draw_vline(x0, y0, y1, clr); |
180 | } else { | 180 | } else { |
181 | #if 1 | ||
182 | |||
183 | // Fixed Precision constants. | ||
184 | const int fp_bit = 6; | ||
185 | const int fp_one = FP_NUM(1, fp_bit); | ||
186 | const int fp_half = fp_one >> 1; | ||
187 | |||
188 | int dx = x0 > x1 ? x0 - x1 : x1 - x0; | ||
189 | int dy = y0 > y1 ? y0 - y1 : y1 - y0; | ||
190 | int dxf = (dx << fp_bit); | ||
191 | int dyf = (dy << fp_bit); | ||
192 | |||
193 | if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) { | ||
194 | SWAP(x0, x1); | ||
195 | SWAP(y0, y1); | ||
196 | } | ||
197 | |||
198 | int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit); | ||
199 | int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit); | ||
200 | int x_step = x0 > x1 ? -1 : 1; | ||
201 | int y_step = y0 > y1 ? -1 : 1; | ||
202 | int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy; | ||
203 | |||
204 | // | ||
205 | // DEBUG: reference | ||
206 | // for (int i = 0; i <= dx; i++) { | ||
207 | // if (distance >= 0) { | ||
208 | // distance -= 2 * dxf; | ||
209 | // y0 += y_step; | ||
210 | // } | ||
211 | // draw_pixel(x0, y0, clr); | ||
212 | // distance += 2 * dyf; | ||
213 | // x0 += x_step; | ||
214 | // } | ||
215 | // | ||
216 | |||
217 | size_t tile_x = x0 / 8; | ||
218 | size_t tile_y = y0 / 8; | ||
219 | size_t start_col = x0 % 8; | ||
220 | size_t start_row = y0 % 8; | ||
221 | u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; | ||
222 | |||
223 | // Update backbuffer. | ||
224 | if (dx >= dy) { | ||
225 | int step = dxf / dyf; | ||
226 | int remaining = dx; | ||
227 | |||
228 | // | ||
229 | // NOTE: Reference with pointers. | ||
230 | // for (int i = 0; i <= dx; i++) { | ||
231 | // start_col = x0 % 8; | ||
232 | // start_row = y0 % 8; | ||
233 | // size_t shift = start_col * sizeof(u32); | ||
234 | // u32 mask = 0xF << shift; | ||
235 | // u32 row = clr << shift; | ||
236 | // if (distance >= 0) { | ||
237 | // distance -= 2 * dxf; | ||
238 | // y0 += y_step; | ||
239 | // if (((int)start_row + y_step) >= 8 || ((int)start_row + y_step) < 0) { | ||
240 | // dst += 8 * 31 * y_step; | ||
241 | // } | ||
242 | // dst += y_step; | ||
243 | // } | ||
244 | // *dst = (*dst & ~mask) | row; | ||
245 | // distance += 2 * dyf; | ||
246 | // if ((int)(start_col + x_step) >= 8 ||(int)(start_col + x_step) < 0) { | ||
247 | // dst += 8; | ||
248 | // } | ||
249 | // x0 += x_step; | ||
250 | // } | ||
251 | // | ||
252 | |||
253 | while (remaining > (step - 1)) { | ||
254 | distance += step * 2 * dyf; | ||
255 | size_t start_row = y0 % 8; | ||
256 | u32 a = x0; | ||
257 | size_t start_col = a % 8; | ||
258 | size_t tile_x0 = a / 8; | ||
259 | size_t shift_left = start_col * 4; | ||
260 | if (distance >= 0) { | ||
261 | u32 b = x0 + step - 1; | ||
262 | size_t tile_x1 = b / 8; | ||
263 | size_t end_col = b % 8; | ||
264 | size_t shift_right = (7 - end_col) * 4; | ||
265 | size_t dtx = tile_x1 - tile_x0; | ||
266 | if (dtx < 1) { | ||
267 | u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); | ||
268 | u32 row = (0x11111111 * clr) & mask; | ||
269 | *dst = (*dst & ~mask) | row; | ||
270 | if ((start_col + x_step) >= 8 || end_col == 7) { | ||
271 | dst += 8 * x_step; | ||
272 | } | ||
273 | } else { | ||
274 | u32 mask = 0xFFFFFFFF; | ||
275 | u32 row = 0x11111111 * clr; | ||
276 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); | ||
277 | dst += 8 * x_step; | ||
278 | for (size_t i = 1; i < dtx; i++) { | ||
279 | *dst = row; | ||
280 | dst += 8 * x_step; | ||
281 | } | ||
282 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); | ||
283 | if (end_col == 7) { | ||
284 | dst += 8 * x_step; | ||
285 | } | ||
286 | } | ||
287 | // draw_hline(a, b, y0, clr); | ||
288 | x0 += step; | ||
289 | remaining -= step; | ||
290 | } else { | ||
291 | if (remaining < step) { | ||
292 | break; | ||
293 | } | ||
294 | u32 b = x0 + step; | ||
295 | size_t tile_x1 = b / 8; | ||
296 | size_t end_col = b % 8; | ||
297 | size_t dtx = tile_x1 - tile_x0; | ||
298 | size_t shift_right = (7 - end_col) * 4; | ||
299 | if (dtx < 1) { | ||
300 | u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); | ||
301 | u32 row = (0x11111111 * clr) & mask; | ||
302 | *dst = (*dst & ~mask) | row; | ||
303 | if ((start_col + x_step) >= 8 || end_col == 7) { | ||
304 | dst += 8 * x_step; | ||
305 | } | ||
306 | } else { | ||
307 | u32 mask = 0xFFFFFFFF; | ||
308 | u32 row = 0x11111111 * clr; | ||
309 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); | ||
310 | dst += 8 * x_step; | ||
311 | for (size_t i = 1; i < dtx; i++) { | ||
312 | *dst = row; | ||
313 | dst += 8 * x_step; | ||
314 | } | ||
315 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); | ||
316 | if (end_col == 7) { | ||
317 | dst += 8 * x_step; | ||
318 | } | ||
319 | } | ||
320 | // draw_hline(a, b, y0, clr); | ||
321 | distance += 2 * dyf; | ||
322 | x0 += (step + 1); | ||
323 | remaining -= step + 1; | ||
324 | } | ||
325 | if (((int)start_row + y_step) >= 8 || ((int)start_row + y_step) < 0) { | ||
326 | dst += 8 * 31 * y_step; | ||
327 | } | ||
328 | dst += y_step; | ||
329 | distance -= 2 * dxf; | ||
330 | y0 += y_step; | ||
331 | } | ||
332 | if (remaining >= 0) { | ||
333 | draw_hline(x0, x0 + remaining, y0, clr); | ||
334 | } | ||
335 | |||
336 | // for (int i = 0; i <= dx; i++) { | ||
337 | // if (distance >= 0) { | ||
338 | // distance -= 2 * dxf; | ||
339 | // y0 += y_step; | ||
340 | // } | ||
341 | // draw_pixel(x0, y0, clr); | ||
342 | // distance += 2 * dyf; | ||
343 | // x0 += x_step; | ||
344 | // } | ||
345 | } else { | ||
346 | // int step = dyf / dxf; | ||
347 | // int remaining = dy; | ||
348 | // while (remaining > (step - 1)) { | ||
349 | // distance += step * 2 * dxf; | ||
350 | // if (distance >= 0) { | ||
351 | // draw_vline(x0, y0, y0 + step - 1, clr); | ||
352 | // y0 += y_step * step; | ||
353 | // remaining -= step; | ||
354 | // } else { | ||
355 | // draw_vline(x0, y0, y0 + step, clr); | ||
356 | // distance += 2 * dxf; | ||
357 | // y0 += y_step * (step + 1); | ||
358 | // remaining -= step + 1; | ||
359 | // } | ||
360 | // distance -= 2 * dyf; | ||
361 | // x0 += x_step; | ||
362 | // } | ||
363 | // if (remaining >= 0) { | ||
364 | // draw_vline(x0, y0, y0 + remaining, clr); | ||
365 | // } | ||
366 | } | ||
367 | #else | ||
368 | // Fixed Precision constants. | 181 | // Fixed Precision constants. |
369 | const int fp_bit = 6; | 182 | const int fp_bit = 6; |
370 | const int fp_one = FP_NUM(1, fp_bit); | 183 | const int fp_one = FP_NUM(1, fp_bit); |
@@ -431,7 +244,6 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
431 | draw_vline(x0, y0, y0 + remaining, clr); | 244 | draw_vline(x0, y0, y0 + remaining, clr); |
432 | } | 245 | } |
433 | } | 246 | } |
434 | #endif | ||
435 | } | 247 | } |
436 | } | 248 | } |
437 | 249 | ||
@@ -780,32 +592,19 @@ draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 flip_x) { | |||
780 | if (flip_x) { | 592 | if (flip_x) { |
781 | lut = dec_byte_flip_x; | 593 | lut = dec_byte_flip_x; |
782 | } | 594 | } |
783 | #endif | 595 | u32 clr_a = lut[a]; |
784 | if (start_col == 0) { | 596 | u32 clr_b = lut[b]; |
785 | #if DEC_BIG_LUT | ||
786 | u32 clr_a = lut[a]; | ||
787 | u32 clr_b = lut[b]; | ||
788 | #else | 597 | #else |
789 | u32 clr_a = decode_1bpp(a, flip_x); | 598 | u32 clr_a = decode_1bpp(a, flip_x); |
790 | u32 clr_b = decode_1bpp(b, flip_x); | 599 | u32 clr_b = decode_1bpp(b, flip_x); |
791 | #endif | 600 | #endif |
792 | u32 mask_a = (clr_a * 0xF); | 601 | u32 mask_a = (clr_a * 0xF); |
793 | u32 mask_b = (clr_b * 0xF); | 602 | u32 mask_b = (clr_b * 0xF); |
794 | u32 mask = (mask_a | mask_b); | 603 | u32 mask = (mask_a | mask_b); |
795 | u32 color = clr_a + (clr_b << 1); | 604 | u32 color = clr_a + (clr_b << 1); |
605 | if (start_col == 0) { | ||
796 | dst[0] = (dst[0] & ~mask) | color; | 606 | dst[0] = (dst[0] & ~mask) | color; |
797 | } else { | 607 | } else { |
798 | #if DEC_BIG_LUT | ||
799 | u32 clr_a = lut[a]; | ||
800 | u32 clr_b = lut[b]; | ||
801 | #else | ||
802 | u32 clr_a = decode_1bpp(a, flip_x); | ||
803 | u32 clr_b = decode_1bpp(b, flip_x); | ||
804 | #endif | ||
805 | u32 mask_a = (clr_a * 0xF); | ||
806 | u32 mask_b = (clr_b * 0xF); | ||
807 | u32 mask = (mask_a | mask_b); | ||
808 | u32 color = clr_a + (clr_b << 1); | ||
809 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | 608 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
810 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | 609 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
811 | } | 610 | } |
@@ -826,17 +625,13 @@ draw_1bpp_row(size_t x, size_t y, u8 a, u8 clr, u8 flip_x) { | |||
826 | size_t shift_left = start_col * 4; | 625 | size_t shift_left = start_col * 4; |
827 | size_t shift_right = (8 - start_col) * 4; | 626 | size_t shift_right = (8 - start_col) * 4; |
828 | 627 | ||
829 | u32 *dst = &backbuf[(tile_x + tile_y * 32) * 8]; | 628 | u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8]; |
830 | dst += start_row; | 629 | u32 color = decode_1bpp(a, flip_x); |
630 | u32 mask = (color * 0xF); | ||
631 | color *= clr; | ||
831 | if (start_col == 0) { | 632 | if (start_col == 0) { |
832 | u32 color = decode_1bpp(a, flip_x); | ||
833 | u32 mask = (color * 0xF); | ||
834 | color *= clr; | ||
835 | dst[0] = (dst[0] & ~mask) | color; | 633 | dst[0] = (dst[0] & ~mask) | color; |
836 | } else { | 634 | } else { |
837 | u32 color = decode_1bpp(a, flip_x); | ||
838 | u32 mask = (color * 0xF); | ||
839 | color *= clr; | ||
840 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | 635 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
841 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); | 636 | dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); |
842 | } | 637 | } |
diff --git a/src/renderer_m4.c b/src/renderer_m4.c index cc9da58..6da7996 100644 --- a/src/renderer_m4.c +++ b/src/renderer_m4.c | |||
@@ -210,6 +210,129 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
210 | screen_updated = true; | 210 | screen_updated = true; |
211 | } | 211 | } |
212 | 212 | ||
213 | #define DEC_BIG_LUT 1 | ||
214 | |||
215 | #if DEC_BIG_LUT | ||
216 | static u64 dec_byte_flip_x[256] = { | ||
217 | 0x0000000000000000, 0x0000000000000001, 0x0000000000000100, 0x0000000000000101, 0x0000000000010000, | ||
218 | 0x0000000000010001, 0x0000000000010100, 0x0000000000010101, 0x0000000001000000, 0x0000000001000001, | ||
219 | 0x0000000001000100, 0x0000000001000101, 0x0000000001010000, 0x0000000001010001, 0x0000000001010100, | ||
220 | 0x0000000001010101, 0x0000000100000000, 0x0000000100000001, 0x0000000100000100, 0x0000000100000101, | ||
221 | 0x0000000100010000, 0x0000000100010001, 0x0000000100010100, 0x0000000100010101, 0x0000000101000000, | ||
222 | 0x0000000101000001, 0x0000000101000100, 0x0000000101000101, 0x0000000101010000, 0x0000000101010001, | ||
223 | 0x0000000101010100, 0x0000000101010101, 0x0000010000000000, 0x0000010000000001, 0x0000010000000100, | ||
224 | 0x0000010000000101, 0x0000010000010000, 0x0000010000010001, 0x0000010000010100, 0x0000010000010101, | ||
225 | 0x0000010001000000, 0x0000010001000001, 0x0000010001000100, 0x0000010001000101, 0x0000010001010000, | ||
226 | 0x0000010001010001, 0x0000010001010100, 0x0000010001010101, 0x0000010100000000, 0x0000010100000001, | ||
227 | 0x0000010100000100, 0x0000010100000101, 0x0000010100010000, 0x0000010100010001, 0x0000010100010100, | ||
228 | 0x0000010100010101, 0x0000010101000000, 0x0000010101000001, 0x0000010101000100, 0x0000010101000101, | ||
229 | 0x0000010101010000, 0x0000010101010001, 0x0000010101010100, 0x0000010101010101, 0x0001000000000000, | ||
230 | 0x0001000000000001, 0x0001000000000100, 0x0001000000000101, 0x0001000000010000, 0x0001000000010001, | ||
231 | 0x0001000000010100, 0x0001000000010101, 0x0001000001000000, 0x0001000001000001, 0x0001000001000100, | ||
232 | 0x0001000001000101, 0x0001000001010000, 0x0001000001010001, 0x0001000001010100, 0x0001000001010101, | ||
233 | 0x0001000100000000, 0x0001000100000001, 0x0001000100000100, 0x0001000100000101, 0x0001000100010000, | ||
234 | 0x0001000100010001, 0x0001000100010100, 0x0001000100010101, 0x0001000101000000, 0x0001000101000001, | ||
235 | 0x0001000101000100, 0x0001000101000101, 0x0001000101010000, 0x0001000101010001, 0x0001000101010100, | ||
236 | 0x0001000101010101, 0x0001010000000000, 0x0001010000000001, 0x0001010000000100, 0x0001010000000101, | ||
237 | 0x0001010000010000, 0x0001010000010001, 0x0001010000010100, 0x0001010000010101, 0x0001010001000000, | ||
238 | 0x0001010001000001, 0x0001010001000100, 0x0001010001000101, 0x0001010001010000, 0x0001010001010001, | ||
239 | 0x0001010001010100, 0x0001010001010101, 0x0001010100000000, 0x0001010100000001, 0x0001010100000100, | ||
240 | 0x0001010100000101, 0x0001010100010000, 0x0001010100010001, 0x0001010100010100, 0x0001010100010101, | ||
241 | 0x0001010101000000, 0x0001010101000001, 0x0001010101000100, 0x0001010101000101, 0x0001010101010000, | ||
242 | 0x0001010101010001, 0x0001010101010100, 0x0001010101010101, 0x0100000000000000, 0x0100000000000001, | ||
243 | 0x0100000000000100, 0x0100000000000101, 0x0100000000010000, 0x0100000000010001, 0x0100000000010100, | ||
244 | 0x0100000000010101, 0x0100000001000000, 0x0100000001000001, 0x0100000001000100, 0x0100000001000101, | ||
245 | 0x0100000001010000, 0x0100000001010001, 0x0100000001010100, 0x0100000001010101, 0x0100000100000000, | ||
246 | 0x0100000100000001, 0x0100000100000100, 0x0100000100000101, 0x0100000100010000, 0x0100000100010001, | ||
247 | 0x0100000100010100, 0x0100000100010101, 0x0100000101000000, 0x0100000101000001, 0x0100000101000100, | ||
248 | 0x0100000101000101, 0x0100000101010000, 0x0100000101010001, 0x0100000101010100, 0x0100000101010101, | ||
249 | 0x0100010000000000, 0x0100010000000001, 0x0100010000000100, 0x0100010000000101, 0x0100010000010000, | ||
250 | 0x0100010000010001, 0x0100010000010100, 0x0100010000010101, 0x0100010001000000, 0x0100010001000001, | ||
251 | 0x0100010001000100, 0x0100010001000101, 0x0100010001010000, 0x0100010001010001, 0x0100010001010100, | ||
252 | 0x0100010001010101, 0x0100010100000000, 0x0100010100000001, 0x0100010100000100, 0x0100010100000101, | ||
253 | 0x0100010100010000, 0x0100010100010001, 0x0100010100010100, 0x0100010100010101, 0x0100010101000000, | ||
254 | 0x0100010101000001, 0x0100010101000100, 0x0100010101000101, 0x0100010101010000, 0x0100010101010001, | ||
255 | 0x0100010101010100, 0x0100010101010101, 0x0101000000000000, 0x0101000000000001, 0x0101000000000100, | ||
256 | 0x0101000000000101, 0x0101000000010000, 0x0101000000010001, 0x0101000000010100, 0x0101000000010101, | ||
257 | 0x0101000001000000, 0x0101000001000001, 0x0101000001000100, 0x0101000001000101, 0x0101000001010000, | ||
258 | 0x0101000001010001, 0x0101000001010100, 0x0101000001010101, 0x0101000100000000, 0x0101000100000001, | ||
259 | 0x0101000100000100, 0x0101000100000101, 0x0101000100010000, 0x0101000100010001, 0x0101000100010100, | ||
260 | 0x0101000100010101, 0x0101000101000000, 0x0101000101000001, 0x0101000101000100, 0x0101000101000101, | ||
261 | 0x0101000101010000, 0x0101000101010001, 0x0101000101010100, 0x0101000101010101, 0x0101010000000000, | ||
262 | 0x0101010000000001, 0x0101010000000100, 0x0101010000000101, 0x0101010000010000, 0x0101010000010001, | ||
263 | 0x0101010000010100, 0x0101010000010101, 0x0101010001000000, 0x0101010001000001, 0x0101010001000100, | ||
264 | 0x0101010001000101, 0x0101010001010000, 0x0101010001010001, 0x0101010001010100, 0x0101010001010101, | ||
265 | 0x0101010100000000, 0x0101010100000001, 0x0101010100000100, 0x0101010100000101, 0x0101010100010000, | ||
266 | 0x0101010100010001, 0x0101010100010100, 0x0101010100010101, 0x0101010101000000, 0x0101010101000001, | ||
267 | 0x0101010101000100, 0x0101010101000101, 0x0101010101010000, 0x0101010101010001, 0x0101010101010100, | ||
268 | 0x0101010101010101 | ||
269 | }; | ||
270 | |||
271 | static u64 dec_byte[256] = { | ||
272 | 0x0000000000000000, 0x0100000000000000, 0x0001000000000000, 0x0101000000000000, 0x0000010000000000, | ||
273 | 0x0100010000000000, 0x0001010000000000, 0x0101010000000000, 0x0000000100000000, 0x0100000100000000, | ||
274 | 0x0001000100000000, 0x0101000100000000, 0x0000010100000000, 0x0100010100000000, 0x0001010100000000, | ||
275 | 0x0101010100000000, 0x0000000001000000, 0x0100000001000000, 0x0001000001000000, 0x0101000001000000, | ||
276 | 0x0000010001000000, 0x0100010001000000, 0x0001010001000000, 0x0101010001000000, 0x0000000101000000, | ||
277 | 0x0100000101000000, 0x0001000101000000, 0x0101000101000000, 0x0000010101000000, 0x0100010101000000, | ||
278 | 0x0001010101000000, 0x0101010101000000, 0x0000000000010000, 0x0100000000010000, 0x0001000000010000, | ||
279 | 0x0101000000010000, 0x0000010000010000, 0x0100010000010000, 0x0001010000010000, 0x0101010000010000, | ||
280 | 0x0000000100010000, 0x0100000100010000, 0x0001000100010000, 0x0101000100010000, 0x0000010100010000, | ||
281 | 0x0100010100010000, 0x0001010100010000, 0x0101010100010000, 0x0000000001010000, 0x0100000001010000, | ||
282 | 0x0001000001010000, 0x0101000001010000, 0x0000010001010000, 0x0100010001010000, 0x0001010001010000, | ||
283 | 0x0101010001010000, 0x0000000101010000, 0x0100000101010000, 0x0001000101010000, 0x0101000101010000, | ||
284 | 0x0000010101010000, 0x0100010101010000, 0x0001010101010000, 0x0101010101010000, 0x0000000000000100, | ||
285 | 0x0100000000000100, 0x0001000000000100, 0x0101000000000100, 0x0000010000000100, 0x0100010000000100, | ||
286 | 0x0001010000000100, 0x0101010000000100, 0x0000000100000100, 0x0100000100000100, 0x0001000100000100, | ||
287 | 0x0101000100000100, 0x0000010100000100, 0x0100010100000100, 0x0001010100000100, 0x0101010100000100, | ||
288 | 0x0000000001000100, 0x0100000001000100, 0x0001000001000100, 0x0101000001000100, 0x0000010001000100, | ||
289 | 0x0100010001000100, 0x0001010001000100, 0x0101010001000100, 0x0000000101000100, 0x0100000101000100, | ||
290 | 0x0001000101000100, 0x0101000101000100, 0x0000010101000100, 0x0100010101000100, 0x0001010101000100, | ||
291 | 0x0101010101000100, 0x0000000000010100, 0x0100000000010100, 0x0001000000010100, 0x0101000000010100, | ||
292 | 0x0000010000010100, 0x0100010000010100, 0x0001010000010100, 0x0101010000010100, 0x0000000100010100, | ||
293 | 0x0100000100010100, 0x0001000100010100, 0x0101000100010100, 0x0000010100010100, 0x0100010100010100, | ||
294 | 0x0001010100010100, 0x0101010100010100, 0x0000000001010100, 0x0100000001010100, 0x0001000001010100, | ||
295 | 0x0101000001010100, 0x0000010001010100, 0x0100010001010100, 0x0001010001010100, 0x0101010001010100, | ||
296 | 0x0000000101010100, 0x0100000101010100, 0x0001000101010100, 0x0101000101010100, 0x0000010101010100, | ||
297 | 0x0100010101010100, 0x0001010101010100, 0x0101010101010100, 0x0000000000000001, 0x0100000000000001, | ||
298 | 0x0001000000000001, 0x0101000000000001, 0x0000010000000001, 0x0100010000000001, 0x0001010000000001, | ||
299 | 0x0101010000000001, 0x0000000100000001, 0x0100000100000001, 0x0001000100000001, 0x0101000100000001, | ||
300 | 0x0000010100000001, 0x0100010100000001, 0x0001010100000001, 0x0101010100000001, 0x0000000001000001, | ||
301 | 0x0100000001000001, 0x0001000001000001, 0x0101000001000001, 0x0000010001000001, 0x0100010001000001, | ||
302 | 0x0001010001000001, 0x0101010001000001, 0x0000000101000001, 0x0100000101000001, 0x0001000101000001, | ||
303 | 0x0101000101000001, 0x0000010101000001, 0x0100010101000001, 0x0001010101000001, 0x0101010101000001, | ||
304 | 0x0000000000010001, 0x0100000000010001, 0x0001000000010001, 0x0101000000010001, 0x0000010000010001, | ||
305 | 0x0100010000010001, 0x0001010000010001, 0x0101010000010001, 0x0000000100010001, 0x0100000100010001, | ||
306 | 0x0001000100010001, 0x0101000100010001, 0x0000010100010001, 0x0100010100010001, 0x0001010100010001, | ||
307 | 0x0101010100010001, 0x0000000001010001, 0x0100000001010001, 0x0001000001010001, 0x0101000001010001, | ||
308 | 0x0000010001010001, 0x0100010001010001, 0x0001010001010001, 0x0101010001010001, 0x0000000101010001, | ||
309 | 0x0100000101010001, 0x0001000101010001, 0x0101000101010001, 0x0000010101010001, 0x0100010101010001, | ||
310 | 0x0001010101010001, 0x0101010101010001, 0x0000000000000101, 0x0100000000000101, 0x0001000000000101, | ||
311 | 0x0101000000000101, 0x0000010000000101, 0x0100010000000101, 0x0001010000000101, 0x0101010000000101, | ||
312 | 0x0000000100000101, 0x0100000100000101, 0x0001000100000101, 0x0101000100000101, 0x0000010100000101, | ||
313 | 0x0100010100000101, 0x0001010100000101, 0x0101010100000101, 0x0000000001000101, 0x0100000001000101, | ||
314 | 0x0001000001000101, 0x0101000001000101, 0x0000010001000101, 0x0100010001000101, 0x0001010001000101, | ||
315 | 0x0101010001000101, 0x0000000101000101, 0x0100000101000101, 0x0001000101000101, 0x0101000101000101, | ||
316 | 0x0000010101000101, 0x0100010101000101, 0x0001010101000101, 0x0101010101000101, 0x0000000000010101, | ||
317 | 0x0100000000010101, 0x0001000000010101, 0x0101000000010101, 0x0000010000010101, 0x0100010000010101, | ||
318 | 0x0001010000010101, 0x0101010000010101, 0x0000000100010101, 0x0100000100010101, 0x0001000100010101, | ||
319 | 0x0101000100010101, 0x0000010100010101, 0x0100010100010101, 0x0001010100010101, 0x0101010100010101, | ||
320 | 0x0000000001010101, 0x0100000001010101, 0x0001000001010101, 0x0101000001010101, 0x0000010001010101, | ||
321 | 0x0100010001010101, 0x0001010001010101, 0x0101010001010101, 0x0000000101010101, 0x0100000101010101, | ||
322 | 0x0001000101010101, 0x0101000101010101, 0x0000010101010101, 0x0100010101010101, 0x0001010101010101, | ||
323 | 0x0101010101010101 | ||
324 | }; | ||
325 | IWRAM_CODE | ||
326 | static inline | ||
327 | u64 | ||
328 | decode_1bpp(u8 row, u8 flip_x) { | ||
329 | if (flip_x) { | ||
330 | return dec_byte_flip_x[row]; | ||
331 | } | ||
332 | return dec_byte[row]; | ||
333 | } | ||
334 | #else | ||
335 | |||
213 | static u32 dec_nibble[] = { | 336 | static u32 dec_nibble[] = { |
214 | 0x00000000, 0x01000000, 0x00010000, 0x01010000, | 337 | 0x00000000, 0x01000000, 0x00010000, 0x01010000, |
215 | 0x00000100, 0x01000100, 0x00010100, 0x01010100, | 338 | 0x00000100, 0x01000100, 0x00010100, 0x01010100, |
@@ -235,6 +358,7 @@ decode_1bpp(u8 row, u8 flip_x) { | |||
235 | u32 *lut = dec_nibble; | 358 | u32 *lut = dec_nibble; |
236 | return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF]; | 359 | return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF]; |
237 | } | 360 | } |
361 | #endif | ||
238 | 362 | ||
239 | IWRAM_CODE | 363 | IWRAM_CODE |
240 | static inline | 364 | static inline |
@@ -248,21 +372,24 @@ draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 flip_x) { | |||
248 | size_t shift_right = (8 - start_col) * 8; | 372 | size_t shift_right = (8 - start_col) * 8; |
249 | 373 | ||
250 | u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2]; | 374 | u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2]; |
375 | #if DEC_BIG_LUT | ||
376 | u64 *lut = dec_byte; | ||
377 | if (flip_x) { | ||
378 | lut = dec_byte_flip_x; | ||
379 | } | ||
380 | u64 clr_a = lut[a]; | ||
381 | u64 clr_b = lut[b]; | ||
382 | #else | ||
383 | u64 clr_a = decode_1bpp(a, flip_x); | ||
384 | u64 clr_b = decode_1bpp(b, flip_x); | ||
385 | #endif | ||
386 | u64 mask_a = (clr_a * 0xF); | ||
387 | u64 mask_b = (clr_b * 0xF); | ||
388 | u64 mask = (mask_a | mask_b); | ||
389 | u64 color = clr_a + (clr_b << 1); | ||
251 | if (start_col == 0) { | 390 | if (start_col == 0) { |
252 | u64 clr_a = decode_1bpp(a, flip_x); | ||
253 | u64 clr_b = decode_1bpp(b, flip_x); | ||
254 | u64 mask_a = (clr_a * 0xF); | ||
255 | u64 mask_b = (clr_b * 0xF); | ||
256 | u64 mask = (mask_a | mask_b); | ||
257 | u64 color = clr_a + (clr_b << 1); | ||
258 | dst[0] = (dst[0] & ~mask) | color; | 391 | dst[0] = (dst[0] & ~mask) | color; |
259 | } else { | 392 | } else { |
260 | u64 clr_a = decode_1bpp(a, flip_x); | ||
261 | u64 clr_b = decode_1bpp(b, flip_x); | ||
262 | u64 mask_a = (clr_a * 0xF); | ||
263 | u64 mask_b = (clr_b * 0xF); | ||
264 | u64 mask = (mask_a | mask_b); | ||
265 | u64 color = clr_a + (clr_b << 1); | ||
266 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); | 393 | dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); |
267 | if ((x + 7) > (SCREEN_WIDTH)) { | 394 | if ((x + 7) > (SCREEN_WIDTH)) { |
268 | return; | 395 | return; |