summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-18 09:00:52 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-18 09:00:52 +0200
commit 4d1fc4013ec24b37bd2815c4143aaa46b17ef574 (patch)
tree c0432487a20ca16e7fdba123b2cef2b4501dcf1c
parent 45b083b8b0aca089df7dde2b194f942ae50d8e21 (diff)
download gba-link-cable-tester-4d1fc4013ec24b37bd2815c4143aaa46b17ef574.tar.gz
gba-link-cable-tester-4d1fc4013ec24b37bd2815c4143aaa46b17ef574.zip
Add biglut 1bpp row decoding for m4
-rw-r--r-- src/main.c 14
-rw-r--r-- src/renderer_m0.c 231
-rw-r--r-- src/renderer_m4.c 151
3 files changed, 159 insertions, 237 deletions
diff --git a/src/main.c b/src/main.c
index 29e420d..fa4457a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -11,7 +11,7 @@ WITH REGARD TO THIS SOFTWARE.
11 11
12#include "gba/gba.h" 12#include "gba/gba.h"
13 13
14#include "renderer_m0.c" 14#include "renderer_m4.c"
15 15
16// 16//
17// Config parameters. 17// Config parameters.
@@ -143,8 +143,8 @@ int main(void) {
143 //draw_line(239, 81, 0, 129, 3); 143 //draw_line(239, 81, 0, 129, 3);
144 ////// right -> left && bot -> top 144 ////// right -> left && bot -> top
145 //draw_line(239, 129, 0, 40, 5); 145 //draw_line(239, 129, 0, 40, 5);
146 txt_render(); 146 // txt_render();
147 txt_clear(); 147 // txt_clear();
148 148
149 // draw_line(239, 149, 0, 10, 3); 149 // draw_line(239, 149, 0, 10, 3);
150 150
@@ -159,10 +159,10 @@ int main(void) {
159 // txt_render(); 159 // txt_render();
160 // txt_clear(); 160 // txt_clear();
161 PROF(test_lines(), test_lines_cycles); 161 PROF(test_lines(), test_lines_cycles);
162 // PROF(test_rect(), test_rect_cycles); 162 PROF(test_rect(), test_rect_cycles);
163 // PROF(test_fill_rect(), test_fill_rect_cycles); 163 PROF(test_fill_rect(), test_fill_rect_cycles);
164 // PROF(test_chr(), test_chr_cycles); 164 PROF(test_chr(), test_chr_cycles);
165 // PROF(test_icn(), test_icn_cycles); 165 PROF(test_icn(), test_icn_cycles);
166 draw_filled_rect(0, 0, 140, 60, 0); 166 draw_filled_rect(0, 0, 140, 60, 0);
167 PROF_SHOW(); 167 PROF_SHOW();
168 PROF(flip_buffer(), flip_cycles); 168 PROF(flip_buffer(), flip_cycles);
diff --git a/src/renderer_m0.c b/src/renderer_m0.c
index ece7dbb..7dd5b22 100644
--- a/src/renderer_m0.c
+++ b/src/renderer_m0.c
@@ -178,193 +178,6 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
178 MAYBE_SWAP(y0, y1); 178 MAYBE_SWAP(y0, y1);
179 draw_vline(x0, y0, y1, clr); 179 draw_vline(x0, y0, y1, clr);
180 } else { 180 } else {
181#if 1
182
183 // Fixed Precision constants.
184 const int fp_bit = 6;
185 const int fp_one = FP_NUM(1, fp_bit);
186 const int fp_half = fp_one >> 1;
187
188 int dx = x0 > x1 ? x0 - x1 : x1 - x0;
189 int dy = y0 > y1 ? y0 - y1 : y1 - y0;
190 int dxf = (dx << fp_bit);
191 int dyf = (dy << fp_bit);
192
193 if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) {
194 SWAP(x0, x1);
195 SWAP(y0, y1);
196 }
197
198 int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit);
199 int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit);
200 int x_step = x0 > x1 ? -1 : 1;
201 int y_step = y0 > y1 ? -1 : 1;
202 int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy;
203
204 //
205 // DEBUG: reference
206 // for (int i = 0; i <= dx; i++) {
207 // if (distance >= 0) {
208 // distance -= 2 * dxf;
209 // y0 += y_step;
210 // }
211 // draw_pixel(x0, y0, clr);
212 // distance += 2 * dyf;
213 // x0 += x_step;
214 // }
215 //
216
217 size_t tile_x = x0 / 8;
218 size_t tile_y = y0 / 8;
219 size_t start_col = x0 % 8;
220 size_t start_row = y0 % 8;
221 u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8];
222
223 // Update backbuffer.
224 if (dx >= dy) {
225 int step = dxf / dyf;
226 int remaining = dx;
227
228 //
229 // NOTE: Reference with pointers.
230 // for (int i = 0; i <= dx; i++) {
231 // start_col = x0 % 8;
232 // start_row = y0 % 8;
233 // size_t shift = start_col * sizeof(u32);
234 // u32 mask = 0xF << shift;
235 // u32 row = clr << shift;
236 // if (distance >= 0) {
237 // distance -= 2 * dxf;
238 // y0 += y_step;
239 // if (((int)start_row + y_step) >= 8 || ((int)start_row + y_step) < 0) {
240 // dst += 8 * 31 * y_step;
241 // }
242 // dst += y_step;
243 // }
244 // *dst = (*dst & ~mask) | row;
245 // distance += 2 * dyf;
246 // if ((int)(start_col + x_step) >= 8 ||(int)(start_col + x_step) < 0) {
247 // dst += 8;
248 // }
249 // x0 += x_step;
250 // }
251 //
252
253 while (remaining > (step - 1)) {
254 distance += step * 2 * dyf;
255 size_t start_row = y0 % 8;
256 u32 a = x0;
257 size_t start_col = a % 8;
258 size_t tile_x0 = a / 8;
259 size_t shift_left = start_col * 4;
260 if (distance >= 0) {
261 u32 b = x0 + step - 1;
262 size_t tile_x1 = b / 8;
263 size_t end_col = b % 8;
264 size_t shift_right = (7 - end_col) * 4;
265 size_t dtx = tile_x1 - tile_x0;
266 if (dtx < 1) {
267 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left);
268 u32 row = (0x11111111 * clr) & mask;
269 *dst = (*dst & ~mask) | row;
270 if ((start_col + x_step) >= 8 || end_col == 7) {
271 dst += 8 * x_step;
272 }
273 } else {
274 u32 mask = 0xFFFFFFFF;
275 u32 row = 0x11111111 * clr;
276 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
277 dst += 8 * x_step;
278 for (size_t i = 1; i < dtx; i++) {
279 *dst = row;
280 dst += 8 * x_step;
281 }
282 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
283 if (end_col == 7) {
284 dst += 8 * x_step;
285 }
286 }
287 // draw_hline(a, b, y0, clr);
288 x0 += step;
289 remaining -= step;
290 } else {
291 if (remaining < step) {
292 break;
293 }
294 u32 b = x0 + step;
295 size_t tile_x1 = b / 8;
296 size_t end_col = b % 8;
297 size_t dtx = tile_x1 - tile_x0;
298 size_t shift_right = (7 - end_col) * 4;
299 if (dtx < 1) {
300 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left);
301 u32 row = (0x11111111 * clr) & mask;
302 *dst = (*dst & ~mask) | row;
303 if ((start_col + x_step) >= 8 || end_col == 7) {
304 dst += 8 * x_step;
305 }
306 } else {
307 u32 mask = 0xFFFFFFFF;
308 u32 row = 0x11111111 * clr;
309 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
310 dst += 8 * x_step;
311 for (size_t i = 1; i < dtx; i++) {
312 *dst = row;
313 dst += 8 * x_step;
314 }
315 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
316 if (end_col == 7) {
317 dst += 8 * x_step;
318 }
319 }
320 // draw_hline(a, b, y0, clr);
321 distance += 2 * dyf;
322 x0 += (step + 1);
323 remaining -= step + 1;
324 }
325 if (((int)start_row + y_step) >= 8 || ((int)start_row + y_step) < 0) {
326 dst += 8 * 31 * y_step;
327 }
328 dst += y_step;
329 distance -= 2 * dxf;
330 y0 += y_step;
331 }
332 if (remaining >= 0) {
333 draw_hline(x0, x0 + remaining, y0, clr);
334 }
335
336 // for (int i = 0; i <= dx; i++) {
337 // if (distance >= 0) {
338 // distance -= 2 * dxf;
339 // y0 += y_step;
340 // }
341 // draw_pixel(x0, y0, clr);
342 // distance += 2 * dyf;
343 // x0 += x_step;
344 // }
345 } else {
346 // int step = dyf / dxf;
347 // int remaining = dy;
348 // while (remaining > (step - 1)) {
349 // distance += step * 2 * dxf;
350 // if (distance >= 0) {
351 // draw_vline(x0, y0, y0 + step - 1, clr);
352 // y0 += y_step * step;
353 // remaining -= step;
354 // } else {
355 // draw_vline(x0, y0, y0 + step, clr);
356 // distance += 2 * dxf;
357 // y0 += y_step * (step + 1);
358 // remaining -= step + 1;
359 // }
360 // distance -= 2 * dyf;
361 // x0 += x_step;
362 // }
363 // if (remaining >= 0) {
364 // draw_vline(x0, y0, y0 + remaining, clr);
365 // }
366 }
367#else
368 // Fixed Precision constants. 181 // Fixed Precision constants.
369 const int fp_bit = 6; 182 const int fp_bit = 6;
370 const int fp_one = FP_NUM(1, fp_bit); 183 const int fp_one = FP_NUM(1, fp_bit);
@@ -431,7 +244,6 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
431 draw_vline(x0, y0, y0 + remaining, clr); 244 draw_vline(x0, y0, y0 + remaining, clr);
432 } 245 }
433 } 246 }
434#endif
435 } 247 }
436} 248}
437 249
@@ -780,32 +592,19 @@ draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 flip_x) {
780 if (flip_x) { 592 if (flip_x) {
781 lut = dec_byte_flip_x; 593 lut = dec_byte_flip_x;
782 } 594 }
783#endif 595 u32 clr_a = lut[a];
784 if (start_col == 0) { 596 u32 clr_b = lut[b];
785#if DEC_BIG_LUT
786 u32 clr_a = lut[a];
787 u32 clr_b = lut[b];
788#else 597#else
789 u32 clr_a = decode_1bpp(a, flip_x); 598 u32 clr_a = decode_1bpp(a, flip_x);
790 u32 clr_b = decode_1bpp(b, flip_x); 599 u32 clr_b = decode_1bpp(b, flip_x);
791#endif 600#endif
792 u32 mask_a = (clr_a * 0xF); 601 u32 mask_a = (clr_a * 0xF);
793 u32 mask_b = (clr_b * 0xF); 602 u32 mask_b = (clr_b * 0xF);
794 u32 mask = (mask_a | mask_b); 603 u32 mask = (mask_a | mask_b);
795 u32 color = clr_a + (clr_b << 1); 604 u32 color = clr_a + (clr_b << 1);
605 if (start_col == 0) {
796 dst[0] = (dst[0] & ~mask) | color; 606 dst[0] = (dst[0] & ~mask) | color;
797 } else { 607 } else {
798#if DEC_BIG_LUT
799 u32 clr_a = lut[a];
800 u32 clr_b = lut[b];
801#else
802 u32 clr_a = decode_1bpp(a, flip_x);
803 u32 clr_b = decode_1bpp(b, flip_x);
804#endif
805 u32 mask_a = (clr_a * 0xF);
806 u32 mask_b = (clr_b * 0xF);
807 u32 mask = (mask_a | mask_b);
808 u32 color = clr_a + (clr_b << 1);
809 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 608 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
810 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 609 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
811 } 610 }
@@ -826,17 +625,13 @@ draw_1bpp_row(size_t x, size_t y, u8 a, u8 clr, u8 flip_x) {
826 size_t shift_left = start_col * 4; 625 size_t shift_left = start_col * 4;
827 size_t shift_right = (8 - start_col) * 4; 626 size_t shift_right = (8 - start_col) * 4;
828 627
829 u32 *dst = &backbuf[(tile_x + tile_y * 32) * 8]; 628 u32 *dst = &backbuf[start_row + (tile_x + tile_y * 32) * 8];
830 dst += start_row; 629 u32 color = decode_1bpp(a, flip_x);
630 u32 mask = (color * 0xF);
631 color *= clr;
831 if (start_col == 0) { 632 if (start_col == 0) {
832 u32 color = decode_1bpp(a, flip_x);
833 u32 mask = (color * 0xF);
834 color *= clr;
835 dst[0] = (dst[0] & ~mask) | color; 633 dst[0] = (dst[0] & ~mask) | color;
836 } else { 634 } else {
837 u32 color = decode_1bpp(a, flip_x);
838 u32 mask = (color * 0xF);
839 color *= clr;
840 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 635 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
841 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 636 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
842 } 637 }
diff --git a/src/renderer_m4.c b/src/renderer_m4.c
index cc9da58..6da7996 100644
--- a/src/renderer_m4.c
+++ b/src/renderer_m4.c
@@ -210,6 +210,129 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
210 screen_updated = true; 210 screen_updated = true;
211} 211}
212 212
213#define DEC_BIG_LUT 1
214
215#if DEC_BIG_LUT
216static u64 dec_byte_flip_x[256] = {
217 0x0000000000000000, 0x0000000000000001, 0x0000000000000100, 0x0000000000000101, 0x0000000000010000,
218 0x0000000000010001, 0x0000000000010100, 0x0000000000010101, 0x0000000001000000, 0x0000000001000001,
219 0x0000000001000100, 0x0000000001000101, 0x0000000001010000, 0x0000000001010001, 0x0000000001010100,
220 0x0000000001010101, 0x0000000100000000, 0x0000000100000001, 0x0000000100000100, 0x0000000100000101,
221 0x0000000100010000, 0x0000000100010001, 0x0000000100010100, 0x0000000100010101, 0x0000000101000000,
222 0x0000000101000001, 0x0000000101000100, 0x0000000101000101, 0x0000000101010000, 0x0000000101010001,
223 0x0000000101010100, 0x0000000101010101, 0x0000010000000000, 0x0000010000000001, 0x0000010000000100,
224 0x0000010000000101, 0x0000010000010000, 0x0000010000010001, 0x0000010000010100, 0x0000010000010101,
225 0x0000010001000000, 0x0000010001000001, 0x0000010001000100, 0x0000010001000101, 0x0000010001010000,
226 0x0000010001010001, 0x0000010001010100, 0x0000010001010101, 0x0000010100000000, 0x0000010100000001,
227 0x0000010100000100, 0x0000010100000101, 0x0000010100010000, 0x0000010100010001, 0x0000010100010100,
228 0x0000010100010101, 0x0000010101000000, 0x0000010101000001, 0x0000010101000100, 0x0000010101000101,
229 0x0000010101010000, 0x0000010101010001, 0x0000010101010100, 0x0000010101010101, 0x0001000000000000,
230 0x0001000000000001, 0x0001000000000100, 0x0001000000000101, 0x0001000000010000, 0x0001000000010001,
231 0x0001000000010100, 0x0001000000010101, 0x0001000001000000, 0x0001000001000001, 0x0001000001000100,
232 0x0001000001000101, 0x0001000001010000, 0x0001000001010001, 0x0001000001010100, 0x0001000001010101,
233 0x0001000100000000, 0x0001000100000001, 0x0001000100000100, 0x0001000100000101, 0x0001000100010000,
234 0x0001000100010001, 0x0001000100010100, 0x0001000100010101, 0x0001000101000000, 0x0001000101000001,
235 0x0001000101000100, 0x0001000101000101, 0x0001000101010000, 0x0001000101010001, 0x0001000101010100,
236 0x0001000101010101, 0x0001010000000000, 0x0001010000000001, 0x0001010000000100, 0x0001010000000101,
237 0x0001010000010000, 0x0001010000010001, 0x0001010000010100, 0x0001010000010101, 0x0001010001000000,
238 0x0001010001000001, 0x0001010001000100, 0x0001010001000101, 0x0001010001010000, 0x0001010001010001,
239 0x0001010001010100, 0x0001010001010101, 0x0001010100000000, 0x0001010100000001, 0x0001010100000100,
240 0x0001010100000101, 0x0001010100010000, 0x0001010100010001, 0x0001010100010100, 0x0001010100010101,
241 0x0001010101000000, 0x0001010101000001, 0x0001010101000100, 0x0001010101000101, 0x0001010101010000,
242 0x0001010101010001, 0x0001010101010100, 0x0001010101010101, 0x0100000000000000, 0x0100000000000001,
243 0x0100000000000100, 0x0100000000000101, 0x0100000000010000, 0x0100000000010001, 0x0100000000010100,
244 0x0100000000010101, 0x0100000001000000, 0x0100000001000001, 0x0100000001000100, 0x0100000001000101,
245 0x0100000001010000, 0x0100000001010001, 0x0100000001010100, 0x0100000001010101, 0x0100000100000000,
246 0x0100000100000001, 0x0100000100000100, 0x0100000100000101, 0x0100000100010000, 0x0100000100010001,
247 0x0100000100010100, 0x0100000100010101, 0x0100000101000000, 0x0100000101000001, 0x0100000101000100,
248 0x0100000101000101, 0x0100000101010000, 0x0100000101010001, 0x0100000101010100, 0x0100000101010101,
249 0x0100010000000000, 0x0100010000000001, 0x0100010000000100, 0x0100010000000101, 0x0100010000010000,
250 0x0100010000010001, 0x0100010000010100, 0x0100010000010101, 0x0100010001000000, 0x0100010001000001,
251 0x0100010001000100, 0x0100010001000101, 0x0100010001010000, 0x0100010001010001, 0x0100010001010100,
252 0x0100010001010101, 0x0100010100000000, 0x0100010100000001, 0x0100010100000100, 0x0100010100000101,
253 0x0100010100010000, 0x0100010100010001, 0x0100010100010100, 0x0100010100010101, 0x0100010101000000,
254 0x0100010101000001, 0x0100010101000100, 0x0100010101000101, 0x0100010101010000, 0x0100010101010001,
255 0x0100010101010100, 0x0100010101010101, 0x0101000000000000, 0x0101000000000001, 0x0101000000000100,
256 0x0101000000000101, 0x0101000000010000, 0x0101000000010001, 0x0101000000010100, 0x0101000000010101,
257 0x0101000001000000, 0x0101000001000001, 0x0101000001000100, 0x0101000001000101, 0x0101000001010000,
258 0x0101000001010001, 0x0101000001010100, 0x0101000001010101, 0x0101000100000000, 0x0101000100000001,
259 0x0101000100000100, 0x0101000100000101, 0x0101000100010000, 0x0101000100010001, 0x0101000100010100,
260 0x0101000100010101, 0x0101000101000000, 0x0101000101000001, 0x0101000101000100, 0x0101000101000101,
261 0x0101000101010000, 0x0101000101010001, 0x0101000101010100, 0x0101000101010101, 0x0101010000000000,
262 0x0101010000000001, 0x0101010000000100, 0x0101010000000101, 0x0101010000010000, 0x0101010000010001,
263 0x0101010000010100, 0x0101010000010101, 0x0101010001000000, 0x0101010001000001, 0x0101010001000100,
264 0x0101010001000101, 0x0101010001010000, 0x0101010001010001, 0x0101010001010100, 0x0101010001010101,
265 0x0101010100000000, 0x0101010100000001, 0x0101010100000100, 0x0101010100000101, 0x0101010100010000,
266 0x0101010100010001, 0x0101010100010100, 0x0101010100010101, 0x0101010101000000, 0x0101010101000001,
267 0x0101010101000100, 0x0101010101000101, 0x0101010101010000, 0x0101010101010001, 0x0101010101010100,
268 0x0101010101010101
269};
270
271static u64 dec_byte[256] = {
272 0x0000000000000000, 0x0100000000000000, 0x0001000000000000, 0x0101000000000000, 0x0000010000000000,
273 0x0100010000000000, 0x0001010000000000, 0x0101010000000000, 0x0000000100000000, 0x0100000100000000,
274 0x0001000100000000, 0x0101000100000000, 0x0000010100000000, 0x0100010100000000, 0x0001010100000000,
275 0x0101010100000000, 0x0000000001000000, 0x0100000001000000, 0x0001000001000000, 0x0101000001000000,
276 0x0000010001000000, 0x0100010001000000, 0x0001010001000000, 0x0101010001000000, 0x0000000101000000,
277 0x0100000101000000, 0x0001000101000000, 0x0101000101000000, 0x0000010101000000, 0x0100010101000000,
278 0x0001010101000000, 0x0101010101000000, 0x0000000000010000, 0x0100000000010000, 0x0001000000010000,
279 0x0101000000010000, 0x0000010000010000, 0x0100010000010000, 0x0001010000010000, 0x0101010000010000,
280 0x0000000100010000, 0x0100000100010000, 0x0001000100010000, 0x0101000100010000, 0x0000010100010000,
281 0x0100010100010000, 0x0001010100010000, 0x0101010100010000, 0x0000000001010000, 0x0100000001010000,
282 0x0001000001010000, 0x0101000001010000, 0x0000010001010000, 0x0100010001010000, 0x0001010001010000,
283 0x0101010001010000, 0x0000000101010000, 0x0100000101010000, 0x0001000101010000, 0x0101000101010000,
284 0x0000010101010000, 0x0100010101010000, 0x0001010101010000, 0x0101010101010000, 0x0000000000000100,
285 0x0100000000000100, 0x0001000000000100, 0x0101000000000100, 0x0000010000000100, 0x0100010000000100,
286 0x0001010000000100, 0x0101010000000100, 0x0000000100000100, 0x0100000100000100, 0x0001000100000100,
287 0x0101000100000100, 0x0000010100000100, 0x0100010100000100, 0x0001010100000100, 0x0101010100000100,
288 0x0000000001000100, 0x0100000001000100, 0x0001000001000100, 0x0101000001000100, 0x0000010001000100,
289 0x0100010001000100, 0x0001010001000100, 0x0101010001000100, 0x0000000101000100, 0x0100000101000100,
290 0x0001000101000100, 0x0101000101000100, 0x0000010101000100, 0x0100010101000100, 0x0001010101000100,
291 0x0101010101000100, 0x0000000000010100, 0x0100000000010100, 0x0001000000010100, 0x0101000000010100,
292 0x0000010000010100, 0x0100010000010100, 0x0001010000010100, 0x0101010000010100, 0x0000000100010100,
293 0x0100000100010100, 0x0001000100010100, 0x0101000100010100, 0x0000010100010100, 0x0100010100010100,
294 0x0001010100010100, 0x0101010100010100, 0x0000000001010100, 0x0100000001010100, 0x0001000001010100,
295 0x0101000001010100, 0x0000010001010100, 0x0100010001010100, 0x0001010001010100, 0x0101010001010100,
296 0x0000000101010100, 0x0100000101010100, 0x0001000101010100, 0x0101000101010100, 0x0000010101010100,
297 0x0100010101010100, 0x0001010101010100, 0x0101010101010100, 0x0000000000000001, 0x0100000000000001,
298 0x0001000000000001, 0x0101000000000001, 0x0000010000000001, 0x0100010000000001, 0x0001010000000001,
299 0x0101010000000001, 0x0000000100000001, 0x0100000100000001, 0x0001000100000001, 0x0101000100000001,
300 0x0000010100000001, 0x0100010100000001, 0x0001010100000001, 0x0101010100000001, 0x0000000001000001,
301 0x0100000001000001, 0x0001000001000001, 0x0101000001000001, 0x0000010001000001, 0x0100010001000001,
302 0x0001010001000001, 0x0101010001000001, 0x0000000101000001, 0x0100000101000001, 0x0001000101000001,
303 0x0101000101000001, 0x0000010101000001, 0x0100010101000001, 0x0001010101000001, 0x0101010101000001,
304 0x0000000000010001, 0x0100000000010001, 0x0001000000010001, 0x0101000000010001, 0x0000010000010001,
305 0x0100010000010001, 0x0001010000010001, 0x0101010000010001, 0x0000000100010001, 0x0100000100010001,
306 0x0001000100010001, 0x0101000100010001, 0x0000010100010001, 0x0100010100010001, 0x0001010100010001,
307 0x0101010100010001, 0x0000000001010001, 0x0100000001010001, 0x0001000001010001, 0x0101000001010001,
308 0x0000010001010001, 0x0100010001010001, 0x0001010001010001, 0x0101010001010001, 0x0000000101010001,
309 0x0100000101010001, 0x0001000101010001, 0x0101000101010001, 0x0000010101010001, 0x0100010101010001,
310 0x0001010101010001, 0x0101010101010001, 0x0000000000000101, 0x0100000000000101, 0x0001000000000101,
311 0x0101000000000101, 0x0000010000000101, 0x0100010000000101, 0x0001010000000101, 0x0101010000000101,
312 0x0000000100000101, 0x0100000100000101, 0x0001000100000101, 0x0101000100000101, 0x0000010100000101,
313 0x0100010100000101, 0x0001010100000101, 0x0101010100000101, 0x0000000001000101, 0x0100000001000101,
314 0x0001000001000101, 0x0101000001000101, 0x0000010001000101, 0x0100010001000101, 0x0001010001000101,
315 0x0101010001000101, 0x0000000101000101, 0x0100000101000101, 0x0001000101000101, 0x0101000101000101,
316 0x0000010101000101, 0x0100010101000101, 0x0001010101000101, 0x0101010101000101, 0x0000000000010101,
317 0x0100000000010101, 0x0001000000010101, 0x0101000000010101, 0x0000010000010101, 0x0100010000010101,
318 0x0001010000010101, 0x0101010000010101, 0x0000000100010101, 0x0100000100010101, 0x0001000100010101,
319 0x0101000100010101, 0x0000010100010101, 0x0100010100010101, 0x0001010100010101, 0x0101010100010101,
320 0x0000000001010101, 0x0100000001010101, 0x0001000001010101, 0x0101000001010101, 0x0000010001010101,
321 0x0100010001010101, 0x0001010001010101, 0x0101010001010101, 0x0000000101010101, 0x0100000101010101,
322 0x0001000101010101, 0x0101000101010101, 0x0000010101010101, 0x0100010101010101, 0x0001010101010101,
323 0x0101010101010101
324};
325IWRAM_CODE
326static inline
327u64
328decode_1bpp(u8 row, u8 flip_x) {
329 if (flip_x) {
330 return dec_byte_flip_x[row];
331 }
332 return dec_byte[row];
333}
334#else
335
213static u32 dec_nibble[] = { 336static u32 dec_nibble[] = {
214 0x00000000, 0x01000000, 0x00010000, 0x01010000, 337 0x00000000, 0x01000000, 0x00010000, 0x01010000,
215 0x00000100, 0x01000100, 0x00010100, 0x01010100, 338 0x00000100, 0x01000100, 0x00010100, 0x01010100,
@@ -235,6 +358,7 @@ decode_1bpp(u8 row, u8 flip_x) {
235 u32 *lut = dec_nibble; 358 u32 *lut = dec_nibble;
236 return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF]; 359 return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF];
237} 360}
361#endif
238 362
239IWRAM_CODE 363IWRAM_CODE
240static inline 364static inline
@@ -248,21 +372,24 @@ draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 flip_x) {
248 size_t shift_right = (8 - start_col) * 8; 372 size_t shift_right = (8 - start_col) * 8;
249 373
250 u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2]; 374 u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2];
375#if DEC_BIG_LUT
376 u64 *lut = dec_byte;
377 if (flip_x) {
378 lut = dec_byte_flip_x;
379 }
380 u64 clr_a = lut[a];
381 u64 clr_b = lut[b];
382#else
383 u64 clr_a = decode_1bpp(a, flip_x);
384 u64 clr_b = decode_1bpp(b, flip_x);
385#endif
386 u64 mask_a = (clr_a * 0xF);
387 u64 mask_b = (clr_b * 0xF);
388 u64 mask = (mask_a | mask_b);
389 u64 color = clr_a + (clr_b << 1);
251 if (start_col == 0) { 390 if (start_col == 0) {
252 u64 clr_a = decode_1bpp(a, flip_x);
253 u64 clr_b = decode_1bpp(b, flip_x);
254 u64 mask_a = (clr_a * 0xF);
255 u64 mask_b = (clr_b * 0xF);
256 u64 mask = (mask_a | mask_b);
257 u64 color = clr_a + (clr_b << 1);
258 dst[0] = (dst[0] & ~mask) | color; 391 dst[0] = (dst[0] & ~mask) | color;
259 } else { 392 } else {
260 u64 clr_a = decode_1bpp(a, flip_x);
261 u64 clr_b = decode_1bpp(b, flip_x);
262 u64 mask_a = (clr_a * 0xF);
263 u64 mask_b = (clr_b * 0xF);
264 u64 mask = (mask_a | mask_b);
265 u64 color = clr_a + (clr_b << 1);
266 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 393 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
267 if ((x + 7) > (SCREEN_WIDTH)) { 394 if ((x + 7) > (SCREEN_WIDTH)) {
268 return; 395 return;