summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2024-01-23 17:59:52 +0100
committerBad Diode <bd@badd10de.dev>2024-01-23 17:59:52 +0100
commitb1f597fe0ba040f9ab7106554c2c629cc949c41a (patch)
tree6e83f4fab2b20776992a6977c50b2dee95d47cc1
parentbaf4091795c5a87ff286e83f012ac091bcdd5848 (diff)
downloadgba-renderers-b1f597fe0ba040f9ab7106554c2c629cc949c41a.tar.gz
gba-renderers-b1f597fe0ba040f9ab7106554c2c629cc949c41a.zip
Update profiling macros and m0 renderer
-rw-r--r--Makefile4
-rw-r--r--src/gba/draw.s183
-rw-r--r--src/gba/gba.h209
-rwxr-xr-xsrc/gba/profiling.h134
-rw-r--r--src/gba/renderer/renderer.c1
-rw-r--r--src/gba/renderer/renderer.h (renamed from src/renderer.h)10
-rw-r--r--src/gba/renderer/renderer_m0.c (renamed from src/renderer_m0.c)606
-rw-r--r--src/gba/renderer/renderer_m3.c (renamed from src/renderer_m3.c)0
-rw-r--r--src/gba/renderer/renderer_m4.c475
-rw-r--r--src/gba/text/font.h130
-rw-r--r--src/gba/text/posprintf.h (renamed from src/text/posprintf.h)0
-rw-r--r--src/gba/text/posprintf.s (renamed from src/text/posprintf.s)0
-rw-r--r--src/gba/text/text.h (renamed from src/text/text.h)10
-rwxr-xr-xsrc/gba/utils.s82
-rw-r--r--src/main.c122
-rw-r--r--src/profiling.c304
-rw-r--r--src/renderer.c653
-rw-r--r--src/renderer_m4.c582
-rw-r--r--src/text/font.h261
19 files changed, 1704 insertions, 2062 deletions
diff --git a/Makefile b/Makefile
index 98fbfd1..8db7b02 100644
--- a/Makefile
+++ b/Makefile
@@ -17,8 +17,8 @@ BUILD_DIR := build
17SRC_MAIN := $(SRC_DIR)/main.c 17SRC_MAIN := $(SRC_DIR)/main.c
18SRC_BDGBA := $(wildcard $(SRC_DIR)/gba/*.s) 18SRC_BDGBA := $(wildcard $(SRC_DIR)/gba/*.s)
19SRC_BDGBA += $(wildcard $(SRC_DIR)/gba/*.c) 19SRC_BDGBA += $(wildcard $(SRC_DIR)/gba/*.c)
20SRC_BDTEXT := $(wildcard $(SRC_DIR)/text/*.s) 20SRC_BDTEXT := $(wildcard $(SRC_DIR)/gba/text/*.s)
21SRC_BDTEXT += $(wildcard $(SRC_DIR)/text/*.c) 21SRC_BDTEXT += $(wildcard $(SRC_DIR)/gba/text/*.c)
22SRC := $(SRC_MAIN) $(SRC_BDGBA) $(SRC_BDTEXT) 22SRC := $(SRC_MAIN) $(SRC_BDGBA) $(SRC_BDTEXT)
23WATCH_SRC := $(shell find $(SRC_DIR) -name *.c -or -name *.s -or -name *.h) 23WATCH_SRC := $(shell find $(SRC_DIR) -name *.c -or -name *.s -or -name *.h)
24INC_DIRS := $(shell find $(SRC_DIR) -type d) 24INC_DIRS := $(shell find $(SRC_DIR) -type d)
diff --git a/src/gba/draw.s b/src/gba/draw.s
new file mode 100644
index 0000000..a5705ee
--- /dev/null
+++ b/src/gba/draw.s
@@ -0,0 +1,183 @@
1.file "draw.s"
2.section .iwram, "ax", %progbits
3.arm
4.align
5@ 4-bit index -> 32-bit mask, one 0xff byte per set bit (bit 3 = lowest byte, bit 0 = highest byte).
6.global decode_1bpp_mask
7decode_1bpp_mask:
8 .word 0x00000000
9 .word 0xff000000
10 .word 0x00ff0000
11 .word 0xffff0000
12 .word 0x0000ff00
13 .word 0xff00ff00
14 .word 0x00ffff00
15 .word 0xffffff00
16 .word 0x000000ff
17 .word 0xff0000ff
18 .word 0x00ff00ff
19 .word 0xffff00ff
20 .word 0x0000ffff
21 .word 0xff00ffff
22 .word 0x00ffffff
23 .word 0xffffffff
24@ Mirrored variant of decode_1bpp_mask: bit 0 = lowest byte, bit 3 = highest byte.
25.global decode_1bpp_mask_flip_x
26decode_1bpp_mask_flip_x:
27 .word 0x00000000
28 .word 0x000000ff
29 .word 0x0000ff00
30 .word 0x0000ffff
31 .word 0x00ff0000
32 .word 0x00ff00ff
33 .word 0x00ffff00
34 .word 0x00ffffff
35 .word 0xff000000
36 .word 0xff0000ff
37 .word 0xff00ff00
38 .word 0xff00ffff
39 .word 0xffff0000
40 .word 0xffff00ff
41 .word 0xffffff00
42 .word 0xffffffff
43@ Multiplying a u8 color by this constant copies it into all four bytes of a word.
44color_mask:
45 .word 0x01010101
46
47@ draw_icn_asm: draws one 1bpp sprite (8 rows, one byte per row) in color clr.
48@ Arguments
49@ r0, r1: x, y
50@ r2: *sprite
51@ r3: clr
52@ sp0: flip_x (loaded into r4, currently ignored)
53@ sp1: flip_y (loaded into r5, currently ignored)
54@ sp2: framebuffer (loaded into r6, then kept in fp)
55@
56.global draw_icn_asm
57draw_icn_asm:
58 @ Save fp and lr so that we can use them in our function. The fp becomes
59 @ a frame of reference to be able to access the stack passed arguments.
60 @ Right now lr can be used as a temporary volatile value to do operations,
61 @ we will restore both of these before exiting.
62 push {fp, lr}
63 @ fp = sp + 8: step over the two words just pushed to reach sp0.
64 add fp, sp, #8
65
66 @ Make sure we don't clobber these registers.
67 push {r4-r10}
68
69 @ Get the u8 color into the proper u32 format (clr * 0x01010101).
70 ldr r4, color_mask
71 mul r3, r4, r3 @ Rd must differ from Rm for MUL on ARMv4T.
72
73 @ Fetch arguments 4-6.
74 ldmfd fp, {r4-r6}
75
76 @ Prepare backbuffer position: framebuffer + y * 240 + (x / 8) * 8
77 @ fp: framebuffer position for the initial tile
78 mov lr, #240
79 mul lr, r1, lr
80 add r6, lr
81 mov lr, r0, lsr #3
82 add r6, lr, lsl #3
83 mov fp, r6
84
85 @ r10: decoding table
86 adr r10, decode_1bpp_mask
87
88 @ TODO: Ignoring flip_x/flip_y for now
89 @ TODO: Once we calculate the number of rows, r0 and r1 are free to use,
90 @ assuming n_rows is 8 for now for simplicity. Using r0/r1 for shift_left/right
91
92 @ start_col = x % 8; columns 0-3 take the setup_b path.
93 and lr, r0, #7
94 cmp lr, #3
95 bls .draw_icn_asm_shift_setup_b
96.draw_icn_asm_shift_setup_a:
97 @ dst++;
98 @ shift_left = 8 * (start_col - 4);
99 @ shift_right = 8 * (8 - start_col);
100 add fp, #4
101 sub r0, lr, #4
102 lsl r0, #3
103 mov r4, #8
104 sub r1, r4, lr
105 lsl r1, #3
106 b .draw_icn_asm_loop_start
107
108 @ r0: shift_left / r1: shift_right
109.draw_icn_asm_shift_setup_b:
110 @ size_t shift_left = start_col * 8;
111 @ size_t shift_right = (4 - start_col) * 8;
112 mov r0, lr, lsl #3
113 mov r1, #4
114 sub r1, r1, lr
115 lsl r1, #3
116
117.draw_icn_asm_loop_start:
118 @ lr: loop counter (8 rows)
119 mov lr, #8
120.Ldraw_icn_asm_loop:
121 @ Decode current sprite mask
122 @ u32 msk0 = decode_1bpp_mask[*sprite >> 4];
123 @ u32 msk1 = decode_1bpp_mask[*sprite & 0xf];
124 @ r4: msk0 (left)
125 @ r6: msk1 (right)
126 ldrb r4, [r2]
127 mov r6, r4
128 lsr r4, #4
129 and r6, #0xf
130 ldr r4, [r10, r4, lsl #2] @ msk0
131 ldr r6, [r10, r6, lsl #2] @ msk1
132
133 @ u32 msk2 = (msk0 >> shift_right) | (msk1 << shift_left);
134 @ r5: msk2 (middle)
135 mov r5, r4, lsr r1
136 orr r5, r6, lsl r0
137
138 @ msk0 <<= shift_left;
139 @ msk1 >>= shift_right;
140 lsl r4, r0
141 lsr r6, r1
142
143 @ dst[0] = (dst[0] & ~msk0) | clr0;
144 @ dst[1] = (dst[1] & ~msk2) | clr2;
145 @ dst[2] = (dst[2] & ~msk1) | clr1;
146 @ Load existing row data.
147 ldm fp, {r7-r9}
148 @ dst[0]
149 bic r7, r4
150 and r4, r3, r4
151 orr r7, r4
152 @ dst[1]
153 bic r8, r5
154 and r5, r3, r5
155 orr r8, r5
156 @ dst[2]
157 bic r9, r6
158 and r6, r3, r6
159 orr r9, r6
160
161 @ Store all three tile rows at once
162 stm fp, {r7-r9}
163
164 @ Next loop iteration: advance one framebuffer row and one sprite byte.
165 add fp, fp, #240
166 subs lr, lr, #1
167 add r2, #1 @ No S suffix, so the flags from subs survive.
168 bne .Ldraw_icn_asm_loop
169
170 @ DEBUG: Write color in framebuffer
171 @ str r3, [fp]
172 @ add fp, #4
173 @ str r3, [fp]
174
175 @ DEBUG: return value.
176 @ mov r0, r4
177 @ mov r0, r6
178 @ mov r0, r5
179
180.draw_icn_asm_end:
181 pop {r4-r10}
182 pop {fp, lr}
183 bx lr
diff --git a/src/gba/gba.h b/src/gba/gba.h
index 27a6a9a..045f2f4 100644
--- a/src/gba/gba.h
+++ b/src/gba/gba.h
@@ -289,12 +289,24 @@ profile_measure(void) {
289static u16 key_curr = 0; 289static u16 key_curr = 0;
290static u16 key_prev = 0; 290static u16 key_prev = 0;
291 291
292static inline 292// Stores number of frames since a key was pressed.
293void 293typedef struct Controller {
294poll_keys(void) { 294 int key_up;
295 key_prev = key_curr; 295 int key_down;
296 key_curr = ~KEY_INPUTS & KEY_MASK; 296 int key_left;
297} 297 int key_right;
298 int key_select;
299 int key_start;
300 int key_b;
301 int key_a;
302 int key_l;
303 int key_r;
304} Controller;
305
306static Controller ctrl = {0};
307
308#define RETRIG_OFFSET 16
309#define RETRIG_FRAMES 2
298 310
299// Returns true if the given key has been pressed at time of calling and was not 311// Returns true if the given key has been pressed at time of calling and was not
300// pressed since the previous call. For example, if a key is being held, this 312// pressed since the previous call. For example, if a key is being held, this
@@ -327,40 +339,110 @@ key_hold(u32 key) {
327 return key_curr & key_prev & key; 339 return key_curr & key_prev & key;
328} 340}
329 341
330// Back/unpack bits.
331static inline 342static inline
332u32 343bool
333unpack_1bb(u8 hex) { 344_key_retrig(int key, int offset, int frames) {
334 const u32 conversion_u32[16] = { 345 if (key_tap(key)) {
335 0x00000000, 0x00000001, 0x00000010, 0x00000011, 346 return true;
336 0x00000100, 0x00000101, 0x00000110, 0x00000111, 347 }
337 0x00001000, 0x00001001, 0x00001010, 0x00001011, 348 switch (key) {
338 0x00001100, 0x00001101, 0x00001110, 0x00001111, 349 case KEY_L: {
339 }; 350 if (key_hold(key)) {
340 u8 low = hex & 0xF; 351 if (ctrl.key_l < offset) { return false; }
341 u8 high = (hex >> 4) & 0xF; 352 if (ctrl.key_l % frames == 0) { return true; }
342 return (conversion_u32[high] << 16) | conversion_u32[low]; 353 }
354 } break;
355 case KEY_R: {
356 if (key_hold(key)) {
357 if (ctrl.key_r < offset) { return false; }
358 if (ctrl.key_r % frames == 0) { return true; }
359 }
360 } break;
361 case KEY_A: {
362 if (key_hold(key)) {
363 if (ctrl.key_a < offset) { return false; }
364 if (ctrl.key_a % frames == 0) { return true; }
365 }
366 } break;
367 case KEY_B: {
368 if (key_hold(key)) {
369 if (ctrl.key_b < offset) { return false; }
370 if (ctrl.key_b % frames == 0) { return true; }
371 }
372 } break;
373 case KEY_SELECT: {
374 if (key_hold(key)) {
375 if (ctrl.key_select < offset) { return false; }
376 if (ctrl.key_select % frames == 0) { return true; }
377 }
378 } break;
379 case KEY_START: {
380 if (key_hold(key)) {
381 if (ctrl.key_start < offset) { return false; }
382 if (ctrl.key_start % frames == 0) { return true; }
383 }
384 } break;
385 case KEY_UP: {
386 if (key_hold(key)) {
387 if (ctrl.key_up < offset) { return false; }
388 if (ctrl.key_up % frames == 0) { return true; }
389 }
390 } break;
391 case KEY_DOWN: {
392 if (key_hold(key)) {
393 if (ctrl.key_down < offset) { return false; }
394 if (ctrl.key_down % frames == 0) { return true; }
395 }
396 } break;
397 case KEY_LEFT: {
398 if (key_hold(key)) {
399 if (ctrl.key_left < offset) { return false; }
400 if (ctrl.key_left % frames == 0) { return true; }
401 }
402 } break;
403 case KEY_RIGHT: {
404 if (key_hold(key)) {
405 if (ctrl.key_right < offset) { return false; }
406 if (ctrl.key_right % frames == 0) { return true; }
407 }
408 } break;
409 }
410 return false;
411}
412
413static inline
414bool
415key_retrig(int key) {
416 return _key_retrig(key, RETRIG_OFFSET, RETRIG_FRAMES);
343} 417}
344 418
345// Unpack N tiles packed at 1bpp.
346static inline 419static inline
347void 420void
348unpack_tiles(const u32 *src, u32 *dst, size_t n_tiles) { 421update_controller(void) {
349 const u32 *target_src = src + n_tiles * 2; 422 if (key_pressed(KEY_UP)) { ctrl.key_up++; } else if (key_released(KEY_UP)) { ctrl.key_up = 0; }
350 while (src != target_src) { 423 if (key_pressed(KEY_DOWN)) { ctrl.key_down++; } else if (key_released(KEY_DOWN)) { ctrl.key_down = 0; }
351 *dst++ = unpack_1bb((*src >> 24) & 0xFF); 424 if (key_pressed(KEY_LEFT)) { ctrl.key_left++; } else if (key_released(KEY_LEFT)) { ctrl.key_left = 0; }
352 *dst++ = unpack_1bb((*src >> 16) & 0xFF); 425 if (key_pressed(KEY_RIGHT)) { ctrl.key_right++; } else if (key_released(KEY_RIGHT)) { ctrl.key_right = 0; }
353 *dst++ = unpack_1bb((*src >> 8) & 0xFF); 426 if (key_pressed(KEY_L)) { ctrl.key_l++; } else if (key_released(KEY_L)) { ctrl.key_l = 0; }
354 *dst++ = unpack_1bb(*src & 0xFF); 427 if (key_pressed(KEY_R)) { ctrl.key_r++; } else if (key_released(KEY_R)) { ctrl.key_r = 0; }
355 src++; 428 if (key_pressed(KEY_A)) { ctrl.key_a++; } else if (key_released(KEY_A)) { ctrl.key_a = 0; }
356 } 429 if (key_pressed(KEY_B)) { ctrl.key_b++; } else if (key_released(KEY_B)) { ctrl.key_b = 0; }
430 if (key_pressed(KEY_SELECT)) { ctrl.key_select++; } else if (key_released(KEY_SELECT)) { ctrl.key_select = 0; }
431 if (key_pressed(KEY_START)) { ctrl.key_start++; } else if (key_released(KEY_START)) { ctrl.key_start = 0; }
432}
433
434static inline
435void
436poll_keys(void) {
437 key_prev = key_curr;
438 key_curr = ~KEY_INPUTS & KEY_MASK;
439 update_controller();
357} 440}
358 441
359// 442//
360// Direct Memory Access (DMA) 443// Direct Memory Access (DMA)
361// 444//
362 445
363
364// Source, destination, and control registers. 446// Source, destination, and control registers.
365#define DMA_SRC(N) *((vu32*) 0x040000B0 + (N) * 12) 447#define DMA_SRC(N) *((vu32*) 0x040000B0 + (N) * 12)
366#define DMA_DST(N) *((vu32*) 0x040000B4 + (N) * 12) 448#define DMA_DST(N) *((vu32*) 0x040000B4 + (N) * 12)
@@ -482,6 +564,24 @@ int bios_vblank_wait();
482int bios_div(int num, int denom); 564int bios_div(int num, int denom);
483 565
484// 566//
567// SIO Link Cable
568//
569
570#define SIO_MODE *((vu16*)(MEM_IO + 0x0134))
571#define SIO_CNT *((vu16*)(MEM_IO + 0x0128))
572
573#define SIO_MODE_GP (2 << 14)
574#define SIO_SC(X) ((X) << 0)
575#define SIO_SD(X) ((X) << 1)
576#define SIO_SI(X) ((X) << 2)
577#define SIO_SO(X) ((X) << 3)
578#define SIO_SC_OUT(X) ((X) << 4)
579#define SIO_SD_OUT(X) ((X) << 5)
580#define SIO_SI_OUT(X) ((X) << 6)
581#define SIO_SO_OUT(X) ((X) << 7)
582#define SIO_IRQ_ENABLE (1 << 8)
583
584//
485// Sound. 585// Sound.
486// 586//
487 587
@@ -522,10 +622,35 @@ typedef enum {
522} SoundChannel; 622} SoundChannel;
523 623
524inline u16 624inline u16
525sound_volume(SoundChannel channels, u8 volume) { 625sound_volume(SoundChannel channels, u8 volume, u8 pan) {
526 volume = volume & 0x7; 626 volume = volume & 0x7;
527 channels = channels & 0xF; 627 channels = channels & 0xF;
528 return volume | (volume << 0x4) | (channels << 0x8) | (channels << 0xC); 628 u16 right = volume | (channels << 0x8);
629 u16 left = (volume << 0x4) | (channels << 0xC);
630 if (pan == 1) {
631 return left;
632 }
633 if (pan == 2) {
634 return right;
635 }
636 return left | right;
637}
638
639inline u16 dmg_stereo_vol(u8 vol) {
640 return vol | (vol << 0x4);
641}
642
643inline u16
644channel_vol(SoundChannel channel, s8 pan) {
645 u16 left = (channel << 0xc);
646 u16 right = (channel << 0x8);
647 if (pan == -1) {
648 return left;
649 }
650 if (pan == +1) {
651 return right;
652 }
653 return left | right;
529} 654}
530 655
531// Sound Direct Sound master bits. 656// Sound Direct Sound master bits.
@@ -534,12 +659,12 @@ sound_volume(SoundChannel channels, u8 volume) {
534#define SOUND_DMG100 0x2 659#define SOUND_DMG100 0x2
535#define SOUND_DSOUND_RATIO_A (1 << 0x2) 660#define SOUND_DSOUND_RATIO_A (1 << 0x2)
536#define SOUND_DSOUND_RATIO_B (1 << 0x3) 661#define SOUND_DSOUND_RATIO_B (1 << 0x3)
537#define SOUND_DSOUND_LEFT_A (1 << 0x8) 662#define SOUND_DSOUND_RIGHT_A (1 << 0x8)
538#define SOUND_DSOUND_RIGHT_A (1 << 0x9) 663#define SOUND_DSOUND_LEFT_A (1 << 0x9)
539#define SOUND_DSOUND_TIMER_A (1 << 0xA) 664#define SOUND_DSOUND_TIMER_A (1 << 0xA)
540#define SOUND_DSOUND_RESET_A (1 << 0xB) 665#define SOUND_DSOUND_RESET_A (1 << 0xB)
541#define SOUND_DSOUND_LEFT_B (1 << 0xC) 666#define SOUND_DSOUND_RIGHT_B (1 << 0xC)
542#define SOUND_DSOUND_RIGHT_B (1 << 0xD) 667#define SOUND_DSOUND_LEFT_B (1 << 0xD)
543#define SOUND_DSOUND_TIMER_B (1 << 0xE) 668#define SOUND_DSOUND_TIMER_B (1 << 0xE)
544#define SOUND_DSOUND_RESET_B (1 << 0xF) 669#define SOUND_DSOUND_RESET_B (1 << 0xF)
545 670
@@ -661,6 +786,7 @@ wait_vsync(void) {
661// General utility macros. 786// General utility macros.
662#define MIN(A, B) ((A) <= (B) ? (A) : (B)) 787#define MIN(A, B) ((A) <= (B) ? (A) : (B))
663#define MAX(A, B) ((A) >= (B) ? (A) : (B)) 788#define MAX(A, B) ((A) >= (B) ? (A) : (B))
789#define ABS(A) (((A) ^ ((A) >> (sizeof(A) * 8 - 1))) - ((A) >> (sizeof(A) * 8 - 1)))
664#define CLAMP(X, MIN, MAX) ((X) <= (MIN) ? (MIN) : (X) > (MAX) ? (MAX): (X)) 790#define CLAMP(X, MIN, MAX) ((X) <= (MIN) ? (MIN) : (X) > (MAX) ? (MAX): (X))
665#define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0])) 791#define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0]))
666 792
@@ -688,10 +814,23 @@ memcpy32(u32 *dst, const u32 *src, u32 size) {
688 } 814 }
689} 815}
690 816
817static inline
818void
819memset32(u32 *dst, const u32 data, u32 size) {
820 // Writes data to the size / 4 consecutive words that start at dst.
821 u32 *end = dst + size / 4;
822 while (dst != end) { *dst++ = data; }
823}
824
825// Optimized ARMASM versions of memcpy32 and memset32.
826extern void copy32(u32 *dst, u32 *src, u32 chunks);
827extern void set32(u32 *dst, u32 data, u32 chunks);
828
691// 829//
692// Compiler hints. 830// Compiler hints.
693// 831//
694 832
695#define UNROLL_LOOPS __attribute__((optimize("unroll-loops"))) 833#define UNROLL_LOOPS __attribute__((optimize("unroll-loops")))
834#define INLINE __attribute__((always_inline)) inline
696 835
697#endif // GBA_H 836#endif // GBA_H
diff --git a/src/gba/profiling.h b/src/gba/profiling.h
new file mode 100755
index 0000000..5fc8d83
--- /dev/null
+++ b/src/gba/profiling.h
@@ -0,0 +1,134 @@
1//
2// Profiling macros.
3//
4
5// NOTE: Profiling uses the last two timers to count cycles, and thus can't be
6// used for measuring applications that need those timers for something else.
7
8
9static u32 frame_time = 0;
10
11#ifndef PROF_ENABLE
12#define PROF_ENABLE 0
13#endif
14
15#if PROF_ENABLE > 0
16
17#ifndef PROF_RESET_MINMAX
18#define PROF_RESET_MINMAX false
19#endif
20
21// Profiling categories to monitor; PROF_NUM is the number of categories.
22typedef enum ProfType {
23 PROF_INPUT,
24 PROF_UPDATE,
25 PROF_RENDER,
26 PROF_FLIP,
27 PROF_FILL,
28 PROF_NUM,
29} ProfType;
30
31char *prof_type_str[PROF_NUM] = {
32 "INPUT ",
33 "UPDATE ",
34 "RENDER ",
35 "FLIPBUF ",
36 "SCRFILL ",
37};
38
39u32 prof_frame_time = 0;
40u32 prof_frame_count = 0;
41u32 prof_frame_avg = 0;
42u32 prof_frame_time_max = 0;
43u32 prof_times[PROF_NUM] = {0};
44u32 prof_count[PROF_NUM] = {0};
45u32 prof_avg[PROF_NUM] = {0};
46u32 prof_max[PROF_NUM] = {0};
47u32 prof_min[PROF_NUM] = {0};
48
49bool prof_reset_minmax = PROF_RESET_MINMAX;
50bool prof_show = true;
51// Seeds each prof_min slot with (u32)-1 so the first MIN() in PROF() replaces it.
52#define PROF_INIT() do { \
53 for (size_t i = 0; i < PROF_NUM; i++) { \
54 prof_min[i] = -1; \
55 } \
56} while(0)
57// Times func and folds the elapsed count into the total, call count, min and max of slot idx.
58#define PROF(func, idx) do { \
59 u32 time_before = profile_measure(); \
60 (func); \
61 u32 time_after = profile_measure(); \
62 u32 time_current = time_after - time_before; \
63 prof_times[idx] += time_current; \
64 prof_count[idx]++; \
65 prof_max[idx] = MAX(time_current, prof_max[idx]);\
66 prof_min[idx] = MIN(time_current, prof_min[idx]);\
67} while(0)
68
69#define FRAME_START() do { \
70 profile_start();\
71} while(0)
72
73// Ends a frame: records its time, draws the overlay if prof_show is set, and refreshes the averages once PROF_ENABLE frames have accumulated.
74#define FRAME_END() do { \
75 prof_frame_count++;\
76 frame_time = profile_measure();\
77 prof_frame_time_max = MAX(prof_frame_time_max, frame_time);\
78 prof_frame_time += profile_stop();\
79 if (prof_show) { \
80 draw_filled_rect(0, 0, SCREEN_WIDTH - 1, 8 * (PROF_NUM + 1), 2); \
81 u32 fps = (u64)280896 * 60 / (prof_frame_avg + 1); \
82 if (prof_frame_avg == 0) { \
83 fps = 0; \
84 } \
85 txt_drawf_small("FRAME TIME/FPS: %.9l/%.2l", 0, 0, COL_FG, \
86 prof_frame_avg, fps);\
87 txt_drawf_small("MAX: %.9l/%l", 8 * 19, 0, COL_FG, \
88 prof_frame_time_max, 280896);\
89 for (size_t idx = 0; idx < PROF_NUM; idx++) { \
90 txt_drawf_small("%s %.9l (%.9l %.9l) %08x:%08x", 0, 8 * (idx + 1), COL_FG, \
91 prof_type_str[idx], \
92 prof_avg[idx], \
93 prof_min[idx], \
94 prof_max[idx], \
95 prof_avg[idx], \
96 prof_max[idx]);\
97 }; \
98 draw_filled_rect(0, SCREEN_HEIGHT - 9, 58, SCREEN_HEIGHT - 1, 2); \
99 txt_drawf_small("CPU USAGE: %.3l", 0, SCREEN_HEIGHT - 9, COL_FG, \
100 (u64)prof_frame_avg * 100 / 280896);\
101 } \
102 if (prof_frame_count >= PROF_ENABLE) { \
103 for (size_t idx = 0; idx < PROF_NUM; idx++) { \
104 prof_avg[idx] = prof_times[idx] / prof_frame_count; \
105 if (prof_reset_minmax) { \
106 prof_min[idx] = -1; \
107 prof_max[idx] = 0; \
108 } \
109 prof_times[idx] = 0; \
110 prof_count[idx] = 0; \
111 }; \
112 prof_frame_avg = prof_frame_time / prof_frame_count; \
113 prof_frame_count = 0; \
114 prof_frame_time = 0; \
115 } \
116 } while(0)
117
118#define PROF_SHOW() do { \
119 prof_show ^= 1; \
120} while(0)
121
122#else
123
124// No profiling.
125#define PROF_INIT()
126#define PROF(F,VAR) do {F;} while(0)
127#define FRAME_START() do { \
128 profile_start();\
129} while(0)
130#define FRAME_END() do { \
131 frame_time = profile_stop();\
132} while(0)
133#define PROF_SHOW()
134#endif
diff --git a/src/gba/renderer/renderer.c b/src/gba/renderer/renderer.c
new file mode 100644
index 0000000..fec5b6f
--- /dev/null
+++ b/src/gba/renderer/renderer.c
@@ -0,0 +1 @@
#include "renderer_m0.c"
diff --git a/src/renderer.h b/src/gba/renderer/renderer.h
index e6637ef..8b37496 100644
--- a/src/renderer.h
+++ b/src/gba/renderer/renderer.h
@@ -19,14 +19,8 @@ void draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr);
19// Fills the framebuffer with the given color. 19// Fills the framebuffer with the given color.
20void screen_fill(u8 clr); 20void screen_fill(u8 clr);
21 21
22// Draws a chr sprite (16 * u8). The first 8 bytes correspond to ch0 and the 22// Draws a 1bpp sprite in the given color.
23// last 8 to ch1. If clr is 0 the regular 4bit color will be used, from clr 1-14 23void draw_1bpp(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y);
24// the given color will overwrite the existing one. Color 15 will "clear" the
25// sprite instead.
26void draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y);
27
28// Draws a 1bpp icn sprite in the given color.
29void draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y);
30 24
31// Copies data and performs page flipping if needed. 25// Copies data and performs page flipping if needed.
32// To be called exactly once at the beginning of the VBlank. 26// To be called exactly once at the beginning of the VBlank.
diff --git a/src/renderer_m0.c b/src/gba/renderer/renderer_m0.c
index 0145f64..6eb45c5 100644
--- a/src/renderer_m0.c
+++ b/src/gba/renderer/renderer_m0.c
@@ -13,8 +13,9 @@
13// 13//
14 14
15#define SUBPIXEL_LINES 1 15#define SUBPIXEL_LINES 1
16#define DEC_BIG_LUT 1 16#define FLIP_TYPE 1
17#define FLIP_TYPE 3 17#define DISABLE_BOUNDCHECK_SCREEN 0
18#define NO_DMA 1
18 19
19// Front/back buffers for double buffering. 20// Front/back buffers for double buffering.
20#define BUF_0 ((u32*)(MEM_VRAM)) 21#define BUF_0 ((u32*)(MEM_VRAM))
@@ -38,7 +39,7 @@ static u32 dirty_tiles[21] = {0};
38// Boundchecks can be disable at compile time but this will not always improve 39// Boundchecks can be disable at compile time but this will not always improve
39// the performance and can in fact make it worse. It is possible that this is 40// the performance and can in fact make it worse. It is possible that this is
40// due to some aliasing optimizations but not sure at this moment. 41// due to some aliasing optimizations but not sure at this moment.
41#ifdef DISABLE_BOUNDCHECK_SCREEN 42#if DISABLE_BOUNDCHECK_SCREEN > 0
42#define BOUNDCHECK_SCREEN(X,Y) 43#define BOUNDCHECK_SCREEN(X,Y)
43#else 44#else
44#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; 45#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
@@ -50,6 +51,24 @@ static u32 dirty_tiles[21] = {0};
50// Swap A and B values to make sure A <= B. 51// Swap A and B values to make sure A <= B.
51#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); } 52#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
52 53
54// Color macros.
55#define COL_BG 0
56#define COL_FG 1
57#define COL_00 2
58#define COL_01 3
59#define COL_02 4
60#define COL_03 5
61#define COL_04 6
62#define COL_05 7
63#define COL_06 8
64#define COL_07 9
65#define COL_08 10
66#define COL_09 11
67#define COL_10 12
68#define COL_11 13
69#define COL_12 14
70#define COL_13 15
71
53// 72//
54// Basic primitives. 73// Basic primitives.
55// 74//
@@ -66,7 +85,11 @@ IWRAM_CODE
66void screen_fill(u8 clr) { 85void screen_fill(u8 clr) {
67 // We have to make sure we leave the last tile blank to use as alpha channel 86 // We have to make sure we leave the last tile blank to use as alpha channel
68 // when moving the BG during double buffering. 87 // when moving the BG during double buffering.
88#if NO_DMA == 0
69 dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3); 89 dma_fill(backbuf, 0x11111111 * clr, KB(20) - 32, 3);
90#else
91 set32(backbuf, 0x11111111 * clr, (KB(20) / 4) - 8);
92#endif
70 redraw(); 93 redraw();
71} 94}
72 95
@@ -85,12 +108,13 @@ draw_pixel(size_t x, size_t y, u8 clr) {
85 // Update backbuffer. 108 // Update backbuffer.
86 size_t shift = start_col * sizeof(u32); 109 size_t shift = start_col * sizeof(u32);
87 u32 mask = 0xF << shift; 110 u32 mask = 0xF << shift;
88 u32 row = clr << shift; 111 u32 color = clr << shift;
89 *dst = (*dst & ~mask) | row; 112 *dst = (*dst & ~mask) | color;
90 dirty_tiles[tile_y] |= 1 << tile_x; 113 dirty_tiles[tile_y] |= 1 << tile_x;
91} 114}
92 115
93IWRAM_CODE 116IWRAM_CODE
117UNROLL_LOOPS
94static inline 118static inline
95void 119void
96draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { 120draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
@@ -117,21 +141,21 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
117 size_t shift_left = start_col * 4; 141 size_t shift_left = start_col * 4;
118 size_t shift_right = (7 - end_col) * 4; 142 size_t shift_right = (7 - end_col) * 4;
119 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left); 143 u32 mask = (0xFFFFFFFF >> shift_right) & (0xFFFFFFFF << shift_left);
120 u32 row = (0x11111111 * clr) & mask; 144 u32 color = (0x11111111 * clr) & mask;
121 *dst = (*dst & ~mask) | row; 145 *dst = (*dst & ~mask) | color;
122 } else { 146 } else {
123 size_t shift_left = start_col * 4; 147 size_t shift_left = start_col * 4;
124 size_t shift_right = (7 - end_col) * 4; 148 size_t shift_right = (7 - end_col) * 4;
125 u32 mask = 0xFFFFFFFF; 149 u32 mask = 0xFFFFFFFF;
126 u32 row = 0x11111111 * clr; 150 u32 color = 0x11111111 * clr;
127 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); 151 *dst = (*dst & ~(mask << shift_left)) | (color << shift_left);
128 dst += 8; 152 dst += 8;
129 for (size_t i = 1; i < dtx; i++) { 153 for (size_t i = 1; i < dtx; i++) {
130 dirty |= (1 << (tile_x0 + i)); 154 dirty |= (1 << (tile_x0 + i));
131 *dst = row; 155 *dst = color;
132 dst += 8; 156 dst += 8;
133 } 157 }
134 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); 158 *dst = (*dst & ~(mask >> shift_right)) | (color >> shift_right);
135 } 159 }
136 dirty_tiles[tile_y] |= dirty; 160 dirty_tiles[tile_y] |= dirty;
137} 161}
@@ -156,26 +180,26 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
156 180
157 u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8]; 181 u32 *dst = &backbuf[start_row0 + (tile_x + tile_y * 32) * 8];
158 u32 mask = 0x0000000F << shift_left; 182 u32 mask = 0x0000000F << shift_left;
159 u32 row = (0x11111111 * clr) & mask; 183 u32 color = clr << shift_left;
160 u32 dty = tile_y1 - tile_y0; 184 u32 dty = tile_y1 - tile_y0;
161 if (dty < 1) { 185 if (dty < 1) {
162 for (size_t i = 0; i <= (y1 - y0); i++, dst++) { 186 for (size_t i = 0; i <= (y1 - y0); i++, dst++) {
163 dst[0] = (dst[0] & ~mask) | row; 187 dst[0] = (dst[0] & ~mask) | color;
164 } 188 }
165 } else { 189 } else {
166 for (size_t i = 0; i < (8 - start_row0); i++, dst++) { 190 for (size_t i = 0; i < (8 - start_row0); i++, dst++) {
167 dst[0] = (dst[0] & ~mask) | row; 191 dst[0] = (dst[0] & ~mask) | color;
168 } 192 }
169 dst += 8 * 31; 193 dst += 8 * 31;
170 for (size_t j = 1; j < dty; j++) { 194 for (size_t j = 1; j < dty; j++) {
171 dirty_tiles[tile_y0 + j] |= dirty; 195 dirty_tiles[tile_y0 + j] |= dirty;
172 for (size_t i = 0; i < 8; i++, dst++) { 196 for (size_t i = 0; i < 8; i++, dst++) {
173 dst[0] = (dst[0] & ~mask) | row; 197 dst[0] = (dst[0] & ~mask) | color;
174 } 198 }
175 dst += 8 * 31; 199 dst += 8 * 31;
176 } 200 }
177 for (size_t i = 0; i <= start_row1; i++, dst++) { 201 for (size_t i = 0; i <= start_row1; i++, dst++) {
178 dst[0] = (dst[0] & ~mask) | row; 202 dst[0] = (dst[0] & ~mask) | color;
179 } 203 }
180 } 204 }
181 dirty_tiles[tile_y0] |= dirty; 205 dirty_tiles[tile_y0] |= dirty;
@@ -313,8 +337,8 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
313 MAYBE_SWAP(x0, x1); 337 MAYBE_SWAP(x0, x1);
314 MAYBE_SWAP(y0, y1); 338 MAYBE_SWAP(y0, y1);
315 339
316 // Special condition. If the screen is to be completely filled, use the DMA 340 // Special condition. If the screen is to be completely filled, use the
317 // instead. 341 // full clearing functions instead.
318 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) { 342 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) {
319 screen_fill(clr); 343 screen_fill(clr);
320 return; 344 return;
@@ -326,161 +350,137 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
326} 350}
327 351
328// 352//
329// Sprites (chr/icn). 353// Sprites (1bpp).
330// 354//
331 355
332#if DEC_BIG_LUT == 1 356// NOTE: If IWRAM is too full, these could be made `const` at the cost of some
333static u32 dec_byte_flip_x[256] = { 357// performance when decoding 1bpp sprites.
334 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 358static u32 lut_1bpp_mask[256] = {
335 0x00000101, 0x00000110, 0x00000111, 0x00001000, 0x00001001, 359 0x00000000, 0xf0000000, 0x0f000000, 0xff000000, 0x00f00000,
336 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 360 0xf0f00000, 0x0ff00000, 0xfff00000, 0x000f0000, 0xf00f0000,
337 0x00001111, 0x00010000, 0x00010001, 0x00010010, 0x00010011, 361 0x0f0f0000, 0xff0f0000, 0x00ff0000, 0xf0ff0000, 0x0fff0000,
338 0x00010100, 0x00010101, 0x00010110, 0x00010111, 0x00011000, 362 0xffff0000, 0x0000f000, 0xf000f000, 0x0f00f000, 0xff00f000,
339 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 363 0x00f0f000, 0xf0f0f000, 0x0ff0f000, 0xfff0f000, 0x000ff000,
340 0x00011110, 0x00011111, 0x00100000, 0x00100001, 0x00100010, 364 0xf00ff000, 0x0f0ff000, 0xff0ff000, 0x00fff000, 0xf0fff000,
341 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, 365 0x0ffff000, 0xfffff000, 0x00000f00, 0xf0000f00, 0x0f000f00,
342 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 366 0xff000f00, 0x00f00f00, 0xf0f00f00, 0x0ff00f00, 0xfff00f00,
343 0x00101101, 0x00101110, 0x00101111, 0x00110000, 0x00110001, 367 0x000f0f00, 0xf00f0f00, 0x0f0f0f00, 0xff0f0f00, 0x00ff0f00,
344 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 368 0xf0ff0f00, 0x0fff0f00, 0xffff0f00, 0x0000ff00, 0xf000ff00,
345 0x00110111, 0x00111000, 0x00111001, 0x00111010, 0x00111011, 369 0x0f00ff00, 0xff00ff00, 0x00f0ff00, 0xf0f0ff00, 0x0ff0ff00,
346 0x00111100, 0x00111101, 0x00111110, 0x00111111, 0x01000000, 370 0xfff0ff00, 0x000fff00, 0xf00fff00, 0x0f0fff00, 0xff0fff00,
347 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 371 0x00ffff00, 0xf0ffff00, 0x0fffff00, 0xffffff00, 0x000000f0,
348 0x01000110, 0x01000111, 0x01001000, 0x01001001, 0x01001010, 372 0xf00000f0, 0x0f0000f0, 0xff0000f0, 0x00f000f0, 0xf0f000f0,
349 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, 373 0x0ff000f0, 0xfff000f0, 0x000f00f0, 0xf00f00f0, 0x0f0f00f0,
350 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 374 0xff0f00f0, 0x00ff00f0, 0xf0ff00f0, 0x0fff00f0, 0xffff00f0,
351 0x01010101, 0x01010110, 0x01010111, 0x01011000, 0x01011001, 375 0x0000f0f0, 0xf000f0f0, 0x0f00f0f0, 0xff00f0f0, 0x00f0f0f0,
352 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 376 0xf0f0f0f0, 0x0ff0f0f0, 0xfff0f0f0, 0x000ff0f0, 0xf00ff0f0,
353 0x01011111, 0x01100000, 0x01100001, 0x01100010, 0x01100011, 377 0x0f0ff0f0, 0xff0ff0f0, 0x00fff0f0, 0xf0fff0f0, 0x0ffff0f0,
354 0x01100100, 0x01100101, 0x01100110, 0x01100111, 0x01101000, 378 0xfffff0f0, 0x00000ff0, 0xf0000ff0, 0x0f000ff0, 0xff000ff0,
355 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 379 0x00f00ff0, 0xf0f00ff0, 0x0ff00ff0, 0xfff00ff0, 0x000f0ff0,
356 0x01101110, 0x01101111, 0x01110000, 0x01110001, 0x01110010, 380 0xf00f0ff0, 0x0f0f0ff0, 0xff0f0ff0, 0x00ff0ff0, 0xf0ff0ff0,
357 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, 381 0x0fff0ff0, 0xffff0ff0, 0x0000fff0, 0xf000fff0, 0x0f00fff0,
358 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 382 0xff00fff0, 0x00f0fff0, 0xf0f0fff0, 0x0ff0fff0, 0xfff0fff0,
359 0x01111101, 0x01111110, 0x01111111, 0x10000000, 0x10000001, 383 0x000ffff0, 0xf00ffff0, 0x0f0ffff0, 0xff0ffff0, 0x00fffff0,
360 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 384 0xf0fffff0, 0x0ffffff0, 0xfffffff0, 0x0000000f, 0xf000000f,
361 0x10000111, 0x10001000, 0x10001001, 0x10001010, 0x10001011, 385 0x0f00000f, 0xff00000f, 0x00f0000f, 0xf0f0000f, 0x0ff0000f,
362 0x10001100, 0x10001101, 0x10001110, 0x10001111, 0x10010000, 386 0xfff0000f, 0x000f000f, 0xf00f000f, 0x0f0f000f, 0xff0f000f,
363 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 387 0x00ff000f, 0xf0ff000f, 0x0fff000f, 0xffff000f, 0x0000f00f,
364 0x10010110, 0x10010111, 0x10011000, 0x10011001, 0x10011010, 388 0xf000f00f, 0x0f00f00f, 0xff00f00f, 0x00f0f00f, 0xf0f0f00f,
365 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, 389 0x0ff0f00f, 0xfff0f00f, 0x000ff00f, 0xf00ff00f, 0x0f0ff00f,
366 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 390 0xff0ff00f, 0x00fff00f, 0xf0fff00f, 0x0ffff00f, 0xfffff00f,
367 0x10100101, 0x10100110, 0x10100111, 0x10101000, 0x10101001, 391 0x00000f0f, 0xf0000f0f, 0x0f000f0f, 0xff000f0f, 0x00f00f0f,
368 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 392 0xf0f00f0f, 0x0ff00f0f, 0xfff00f0f, 0x000f0f0f, 0xf00f0f0f,
369 0x10101111, 0x10110000, 0x10110001, 0x10110010, 0x10110011, 393 0x0f0f0f0f, 0xff0f0f0f, 0x00ff0f0f, 0xf0ff0f0f, 0x0fff0f0f,
370 0x10110100, 0x10110101, 0x10110110, 0x10110111, 0x10111000, 394 0xffff0f0f, 0x0000ff0f, 0xf000ff0f, 0x0f00ff0f, 0xff00ff0f,
371 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 395 0x00f0ff0f, 0xf0f0ff0f, 0x0ff0ff0f, 0xfff0ff0f, 0x000fff0f,
372 0x10111110, 0x10111111, 0x11000000, 0x11000001, 0x11000010, 396 0xf00fff0f, 0x0f0fff0f, 0xff0fff0f, 0x00ffff0f, 0xf0ffff0f,
373 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, 397 0x0fffff0f, 0xffffff0f, 0x000000ff, 0xf00000ff, 0x0f0000ff,
374 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 398 0xff0000ff, 0x00f000ff, 0xf0f000ff, 0x0ff000ff, 0xfff000ff,
375 0x11001101, 0x11001110, 0x11001111, 0x11010000, 0x11010001, 399 0x000f00ff, 0xf00f00ff, 0x0f0f00ff, 0xff0f00ff, 0x00ff00ff,
376 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 400 0xf0ff00ff, 0x0fff00ff, 0xffff00ff, 0x0000f0ff, 0xf000f0ff,
377 0x11010111, 0x11011000, 0x11011001, 0x11011010, 0x11011011, 401 0x0f00f0ff, 0xff00f0ff, 0x00f0f0ff, 0xf0f0f0ff, 0x0ff0f0ff,
378 0x11011100, 0x11011101, 0x11011110, 0x11011111, 0x11100000, 402 0xfff0f0ff, 0x000ff0ff, 0xf00ff0ff, 0x0f0ff0ff, 0xff0ff0ff,
379 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 403 0x00fff0ff, 0xf0fff0ff, 0x0ffff0ff, 0xfffff0ff, 0x00000fff,
380 0x11100110, 0x11100111, 0x11101000, 0x11101001, 0x11101010, 404 0xf0000fff, 0x0f000fff, 0xff000fff, 0x00f00fff, 0xf0f00fff,
381 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, 405 0x0ff00fff, 0xfff00fff, 0x000f0fff, 0xf00f0fff, 0x0f0f0fff,
382 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 406 0xff0f0fff, 0x00ff0fff, 0xf0ff0fff, 0x0fff0fff, 0xffff0fff,
383 0x11110101, 0x11110110, 0x11110111, 0x11111000, 0x11111001, 407 0x0000ffff, 0xf000ffff, 0x0f00ffff, 0xff00ffff, 0x00f0ffff,
384 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 408 0xf0f0ffff, 0x0ff0ffff, 0xfff0ffff, 0x000fffff, 0xf00fffff,
385 0x11111111 409 0x0f0fffff, 0xff0fffff, 0x00ffffff, 0xf0ffffff, 0x0fffffff,
410 0xffffffff
386}; 411};
387 412
388static u32 dec_byte[256] = { 413static u32 lut_1bpp_mask_flip_x[256] = {
389 0x00000000, 0x10000000, 0x01000000, 0x11000000, 0x00100000, 414 0x00000000, 0x0000000f, 0x000000f0, 0x000000ff, 0x00000f00,
390 0x10100000, 0x01100000, 0x11100000, 0x00010000, 0x10010000, 415 0x00000f0f, 0x00000ff0, 0x00000fff, 0x0000f000, 0x0000f00f,
391 0x01010000, 0x11010000, 0x00110000, 0x10110000, 0x01110000, 416 0x0000f0f0, 0x0000f0ff, 0x0000ff00, 0x0000ff0f, 0x0000fff0,
392 0x11110000, 0x00001000, 0x10001000, 0x01001000, 0x11001000, 417 0x0000ffff, 0x000f0000, 0x000f000f, 0x000f00f0, 0x000f00ff,
393 0x00101000, 0x10101000, 0x01101000, 0x11101000, 0x00011000, 418 0x000f0f00, 0x000f0f0f, 0x000f0ff0, 0x000f0fff, 0x000ff000,
394 0x10011000, 0x01011000, 0x11011000, 0x00111000, 0x10111000, 419 0x000ff00f, 0x000ff0f0, 0x000ff0ff, 0x000fff00, 0x000fff0f,
395 0x01111000, 0x11111000, 0x00000100, 0x10000100, 0x01000100, 420 0x000ffff0, 0x000fffff, 0x00f00000, 0x00f0000f, 0x00f000f0,
396 0x11000100, 0x00100100, 0x10100100, 0x01100100, 0x11100100, 421 0x00f000ff, 0x00f00f00, 0x00f00f0f, 0x00f00ff0, 0x00f00fff,
397 0x00010100, 0x10010100, 0x01010100, 0x11010100, 0x00110100, 422 0x00f0f000, 0x00f0f00f, 0x00f0f0f0, 0x00f0f0ff, 0x00f0ff00,
398 0x10110100, 0x01110100, 0x11110100, 0x00001100, 0x10001100, 423 0x00f0ff0f, 0x00f0fff0, 0x00f0ffff, 0x00ff0000, 0x00ff000f,
399 0x01001100, 0x11001100, 0x00101100, 0x10101100, 0x01101100, 424 0x00ff00f0, 0x00ff00ff, 0x00ff0f00, 0x00ff0f0f, 0x00ff0ff0,
400 0x11101100, 0x00011100, 0x10011100, 0x01011100, 0x11011100, 425 0x00ff0fff, 0x00fff000, 0x00fff00f, 0x00fff0f0, 0x00fff0ff,
401 0x00111100, 0x10111100, 0x01111100, 0x11111100, 0x00000010, 426 0x00ffff00, 0x00ffff0f, 0x00fffff0, 0x00ffffff, 0x0f000000,
402 0x10000010, 0x01000010, 0x11000010, 0x00100010, 0x10100010, 427 0x0f00000f, 0x0f0000f0, 0x0f0000ff, 0x0f000f00, 0x0f000f0f,
403 0x01100010, 0x11100010, 0x00010010, 0x10010010, 0x01010010, 428 0x0f000ff0, 0x0f000fff, 0x0f00f000, 0x0f00f00f, 0x0f00f0f0,
404 0x11010010, 0x00110010, 0x10110010, 0x01110010, 0x11110010, 429 0x0f00f0ff, 0x0f00ff00, 0x0f00ff0f, 0x0f00fff0, 0x0f00ffff,
405 0x00001010, 0x10001010, 0x01001010, 0x11001010, 0x00101010, 430 0x0f0f0000, 0x0f0f000f, 0x0f0f00f0, 0x0f0f00ff, 0x0f0f0f00,
406 0x10101010, 0x01101010, 0x11101010, 0x00011010, 0x10011010, 431 0x0f0f0f0f, 0x0f0f0ff0, 0x0f0f0fff, 0x0f0ff000, 0x0f0ff00f,
407 0x01011010, 0x11011010, 0x00111010, 0x10111010, 0x01111010, 432 0x0f0ff0f0, 0x0f0ff0ff, 0x0f0fff00, 0x0f0fff0f, 0x0f0ffff0,
408 0x11111010, 0x00000110, 0x10000110, 0x01000110, 0x11000110, 433 0x0f0fffff, 0x0ff00000, 0x0ff0000f, 0x0ff000f0, 0x0ff000ff,
409 0x00100110, 0x10100110, 0x01100110, 0x11100110, 0x00010110, 434 0x0ff00f00, 0x0ff00f0f, 0x0ff00ff0, 0x0ff00fff, 0x0ff0f000,
410 0x10010110, 0x01010110, 0x11010110, 0x00110110, 0x10110110, 435 0x0ff0f00f, 0x0ff0f0f0, 0x0ff0f0ff, 0x0ff0ff00, 0x0ff0ff0f,
411 0x01110110, 0x11110110, 0x00001110, 0x10001110, 0x01001110, 436 0x0ff0fff0, 0x0ff0ffff, 0x0fff0000, 0x0fff000f, 0x0fff00f0,
412 0x11001110, 0x00101110, 0x10101110, 0x01101110, 0x11101110, 437 0x0fff00ff, 0x0fff0f00, 0x0fff0f0f, 0x0fff0ff0, 0x0fff0fff,
413 0x00011110, 0x10011110, 0x01011110, 0x11011110, 0x00111110, 438 0x0ffff000, 0x0ffff00f, 0x0ffff0f0, 0x0ffff0ff, 0x0fffff00,
414 0x10111110, 0x01111110, 0x11111110, 0x00000001, 0x10000001, 439 0x0fffff0f, 0x0ffffff0, 0x0fffffff, 0xf0000000, 0xf000000f,
415 0x01000001, 0x11000001, 0x00100001, 0x10100001, 0x01100001, 440 0xf00000f0, 0xf00000ff, 0xf0000f00, 0xf0000f0f, 0xf0000ff0,
416 0x11100001, 0x00010001, 0x10010001, 0x01010001, 0x11010001, 441 0xf0000fff, 0xf000f000, 0xf000f00f, 0xf000f0f0, 0xf000f0ff,
417 0x00110001, 0x10110001, 0x01110001, 0x11110001, 0x00001001, 442 0xf000ff00, 0xf000ff0f, 0xf000fff0, 0xf000ffff, 0xf00f0000,
418 0x10001001, 0x01001001, 0x11001001, 0x00101001, 0x10101001, 443 0xf00f000f, 0xf00f00f0, 0xf00f00ff, 0xf00f0f00, 0xf00f0f0f,
419 0x01101001, 0x11101001, 0x00011001, 0x10011001, 0x01011001, 444 0xf00f0ff0, 0xf00f0fff, 0xf00ff000, 0xf00ff00f, 0xf00ff0f0,
420 0x11011001, 0x00111001, 0x10111001, 0x01111001, 0x11111001, 445 0xf00ff0ff, 0xf00fff00, 0xf00fff0f, 0xf00ffff0, 0xf00fffff,
421 0x00000101, 0x10000101, 0x01000101, 0x11000101, 0x00100101, 446 0xf0f00000, 0xf0f0000f, 0xf0f000f0, 0xf0f000ff, 0xf0f00f00,
422 0x10100101, 0x01100101, 0x11100101, 0x00010101, 0x10010101, 447 0xf0f00f0f, 0xf0f00ff0, 0xf0f00fff, 0xf0f0f000, 0xf0f0f00f,
423 0x01010101, 0x11010101, 0x00110101, 0x10110101, 0x01110101, 448 0xf0f0f0f0, 0xf0f0f0ff, 0xf0f0ff00, 0xf0f0ff0f, 0xf0f0fff0,
424 0x11110101, 0x00001101, 0x10001101, 0x01001101, 0x11001101, 449 0xf0f0ffff, 0xf0ff0000, 0xf0ff000f, 0xf0ff00f0, 0xf0ff00ff,
425 0x00101101, 0x10101101, 0x01101101, 0x11101101, 0x00011101, 450 0xf0ff0f00, 0xf0ff0f0f, 0xf0ff0ff0, 0xf0ff0fff, 0xf0fff000,
426 0x10011101, 0x01011101, 0x11011101, 0x00111101, 0x10111101, 451 0xf0fff00f, 0xf0fff0f0, 0xf0fff0ff, 0xf0ffff00, 0xf0ffff0f,
427 0x01111101, 0x11111101, 0x00000011, 0x10000011, 0x01000011, 452 0xf0fffff0, 0xf0ffffff, 0xff000000, 0xff00000f, 0xff0000f0,
428 0x11000011, 0x00100011, 0x10100011, 0x01100011, 0x11100011, 453 0xff0000ff, 0xff000f00, 0xff000f0f, 0xff000ff0, 0xff000fff,
429 0x00010011, 0x10010011, 0x01010011, 0x11010011, 0x00110011, 454 0xff00f000, 0xff00f00f, 0xff00f0f0, 0xff00f0ff, 0xff00ff00,
430 0x10110011, 0x01110011, 0x11110011, 0x00001011, 0x10001011, 455 0xff00ff0f, 0xff00fff0, 0xff00ffff, 0xff0f0000, 0xff0f000f,
431 0x01001011, 0x11001011, 0x00101011, 0x10101011, 0x01101011, 456 0xff0f00f0, 0xff0f00ff, 0xff0f0f00, 0xff0f0f0f, 0xff0f0ff0,
432 0x11101011, 0x00011011, 0x10011011, 0x01011011, 0x11011011, 457 0xff0f0fff, 0xff0ff000, 0xff0ff00f, 0xff0ff0f0, 0xff0ff0ff,
433 0x00111011, 0x10111011, 0x01111011, 0x11111011, 0x00000111, 458 0xff0fff00, 0xff0fff0f, 0xff0ffff0, 0xff0fffff, 0xfff00000,
434 0x10000111, 0x01000111, 0x11000111, 0x00100111, 0x10100111, 459 0xfff0000f, 0xfff000f0, 0xfff000ff, 0xfff00f00, 0xfff00f0f,
435 0x01100111, 0x11100111, 0x00010111, 0x10010111, 0x01010111, 460 0xfff00ff0, 0xfff00fff, 0xfff0f000, 0xfff0f00f, 0xfff0f0f0,
436 0x11010111, 0x00110111, 0x10110111, 0x01110111, 0x11110111, 461 0xfff0f0ff, 0xfff0ff00, 0xfff0ff0f, 0xfff0fff0, 0xfff0ffff,
437 0x00001111, 0x10001111, 0x01001111, 0x11001111, 0x00101111, 462 0xffff0000, 0xffff000f, 0xffff00f0, 0xffff00ff, 0xffff0f00,
438 0x10101111, 0x01101111, 0x11101111, 0x00011111, 0x10011111, 463 0xffff0f0f, 0xffff0ff0, 0xffff0fff, 0xfffff000, 0xfffff00f,
439 0x01011111, 0x11011111, 0x00111111, 0x10111111, 0x01111111, 464 0xfffff0f0, 0xfffff0ff, 0xffffff00, 0xffffff0f, 0xfffffff0,
440 0x11111111 465 0xffffffff
441}; 466};
442 467
443IWRAM_CODE 468// Create a mask for zero sprite values in each nibble.
444static inline 469// For example: 0x12305008 -> 0xFFF0F00F
470INLINE
445u32 471u32
446decode_1bpp(u8 row, u8 flip_x) { 472create_zero_mask(u32 x) {
447 if (flip_x) { 473 x |= x >> 2;
448 return dec_byte_flip_x[row]; 474 x |= x >> 1;
449 } 475 x &= 0x11111111;
450 return dec_byte[row]; 476 return x * 0xf;
451} 477}
452#else
453static u16 dec_nibble[] = {
454 0x0000, 0x1000, 0x0100, 0x1100,
455 0x0010, 0x1010, 0x0110, 0x1110,
456 0x0001, 0x1001, 0x0101, 0x1101,
457 0x0011, 0x1011, 0x0111, 0x1111,
458};
459
460static u16 dec_nibble_flip_x[] = {
461 0x0000, 0x0001, 0x0010, 0x0011,
462 0x0100, 0x0101, 0x0110, 0x0111,
463 0x1000, 0x1001, 0x1010, 0x1011,
464 0x1100, 0x1101, 0x1110, 0x1111,
465};
466
467IWRAM_CODE
468static inline
469u32
470decode_1bpp(u8 row, u8 flip_x) {
471 if (flip_x) {
472 u16 *lut = dec_nibble_flip_x;
473 return (u32)lut[(row >> 4) & 0xF] << 16 | (u32)lut[(row >> 0) & 0xF];
474 }
475 u16 *lut = dec_nibble;
476 return (u32)lut[(row >> 0) & 0xF] << 16 | (u32)lut[(row >> 4) & 0xF];
477}
478#endif
479 478
480IWRAM_CODE 479IWRAM_CODE
481UNROLL_LOOPS 480UNROLL_LOOPS
482void 481void
483draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { 482draw_sprite(size_t x, size_t y, u32 *sprite, u8 clear) {
483 // Copy a 4bpp sprite into memory. Color 0 is the transparency color.
484 BOUNDCHECK_SCREEN(x, y); 484 BOUNDCHECK_SCREEN(x, y);
485 size_t tile_x0 = x / 8; 485 size_t tile_x0 = x / 8;
486 size_t tile_x1 = (x + 7) / 8; 486 size_t tile_x1 = (x + 7) / 8;
@@ -491,77 +491,71 @@ draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
491 size_t shift_right = (8 - start_col) * 4; 491 size_t shift_right = (8 - start_col) * 4;
492 u32 dirty = (1 << tile_x0) | (1 << tile_x1); 492 u32 dirty = (1 << tile_x0) | (1 << tile_x1);
493 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; 493 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8];
494#if DEC_BIG_LUT 494 size_t n_rows = 8;
495 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 495 if (y + 8 > SCREEN_HEIGHT) {
496#endif 496 n_rows = 8 - ((y + 8) - SCREEN_HEIGHT);
497 if (!flip_y) { 497 }
498 for(size_t v = 0; v < 8; v++, dst++) { 498
499 if ((y + v) >= SCREEN_HEIGHT) break; 499 size_t n0 = MIN(8 - start_row, n_rows);
500 u8 ch1 = sprite[v + 0]; 500 if (clear) {
501 u8 ch2 = sprite[v + 8]; 501 for(size_t v = 0; v < n0; v++, dst++) {
502#if DEC_BIG_LUT 502 u32 row = sprite[v];
503 u32 clr_a = lut[ch1]; 503
504 u32 clr_b = lut[ch2]; 504 u32 mask = create_zero_mask(row);
505#else 505 u32 msk0 = mask << shift_left;
506 u32 clr_a = decode_1bpp(ch1, flip_x); 506 u32 msk1 = mask >> shift_right;
507 u32 clr_b = decode_1bpp(ch2, flip_x); 507
508#endif 508 dst[0] = (dst[0] & ~msk0);
509 u32 mask_a = (clr_a * 0xF); 509 dst[8] = (dst[8] & ~msk1);
510 u32 mask_b = (clr_b * 0xF); 510 }
511 u32 mask = (mask_a | mask_b); 511 dst += (32 - 1) * 8;
512 u32 color; 512 for(size_t v = n0; v < n_rows; v++, dst++) {
513 if (clr == 0) { 513 u32 row = sprite[v];
514 color = clr_a + (clr_b << 1); 514
515 } else if (clr == 15) { 515 u32 mask = create_zero_mask(row);
516 color = 0; 516 u32 msk0 = mask << shift_left;
517 } else { 517 u32 msk1 = mask >> shift_right;
518 color = (clr_a | clr_b) * clr; 518
519 } 519 dst[0] = (dst[0] & ~msk0);
520 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 520 dst[8] = (dst[8] & ~msk1);
521 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right);
522 if ((start_row + v) == 7) {
523 dirty_tiles[tile_y + 1] |= dirty;
524 dst += (32 - 1) * 8;
525 }
526 } 521 }
527 } else { 522 } else {
528 for(size_t v = 0; v < 8; v++, dst++) { 523 for(size_t v = 0; v < n0; v++, dst++) {
529 if ((y + v) >= SCREEN_HEIGHT) break; 524 u32 row = sprite[v];
530 u8 ch1 = sprite[(7 - v) + 0]; 525
531 u8 ch2 = sprite[(7 - v) + 8]; 526 u32 mask = create_zero_mask(row);
532#if DEC_BIG_LUT 527 u32 msk0 = mask << shift_left;
533 u32 clr_a = lut[ch1]; 528 u32 msk1 = mask >> shift_right;
534 u32 clr_b = lut[ch2]; 529 u32 clr0 = row << shift_left;
535#else 530 u32 clr1 = row >> shift_right;
536 u32 clr_a = decode_1bpp(ch1, flip_x); 531
537 u32 clr_b = decode_1bpp(ch2, flip_x); 532 dst[0] = (dst[0] & ~msk0) | clr0;
538#endif 533 dst[8] = (dst[8] & ~msk1) | clr1;
539 u32 mask_a = (clr_a * 0xF); 534 }
540 u32 mask_b = (clr_b * 0xF); 535 dst += (32 - 1) * 8;
541 u32 mask = (mask_a | mask_b); 536 for(size_t v = n0; v < n_rows; v++, dst++) {
542 u32 color; 537 u32 row = sprite[v];
543 if (clr == 0) { 538
544 color = clr_a + (clr_b << 1); 539 u32 mask = create_zero_mask(row);
545 } else if (clr == 15) { 540 u32 msk0 = mask << shift_left;
546 color = 0; 541 u32 msk1 = mask >> shift_right;
547 } else { 542 u32 clr0 = row << shift_left;
548 color = (clr_a | clr_b) * clr; 543 u32 clr1 = row >> shift_right;
549 } 544
550 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 545 dst[0] = (dst[0] & ~msk0) | clr0;
551 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 546 dst[8] = (dst[8] & ~msk1) | clr1;
552 if ((start_row + v) == 7) {
553 dirty_tiles[tile_y + 1] |= dirty;
554 dst += (32 - 1) * 8;
555 }
556 } 547 }
557 } 548 }
558 dirty_tiles[tile_y] |= dirty; 549 dirty_tiles[tile_y] |= dirty;
550 if (start_row != 0) {
551 dirty_tiles[tile_y + 1] |= dirty;
552 }
559} 553}
560 554
561IWRAM_CODE 555IWRAM_CODE
562UNROLL_LOOPS 556UNROLL_LOOPS
563void 557void
564draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) { 558draw_1bpp(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
565 BOUNDCHECK_SCREEN(x, y); 559 BOUNDCHECK_SCREEN(x, y);
566 size_t tile_x0 = x / 8; 560 size_t tile_x0 = x / 8;
567 size_t tile_x1 = (x + 7) / 8; 561 size_t tile_x1 = (x + 7) / 8;
@@ -572,47 +566,63 @@ draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
572 size_t shift_right = (8 - start_col) * 4; 566 size_t shift_right = (8 - start_col) * 4;
573 u32 dirty = (1 << tile_x0) | (1 << tile_x1); 567 u32 dirty = (1 << tile_x0) | (1 << tile_x1);
574 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8]; 568 u32 *dst = &backbuf[start_row + (tile_x0 + tile_y * 32) * 8];
575#if DEC_BIG_LUT 569 u32 color = clr * 0x11111111;
576 u32 *lut = flip_x ? dec_byte_flip_x : dec_byte; 570 u32 *lut = flip_x ? lut_1bpp_mask_flip_x : lut_1bpp_mask;
577#endif 571 size_t n_rows = 8;
572 if (y + 8 > SCREEN_HEIGHT) {
573 n_rows = 8 - ((y + 8) - SCREEN_HEIGHT);
574 }
575 size_t n0 = MIN(8 - start_row, n_rows);
578 if (!flip_y) { 576 if (!flip_y) {
579 for(size_t v = 0; v < 8; v++, dst++) { 577 for(size_t v = 0; v < n0; v++, dst++) {
580 if ((y + v) >= SCREEN_HEIGHT) break; 578 u32 mask = lut[*sprite];
581 u8 ch1 = sprite[v + 0]; 579 u32 msk0 = mask << shift_left;
582#if DEC_BIG_LUT 580 u32 msk1 = mask >> shift_right;
583 u32 color = lut[ch1]; 581 u32 clr0 = msk0 & color;
584#else 582 u32 clr1 = msk1 & color;
585 u32 color = decode_1bpp(ch1, flip_x); 583 dst[0] = (dst[0] & ~msk0) | clr0;
586#endif 584 dst[8] = (dst[8] & ~msk1) | clr1;
587 u32 mask = (color * 0xF); 585 sprite++;
588 color *= clr; 586 }
589 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 587 dst += (32 - 1) * 8;
590 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 588 for(size_t v = n0; v < n_rows; v++, dst++) {
591 if ((start_row + v) == 7) { 589 u32 mask = lut[*sprite];
592 dirty_tiles[tile_y + 1] |= dirty; 590 u32 msk0 = mask << shift_left;
593 dst += (32 - 1) * 8; 591 u32 msk1 = mask >> shift_right;
594 } 592 u32 clr0 = msk0 & color;
593 u32 clr1 = msk1 & color;
594 dst[0] = (dst[0] & ~msk0) | clr0;
595 dst[8] = (dst[8] & ~msk1) | clr1;
596 sprite++;
595 } 597 }
596 } else { 598 } else {
597 for(size_t v = 0; v < 8; v++, dst++) { 599 sprite += 7;
598 if ((y + v) >= SCREEN_HEIGHT) break; 600 for(size_t v = 0; v < n0; v++, dst++) {
599 u8 ch1 = sprite[(7 - v) + 0]; 601 u32 mask = lut[*sprite];
600#if DEC_BIG_LUT 602 u32 msk0 = mask << shift_left;
601 u32 color = lut[ch1]; 603 u32 msk1 = mask >> shift_right;
602#else 604 u32 clr0 = msk0 & color;
603 u32 color = decode_1bpp(ch1, flip_x); 605 u32 clr1 = msk1 & color;
604#endif 606 dst[0] = (dst[0] & ~msk0) | clr0;
605 u32 mask = (color * 0xF); 607 dst[8] = (dst[8] & ~msk1) | clr1;
606 color *= clr; 608 sprite--;
607 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left); 609 }
608 dst[8] = (dst[8] & ~(mask >> shift_right)) | (color >> shift_right); 610 dst += (32 - 1) * 8;
609 if ((start_row + v) == 7) { 611 for(size_t v = n0; v < n_rows; v++, dst++) {
610 dirty_tiles[tile_y + 1] |= dirty; 612 u32 mask = lut[*sprite];
611 dst += (32 - 1) * 8; 613 u32 msk0 = mask << shift_left;
612 } 614 u32 msk1 = mask >> shift_right;
615 u32 clr0 = msk0 & color;
616 u32 clr1 = msk1 & color;
617 dst[0] = (dst[0] & ~msk0) | clr0;
618 dst[8] = (dst[8] & ~msk1) | clr1;
619 sprite--;
613 } 620 }
614 } 621 }
615 dirty_tiles[tile_y] |= dirty; 622 dirty_tiles[tile_y] |= dirty;
623 if (start_row != 0) {
624 dirty_tiles[tile_y + 1] |= dirty;
625 }
616} 626}
617 627
618// 628//
@@ -620,9 +630,11 @@ draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
620// 630//
621 631
622IWRAM_CODE 632IWRAM_CODE
633UNROLL_LOOPS
623void 634void
624flip_buffer(void) { 635flip_buffer(void) {
625// Mode 0: double buffering without dirty tiles. 636// Mode 0: double buffering without dirty tiles. Use this when we are clearing
637// the screen every single frame.
626#if FLIP_TYPE == 0 638#if FLIP_TYPE == 0
627 if (backbuf == BUF_0) { 639 if (backbuf == BUF_0) {
628 backbuf = BUF_1; 640 backbuf = BUF_1;
@@ -635,7 +647,7 @@ flip_buffer(void) {
635 } 647 }
636 648
637// Mode 1: single buffer, copy the dirty lines from backbuffer (BUF_1) to 649// Mode 1: single buffer, copy the dirty lines from backbuffer (BUF_1) to
638// frontbuffer (BUF_0) using the DMA. 650// frontbuffer (BUF_0).
639#elif FLIP_TYPE == 1 651#elif FLIP_TYPE == 1
640 u32 *front = BUF_0; 652 u32 *front = BUF_0;
641 u32 *back = BUF_1; 653 u32 *back = BUF_1;
@@ -646,7 +658,11 @@ flip_buffer(void) {
646 continue; 658 continue;
647 } 659 }
648 u32 offset = j * 32 * 8; 660 u32 offset = j * 32 * 8;
661#if NO_DMA == 0
649 dma_copy(front + offset, back + offset, (30 * 8 * 4), 3); 662 dma_copy(front + offset, back + offset, (30 * 8 * 4), 3);
663#else
664 copy32(front + offset, back + offset, (30 * 8));
665#endif
650 dirty_tiles[j] = 0; 666 dirty_tiles[j] = 0;
651 } 667 }
652 668
@@ -673,7 +689,7 @@ flip_buffer(void) {
673 } 689 }
674 690
675// Mode 3: Double buffering with dirty line, copying the dirty lines if needed 691// Mode 3: Double buffering with dirty line, copying the dirty lines if needed
676// after flipping buffers with the DMA. 692// after flipping buffers.
677#elif FLIP_TYPE == 3 693#elif FLIP_TYPE == 3
678 bool should_flip = false; 694 bool should_flip = false;
679 for (size_t j = 0; j < 20; ++j) { 695 for (size_t j = 0; j < 20; ++j) {
@@ -701,7 +717,11 @@ flip_buffer(void) {
701 continue; 717 continue;
702 } 718 }
703 u32 offset = j * 32 * 8; 719 u32 offset = j * 32 * 8;
720#if NO_DMA == 0
704 dma_copy(backbuf + offset, frontbuf + offset, (30 * 8 * 4), 3); 721 dma_copy(backbuf + offset, frontbuf + offset, (30 * 8 * 4), 3);
722#else
723 copy32(backbuf + offset, frontbuf + offset, (30 * 8));
724#endif
705 dirty_tiles[j] = 0; 725 dirty_tiles[j] = 0;
706 } 726 }
707 727
@@ -746,6 +766,22 @@ flip_buffer(void) {
746#endif 766#endif
747} 767}
748 768
769IWRAM_CODE
770UNROLL_LOOPS
771void
772decode_1bpp(u32 *dst, u8 *src, u8 clr, u8 flip_x, u32 n_tiles) {
773 u32 color = 0x11111111 * clr;
774 if (!flip_x) {
775 for (size_t i = 0; i < n_tiles * 8; i++) {
776 *dst++ = lut_1bpp_mask[*src++] & color;
777 }
778 } else {
779 for (size_t i = 0; i < n_tiles * 8; i++) {
780 *dst++ = lut_1bpp_mask_flip_x[*src++] & color;
781 }
782 }
783}
784
749// 785//
750// Text rendering. 786// Text rendering.
751// 787//
@@ -756,9 +792,28 @@ flip_buffer(void) {
756void 792void
757txt_drawc(char c, size_t x, size_t y, u8 clr) { 793txt_drawc(char c, size_t x, size_t y, u8 clr) {
758 u8 *tile = font_icn; 794 u8 *tile = font_icn;
759 draw_icn(x, y, tile + 8 * c, clr, 1, 0); 795 draw_1bpp(x, y, tile + 8 * c, clr, 1, 0);
760} 796}
761 797
798void
799txt_drawc_small(char c, size_t x, size_t y, u8 clr) {
800 u8 *tile = font_icn;
801 c = c < 'a' ? c + 16 * 6 : c + 16 * 4;
802 draw_1bpp(x, y, tile + 8 * c, clr, 1, 0);
803}
804
// Formatted text drawing with the small glyph callback.
//
// Formats `msg` and the variadic arguments with posprintf into a 256-byte
// zero-initialised stack buffer, saves the current text_engine.spacing, sets
// the spacing to 4 and the draw callback to txt_drawc_small, draws the string
// with txt_draws, and then restores the saved spacing. The draw callback is
// always reset to txt_drawc afterwards (not to whatever was installed before).
//
// The macro expands to a bare brace block, so it behaves like a statement
// rather than an expression.
// NOTE(review): posprintf is given no buffer length here, so a formatted
// string longer than 255 characters would presumably overflow `buf` - confirm.
805#define txt_drawf_small(msg, x, y, clr, ...) \
806 { \
807 char buf[256] = {0}; \
808 posprintf(buf, msg, ##__VA_ARGS__); \
809 u8 tmp = text_engine.spacing;\
810 txt_spacing(4);\
811 text_engine.drawc = txt_drawc_small;\
812 txt_draws(buf, x, y, clr); \
813 txt_spacing(tmp);\
814 text_engine.drawc = txt_drawc;\
815 }
816
762// 817//
763// Initialization. 818// Initialization.
764// 819//
@@ -769,7 +824,11 @@ renderer_init(void) {
769 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; 824 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1;
770 825
771 // Clear VRAM. 826 // Clear VRAM.
827#if NO_DMA == 0
772 dma_fill((u32*)MEM_VRAM, 0, KB(96), 3); 828 dma_fill((u32*)MEM_VRAM, 0, KB(96), 3);
829#else
830 set32((u32*)MEM_VRAM, 0, KB(96)/4);
831#endif
773 832
774 // Initialize backgrounds. 833 // Initialize backgrounds.
775 BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); 834 BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1);
@@ -796,15 +855,22 @@ renderer_init(void) {
796 BG_H_SCROLL_1 = -240; 855 BG_H_SCROLL_1 = -240;
797 856
798 // Initialize default palette. 857 // Initialize default palette.
799 PAL_BUFFER_BG[0] = COLOR_BLACK; 858 PAL_BUFFER_BG[0x0] = COLOR_BLACK;
800 PAL_BUFFER_BG[1] = COLOR_WHITE; 859 PAL_BUFFER_BG[0x1] = COLOR_WHITE;
801 PAL_BUFFER_BG[2] = COLOR_RED; 860 PAL_BUFFER_BG[0x2] = COLOR_RED;
802 PAL_BUFFER_BG[3] = COLOR_BLUE; 861 PAL_BUFFER_BG[0x3] = COLOR_BLUE;
803 PAL_BUFFER_BG[4] = COLOR_CYAN; 862 PAL_BUFFER_BG[0x4] = COLOR_CYAN;
804 PAL_BUFFER_BG[5] = COLOR_PURPLE; 863 PAL_BUFFER_BG[0x5] = COLOR_PURPLE;
805 PAL_BUFFER_BG[6] = COLOR_YELLOW; 864 PAL_BUFFER_BG[0x6] = COLOR_YELLOW;
806 PAL_BUFFER_BG[7] = COLOR_GREEN; 865 PAL_BUFFER_BG[0x7] = COLOR_GREEN;
807 PAL_BUFFER_BG[8] = COLOR_GREY; 866 PAL_BUFFER_BG[0x8] = COLOR_GREY;
867 PAL_BUFFER_BG[0x9] = COLOR_WHITE;
868 PAL_BUFFER_BG[0xa] = COLOR_RED;
869 PAL_BUFFER_BG[0xb] = COLOR_BLUE;
870 PAL_BUFFER_BG[0xc] = COLOR_CYAN;
871 PAL_BUFFER_BG[0xd] = COLOR_PURPLE;
872 PAL_BUFFER_BG[0xe] = COLOR_YELLOW;
873 PAL_BUFFER_BG[0xf] = COLOR_GREEN;
808 874
809 // Initialize text engine. 875 // Initialize text engine.
810 txt_init(txt_drawc); 876 txt_init(txt_drawc);
diff --git a/src/renderer_m3.c b/src/gba/renderer/renderer_m3.c
index 709c982..709c982 100644
--- a/src/renderer_m3.c
+++ b/src/gba/renderer/renderer_m3.c
diff --git a/src/gba/renderer/renderer_m4.c b/src/gba/renderer/renderer_m4.c
new file mode 100644
index 0000000..722899b
--- /dev/null
+++ b/src/gba/renderer/renderer_m4.c
@@ -0,0 +1,475 @@
1#include "renderer.h"
2#include "text.h"
3
4//
5// Parameters.
6//
7
8#define SUBPIXEL_LINES 0
9#define DEC_BIG_LUT 0
10#define DISABLE_BOUNDCHECK_SCREEN 0
11
12// Front/back buffers for double buffering.
13#define BUF_0 ((u32*)(MEM_VRAM))
14#define BUF_1 ((u32*)(MEM_VRAM ^ 0x0A000))
15
16// Pointer to the backbuffer.
17static u16 *backbuf = BUF_1;
18
19// Boundchecks can be disable at compile time but this will not always improve
20// the performance and can in fact make it worse. It is possible that this is
21// due to some aliasing optimiztions but not sure at this moment.
22#if DISABLE_BOUNDCHECK_SCREEN > 0
23#define BOUNDCHECK_SCREEN(X,Y)
24#else
25#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
26#endif
27
28// Swap A and B values without a tmp variable.
29#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))
30
31// Swap A and B values to make sure A <= B.
32#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
33
34//
35// Basic primitives.
36//
37
38IWRAM_CODE
39void screen_fill(u8 clr) {
40 set32(backbuf, 0x01010101 * clr, KB(75) / 2 / 4);
41}
42
43IWRAM_CODE
44void
45draw_pixel(size_t x, size_t y, u8 clr) {
46 BOUNDCHECK_SCREEN(x, y);
47 u16 *dst = &backbuf[(x + y * SCREEN_WIDTH) / 2];
48 if(x & 1) {
49 *dst = (*dst & 0xF) | (clr << 8);
50 } else {
51 *dst = (*dst & ~0xF) | clr;
52 }
53}
54
55IWRAM_CODE
56UNROLL_LOOPS
57static inline
58void
59draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
60 BOUNDCHECK_SCREEN(x0, y0);
61 BOUNDCHECK_SCREEN(x1, y0);
62 // Find row positions for the given x/y coordinates.
63 size_t tile_x0 = x0 / 8;
64 size_t tile_x1 = x1 / 8;
65 size_t start_col = x0 % 8;
66 size_t end_col = x1 % 8;
67 // Horizontal line. There are 3 cases:
68 // 1. Lines fit on a single tile.
69 // 2. Lines go through 2 tiles, both require partial row updates.
70 // 3. Lines go through 3 or more tiles, first and last tiles use
71 // partial row updates, rows in the middle can write the entire
72 // row.
73 size_t dx = tile_x1 - tile_x0;
74 u64 *dst = &backbuf[(tile_x0 * 8 + y0 * SCREEN_WIDTH) / 2];
75 if (dx < 1) {
76 u64 row_mask = 0xFFFFFFFFFFFFFFFF;
77 row_mask >>= (7 - end_col - dx) * 8;
78 row_mask &= 0xFFFFFFFFFFFFFFFF << start_col * 8;
79 u64 row = (0x0101010101010101 * clr) & row_mask;
80 *dst = (*dst & ~row_mask) | row;
81 } else {
82 size_t shift_left = start_col * 8;
83 size_t shift_right = (7 - end_col) * 8;
84 u64 row_mask = 0xFFFFFFFFFFFFFFFF;
85 u64 row = 0x0101010101010101 * clr;
86 dst[0] = (dst[0] & ~(row_mask << shift_left)) | row << shift_left;
87 if (dx != 1) {
88 set32(&dst[1], 0x01010101 * clr, (dx - 1) * 8 / 4);
89 }
90 dst[dx] = dst[dx] & ~(row_mask >> shift_right);
91 dst[dx] |= row >> shift_right;
92 }
93}
94
95IWRAM_CODE
96UNROLL_LOOPS
97static inline
98void
99draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
100 BOUNDCHECK_SCREEN(x0, y0);
101 BOUNDCHECK_SCREEN(x0, y1);
102 size_t tile_x0 = x0 / 8;
103 size_t start_col = x0 % 8;
104 u16 *dst = &backbuf[(start_col + tile_x0 * 8 + y0 * SCREEN_WIDTH) / 2];
105 if(x0 & 1) {
106 for (size_t i = 0; i <= y1 - y0; i++, dst += SCREEN_WIDTH / 2) {
107 *dst = (*dst & 0xF) | (clr << 8);
108 }
109 } else {
110 for (size_t i = 0; i <= y1 - y0; i++, dst += SCREEN_WIDTH / 2) {
111 *dst = (*dst & ~0xF) | clr;
112 }
113 }
114}
115
116IWRAM_CODE
117UNROLL_LOOPS
118void
119draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
120 BOUNDCHECK_SCREEN(x0, y0);
121 BOUNDCHECK_SCREEN(x1, y1);
122
123 if (y0 == y1) {
124 MAYBE_SWAP(x0, x1);
125 draw_hline(x0, x1, y0, clr);
126 } else if (x0 == x1) {
127 MAYBE_SWAP(y0, y1);
128 draw_vline(x0, y0, y1, clr);
129 } else {
130#if SUBPIXEL_LINES == 1
131 // Fixed Precision constants.
132 const int fp_bit = 6;
133 const int fp_one = FP_NUM(1, fp_bit);
134 const int fp_half = fp_one >> 1;
135
136 int dx = x0 > x1 ? x0 - x1 : x1 - x0;
137 int dy = y0 > y1 ? y0 - y1 : y1 - y0;
138 int dxf = (dx << fp_bit);
139 int dyf = (dy << fp_bit);
140
141 if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) {
142 SWAP(x0, x1);
143 SWAP(y0, y1);
144 }
145
146 int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit);
147 int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit);
148 int x_step = x0 > x1 ? -1 : 1;
149 int y_step = y0 > y1 ? -SCREEN_WIDTH : SCREEN_WIDTH;
150
151 u16 *dst = NULL;
152 uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0);
153 u32 mask = x0 & 1 ? ~0xFF : 0xFF;
154 u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8);
155 if (dx >= dy) {
156 int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy;
157 int remaining = dx;
158 while (distance <= 0 && remaining > 0) {
159 dst = (u16*)(addr - (mask >> 31));
160 *dst = (*dst & ~mask) | (color & mask);
161 distance += 2 * dyf;
162 addr += x_step;
163 remaining--;
164 mask = ~mask;
165 }
166 distance -= 2 * dxf;
167 addr += y_step;
168
169 while (remaining >= 0) {
170 dst = (u16*)(addr - (mask >> 31));
171 *dst = (*dst & ~mask) | (color & mask);
172 if (distance >= 0) {
173 distance -= 2 * dxf;
174 addr += y_step;
175 }
176 distance += 2 * dyf;
177 addr += x_step;
178 mask = ~mask;
179 remaining--;
180 }
181 } else {
182 int distance = (frac_x - fp_one) * dy - (frac_y - fp_half) * dx;
183 int remaining = dy;
184 while (distance <= 0 && remaining > 0) {
185 dst = (u16*)(addr - (mask >> 31));
186 *dst = (*dst & ~mask) | (color & mask);
187 distance += 2 * dxf;
188 addr += y_step;
189 remaining--;
190 }
191 distance -= 2 * dyf;
192 addr += x_step;
193 mask = ~mask;
194
195 while (remaining >= 0) {
196 dst = (u16*)(addr - (mask >> 31));
197 *dst = (*dst & ~mask) | (color & mask);
198 if (distance >= 0) {
199 distance -= 2 * dyf;
200 addr += x_step;
201 mask = ~mask;
202 }
203 distance += 2 * dxf;
204 addr += y_step;
205 remaining--;
206 }
207 }
208#else
209 // Diagonal line.
210 int dx = x0 > x1 ? x0 - x1 : x1 - x0;
211 int dy = y0 > y1 ? y0 - y1 : y1 - y0;
212 int x_step = x0 > x1 ? -1 : 1;
213 int y_step = y0 > y1 ? -SCREEN_WIDTH : SCREEN_WIDTH;
214
215 u16 *dst = NULL;
216 uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0);
217 u32 mask = x0 & 1 ? ~0xFF : 0xFF;
218 u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8);
219 if (dx >= dy) {
220 int diff = 2 * dy - dx;
221 for (int i = 0; i < dx + 1; i++) {
222 dst = (u16*)(addr - (mask >> 31));
223 *dst = (*dst & ~mask) | (color & mask);
224 if (diff >= 0) {
225 diff -= 2 * dx;
226 addr += y_step;
227 }
228 diff += 2 * dy;
229 addr += x_step;
230 mask = ~mask;
231 }
232 } else {
233 int diff = 2 * dx - dy;
234 for (int i = 0; i < dy + 1; i++) {
235 dst = (u16*)(addr - (mask >> 31));
236 *dst = (*dst & ~mask) | (color & mask);
237 if (diff >= 0) {
238 diff -= 2 * dy;
239 addr += x_step;
240 mask = ~mask;
241 }
242 diff += 2 * dx;
243 addr += y_step;
244 }
245 }
246#endif
247 }
248}
249
250IWRAM_CODE
251void
252draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
253 BOUNDCHECK_SCREEN(x0, y0);
254 BOUNDCHECK_SCREEN(x1, y1);
255 MAYBE_SWAP(x0, x1);
256 MAYBE_SWAP(y0, y1);
257
258 draw_hline(x0, x1, y0, clr);
259 draw_hline(x0, x1, y1, clr);
260 draw_vline(x0, y0, y1, clr);
261 draw_vline(x1, y0, y1, clr);
262}
263
264IWRAM_CODE
265void
266draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
267 BOUNDCHECK_SCREEN(x0, y0);
268 BOUNDCHECK_SCREEN(x1, y1);
269 MAYBE_SWAP(x0, x1);
270 MAYBE_SWAP(y0, y1);
271
272 // Special condition. If the screen is to be completely filled, use the
273 // filling function instead.
274 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) {
275 screen_fill(clr);
276 return;
277 }
278
279 // Drawline implementation.
280 for (size_t y = y0; y <= y1; y++) {
281 draw_hline(x0, x1, y, clr);
282 }
283}
284
285//
286// Sprites (chr/icn).
287//
288
289extern u32 decode_1bpp_mask[16];
290extern u32 decode_1bpp_mask_flip_x[16];
291
292IWRAM_CODE
293UNROLL_LOOPS
294void
295draw_1bpp(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
296 BOUNDCHECK_SCREEN(x, y);
297 size_t tile_x = x / 8;
298 size_t start_col = x % 8;
299
300 u32 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2];
301 size_t shift_left = start_col * 8;
302 size_t shift_right = (4 - start_col) * 8;
303 if (start_col > 3) {
304 dst++;
305 shift_left = 8 * (start_col - 4);
306 shift_right = 8 * (8 - start_col);
307 }
308
309 size_t n_rows = 8;
310 if (y + 8 > SCREEN_HEIGHT) {
311 n_rows = 8 - ((y + 8) - SCREEN_HEIGHT);
312 }
313
314 u32 color = clr * 0x01010101;
315 if (!flip_y) {
316 if (!flip_x) {
317 for(size_t v = 0; v < n_rows; v++) {
318 // Decode colors.
319 u32 msk0 = decode_1bpp_mask[*sprite >> 4];
320 u32 msk1 = decode_1bpp_mask[*sprite & 0xf];
321 u32 msk2 = (msk0 >> shift_right) | (msk1 << shift_left);
322 msk0 <<= shift_left;
323 msk1 >>= shift_right;
324 u32 clr0 = msk0 & color;
325 u32 clr1 = msk1 & color;
326 u32 clr2 = msk2 & color;
327
328 // Overlay current colors on top of existing framebuffer.
329 dst[0] = (dst[0] & ~msk0) | clr0;
330 dst[1] = (dst[1] & ~msk2) | clr2;
331 dst[2] = (dst[2] & ~msk1) | clr1;
332
333 // Next row.
334 dst += 60;
335 sprite++;
336 }
337 } else {
338 for(size_t v = 0; v < n_rows; v++) {
339 // Decode colors.
340 u32 msk0 = decode_1bpp_mask_flip_x[*sprite & 0xf];
341 u32 msk1 = decode_1bpp_mask_flip_x[*sprite >> 4];
342 u32 msk2 = (msk0 >> shift_right) | (msk1 << shift_left);
343 msk0 <<= shift_left;
344 msk1 >>= shift_right;
345 u32 clr0 = msk0 & color;
346 u32 clr1 = msk1 & color;
347 u32 clr2 = msk2 & color;
348
349 // Overlay current colors on top of existing framebuffer.
350 dst[0] = (dst[0] & ~msk0) | clr0;
351 dst[1] = (dst[1] & ~msk2) | clr2;
352 dst[2] = (dst[2] & ~msk1) | clr1;
353
354 // Next row.
355 dst += 60;
356 sprite++;
357 }
358 }
359 } else {
360 sprite += 7;
361 if (!flip_x) {
362 for(size_t v = 0; v < n_rows; v++) {
363 // Decode colors.
364 u32 msk0 = decode_1bpp_mask[*sprite >> 4];
365 u32 msk1 = decode_1bpp_mask[*sprite & 0xf];
366 u32 msk2 = (msk0 >> shift_right) | (msk1 << shift_left);
367 msk0 <<= shift_left;
368 msk1 >>= shift_right;
369 u32 clr0 = msk0 & color;
370 u32 clr1 = msk1 & color;
371 u32 clr2 = msk2 & color;
372
373 // Overlay current colors on top of existing framebuffer.
374 dst[0] = (dst[0] & ~msk0) | clr0;
375 dst[1] = (dst[1] & ~msk2) | clr2;
376 dst[2] = (dst[2] & ~msk1) | clr1;
377
378 // Next row.
379 dst += 60;
380 sprite--;
381 }
382 } else {
383 for(size_t v = 0; v < 8; v++) {
384 // Decode colors.
385 u32 msk0 = decode_1bpp_mask_flip_x[*sprite & 0xf];
386 u32 msk1 = decode_1bpp_mask_flip_x[*sprite >> 4];
387 u32 msk2 = (msk0 >> shift_right) | (msk1 << shift_left);
388 msk0 <<= shift_left;
389 msk1 >>= shift_right;
390 u32 clr0 = msk0 & color;
391 u32 clr1 = msk1 & color;
392 u32 clr2 = msk2 & color;
393
394 // Overlay current colors on top of existing framebuffer.
395 dst[0] = (dst[0] & ~msk0) | clr0;
396 dst[1] = (dst[1] & ~msk2) | clr2;
397 dst[2] = (dst[2] & ~msk1) | clr1;
398
399 // Next row.
400 dst += 60;
401 sprite--;
402 }
403 }
404 }
405}
406
407//
408// Flipping buffers/copying memory.
409//
410
411IWRAM_CODE
412void
413flip_buffer(void) {
414 backbuf = (u16*)((u32)backbuf ^ 0x0A000);
415 DISP_CTRL ^= DISP_PAGE;
416}
417
418//
419// Text rendering.
420//
421
422#include "font.h"
423
424// Font rendering function for the text engine.
425void
426txt_drawc(char c, size_t x, size_t y, u8 clr) {
427 u8 *tile = font_icn;
428 draw_1bpp(x, y, tile + 8 * c, clr, 1, 0);
429}
430
431void
432txt_drawc_small(char c, size_t x, size_t y, u8 clr) {
433 u8 *tile = font_icn;
434 c = c < 'a' ? c + 16 * 6 : c + 16 * 4;
435 draw_1bpp(x, y, tile + 8 * c, clr, 1, 0);
436}
437
// Formats `msg` with posprintf and draws the result at (x, y) in colour `clr`
// using the small font.
//
// While drawing, the text engine is switched to txt_drawc_small with a
// spacing of 4. Afterwards the previous spacing is restored and the draw
// callback is set back to txt_drawc (not to whatever it was before).
// The text is formatted into a 256-byte stack buffer; the posprintf call
// carries no size limit, so the formatted string must fit in it.
// The macro expands to a brace-enclosed block, not a single expression.
#define txt_drawf_small(msg, x, y, clr, ...)            \
    {                                                   \
        char buf[256] = {0};                            \
        posprintf(buf, msg, ##__VA_ARGS__);             \
        u8 tmp = text_engine.spacing;                   \
        text_engine.drawc = txt_drawc_small;            \
        txt_spacing(4);                                 \
        txt_draws(buf, x, y, clr);                      \
        text_engine.drawc = txt_drawc;                  \
        txt_spacing(tmp);                               \
    }
449
450//
451// Initialization.
452//
453
454void
455renderer_init(void) {
456 // Initialize display mode and bg palette.
457 DISP_CTRL = DISP_MODE_4 | DISP_BG_2;
458
459 // Clear VRAM.
460 set32((u16*)MEM_VRAM, 0, KB(96)/4);
461
462 // Initialize default palette.
463 PAL_BUFFER_BG[0] = COLOR_BLACK;
464 PAL_BUFFER_BG[1] = COLOR_WHITE;
465 PAL_BUFFER_BG[2] = COLOR_RED;
466 PAL_BUFFER_BG[3] = COLOR_BLUE;
467 PAL_BUFFER_BG[4] = COLOR_CYAN;
468 PAL_BUFFER_BG[5] = COLOR_PURPLE;
469 PAL_BUFFER_BG[6] = COLOR_YELLOW;
470 PAL_BUFFER_BG[7] = COLOR_GREEN;
471 PAL_BUFFER_BG[8] = COLOR_GREY;
472
473 // Initialize text engine.
474 txt_init(txt_drawc);
475}
diff --git a/src/gba/text/font.h b/src/gba/text/font.h
new file mode 100644
index 0000000..70a5e28
--- /dev/null
+++ b/src/gba/text/font.h
@@ -0,0 +1,130 @@
1static const u32 font_icn[] = {
2 0x00000000, 0x00000000, 0x00240000, 0x00003c42,
3 0x00240000, 0x0000423c, 0x3e360000, 0x00081c3e,
4 0x3e1c0800, 0x00081c3e, 0x3e1c1c00, 0x1c083e36,
5 0x3e1c0800, 0x1c083e3e, 0x18000000, 0x00000018,
6 0x667e7e7e, 0x7e7e7e66, 0x24180000, 0x00001824,
7 0x5a667e7e, 0x7e7e665a, 0x3e1c0800, 0x1c221c08,
8 0x1c221c00, 0x083e0808, 0x28381800, 0x0c0c0808,
9 0x24243c00, 0x06363424, 0x08220800, 0x0822081c,
10 0x3c1c0c04, 0x00040c1c, 0x3c383020, 0x00203038,
11 0x083e1c08, 0x081c3e08, 0x14141400, 0x00140014,
12 0x2a2a3c00, 0x2828282c, 0x3c043800, 0x1e203c42,
13 0x00000000, 0x0000007e, 0x083e1c08, 0x3e081c3e,
14 0x3e1c0800, 0x00080808, 0x08080800, 0x00081c3e,
15 0x30100000, 0x0010307e, 0x0c080000, 0x00080c7e,
16 0x00000000, 0x3e020200, 0x36140000, 0x0014367f,
17 0x1c080800, 0x003e3e1c, 0x1c3e3e00, 0x0008081c,
18 0x00000000, 0x00000000, 0x08080800, 0x00080008,
19 0x14141400, 0x00000000, 0x3e140000, 0x00143e14,
20 0x021c0800, 0x081e201c, 0x16260000, 0x00323408,
21 0x08140800, 0x002c1234, 0x08080800, 0x00000000,
22 0x04080000, 0x00080404, 0x10080000, 0x00081010,
23 0x08140000, 0x0014083e, 0x08080000, 0x0008083e,
24 0x00000000, 0x04080c00, 0x00000000, 0x0000003e,
25 0x00000000, 0x000c0c00, 0x18302000, 0x0002060c,
26 0x221c0000, 0x001c222a, 0x0a0c0000, 0x003e0808,
27 0x201c0000, 0x003e021c, 0x221c0000, 0x001c2218,
28 0x12120000, 0x00103e12, 0x023e0000, 0x001e201e,
29 0x021c0000, 0x001c221e, 0x203e0000, 0x00040810,
30 0x221c0000, 0x001c221c, 0x221c0000, 0x001c203c,
31 0x0c0c0000, 0x000c0c00, 0x0c0c0000, 0x04080c00,
32 0x08100000, 0x00100804, 0x3e000000, 0x00003e00,
33 0x08040000, 0x00040810, 0x10221c00, 0x00080008,
34 0x2a1c0000, 0x001c023a, 0x221c0000, 0x00223e22,
35 0x221e0000, 0x001e221e, 0x221c0000, 0x001c2202,
36 0x221e0000, 0x001e2222, 0x023e0000, 0x003e021e,
37 0x023e0000, 0x0002021e, 0x021c0000, 0x001c2232,
38 0x22220000, 0x0022223e, 0x083e0000, 0x003e0808,
39 0x20200000, 0x001c2220, 0x12220000, 0x0022120e,
40 0x02020000, 0x003e0202, 0x36220000, 0x0022222a,
41 0x2a260000, 0x00222232, 0x221c0000, 0x001c2222,
42 0x221e0000, 0x00021e22, 0x221c0000, 0x002c1222,
43 0x221e0000, 0x00221e22, 0x021c0000, 0x001e201c,
44 0x083e0000, 0x00080808, 0x22220000, 0x001c2222,
45 0x22220000, 0x00081422, 0x2a220000, 0x00142a2a,
46 0x14220000, 0x00221408, 0x22220000, 0x00080814,
47 0x103e0000, 0x003e0408, 0x041c0000, 0x001c0404,
48 0x0c060200, 0x00203018, 0x101c0000, 0x001c1010,
49 0x22140800, 0x00000000, 0x00000000, 0x003e0000,
50 0x10080400, 0x00000000, 0x201c0000, 0x003c223c,
51 0x1e020200, 0x001e2222, 0x021c0000, 0x001c0202,
52 0x3c202000, 0x003c2222, 0x221c0000, 0x001c021e,
53 0x023c0000, 0x0002021e, 0x223c0000, 0x1c203c22,
54 0x1e020200, 0x00222222, 0x0c000800, 0x003e0808,
55 0x20002000, 0x1c222020, 0x0a120200, 0x00320a06,
56 0x08080c00, 0x003e0808, 0x2a160000, 0x002a2a2a,
57 0x320e0000, 0x00222222, 0x221c0000, 0x001c2222,
58 0x221e0000, 0x02021e22, 0x223c0000, 0x70203c22,
59 0x261a0000, 0x00020202, 0x021c0000, 0x001e201c,
60 0x043e0400, 0x00180404, 0x22220000, 0x001c2222,
61 0x22220000, 0x00081422, 0x22220000, 0x00142a2a,
62 0x14220000, 0x00221408, 0x22220000, 0x1c203c22,
63 0x103e0000, 0x003e0408, 0x041c0000, 0x001c0406,
64 0x08080000, 0x00080808, 0x101c0000, 0x001c1030,
65 0x1a2c0000, 0x00000000, 0x14080000, 0x003e2222,
66 0x00000000, 0x00000000, 0x08080000, 0x00080008,
67 0x14140000, 0x00000000, 0x3e140000, 0x00143e14,
68 0x041c0800, 0x081c101c, 0x10040000, 0x00100408,
69 0x14080000, 0x00381438, 0x08080000, 0x00000000,
70 0x04080000, 0x00080404, 0x10080000, 0x00081010,
71 0x08140000, 0x0014081c, 0x08000000, 0x0000081c,
72 0x00000000, 0x04080000, 0x00000000, 0x0000001c,
73 0x00000000, 0x00080000, 0x18100000, 0x00040c08,
74 0x141c0000, 0x001c1414, 0x0c080000, 0x001c0808,
75 0x101c0000, 0x001c041c, 0x101c0000, 0x001c1018,
76 0x14140000, 0x0010101c, 0x041c0000, 0x001c101c,
77 0x041c0000, 0x001c141c, 0x101c0000, 0x00040408,
78 0x141c0000, 0x001c141c, 0x141c0000, 0x0010101c,
79 0x08000000, 0x00080000, 0x08000000, 0x04080000,
80 0x08100000, 0x00100804, 0x1c000000, 0x00001c00,
81 0x08040000, 0x00040810, 0x101c0000, 0x00080008,
82 0x141c0000, 0x001c0414, 0x141c0000, 0x00141c14,
83 0x141c0000, 0x001c140c, 0x041c0000, 0x001c0404,
84 0x140c0000, 0x000c1414, 0x041c0000, 0x001c040c,
85 0x041c0000, 0x0004040c, 0x041c0000, 0x001c1414,
86 0x14140000, 0x0014141c, 0x081c0000, 0x001c0808,
87 0x10100000, 0x001c1410, 0x14140000, 0x0014140c,
88 0x04040000, 0x001c0404, 0x1c140000, 0x00141414,
89 0x140c0000, 0x00141414, 0x140c0000, 0x001c1414,
90 0x141c0000, 0x0004041c, 0x141c0000, 0x00101c14,
91 0x141c0000, 0x0014140c, 0x04180000, 0x000c101c,
92 0x081c0000, 0x00080808, 0x14140000, 0x001c1414,
93 0x14140000, 0x00181414, 0x14140000, 0x00141c14,
94 0x14140000, 0x00141408, 0x14140000, 0x0008081c,
95 0x101c0000, 0x001c0408, 0x041c0000, 0x001c0404,
96 0x0c040000, 0x00101808, 0x101c0000, 0x001c1010,
97 0x14080000, 0x00000000, 0x00000000, 0x001c0000,
98 0x00000000, 0x00000000, 0x00000000, 0x00000000,
99 0x00000000, 0x00000000, 0x00000000, 0x00000000,
100 0x00000000, 0x00000000, 0x00000000, 0x00000000,
101 0x00000000, 0x00000000, 0x00000000, 0x00000000,
102 0x00000000, 0x00000000, 0x00000000, 0x00000000,
103 0x00000000, 0x00000000, 0x00000000, 0x00000000,
104 0x00000000, 0x00000000, 0x00000000, 0x00000000,
105 0x00000000, 0x00000000, 0x00000000, 0x00000000,
106 0x3232323e, 0x003e3232, 0x18181818, 0x00181818,
107 0x3e30303e, 0x003e0606, 0x3c30303e, 0x003e3030,
108 0x32323232, 0x0030303e, 0x3e06063e, 0x003e3030,
109 0x3e06063e, 0x003e2626, 0x1830303e, 0x0006060c,
110 0x3e32323e, 0x003e3232, 0x3e32323e, 0x003e3030,
111 0x00000000, 0x00060600, 0x18362600, 0x0032360c,
112 0x32323e1c, 0x0032323e, 0x1e26261e, 0x001e2626,
113 0x0606063c, 0x003c0606, 0x3232321e, 0x001e3232,
114 0x1e06063e, 0x003e0606, 0x1e06063e, 0x00060606,
115 0x3606063c, 0x001c2626, 0x3e323232, 0x00323232,
116 0x0c0c0c1e, 0x001e0c0c, 0x30303030, 0x003e3232,
117 0x0e162626, 0x00262616, 0x06060606, 0x003e0606,
118 0x6a7e7662, 0x00626262, 0x323a3e36, 0x00323232,
119 0x3232321c, 0x001c3232, 0x3e32323e, 0x00020202,
120 0x3232321c, 0x002c1a3a, 0x1e26261e, 0x00262626,
121 0x1e06063c, 0x001e3030, 0x1818187e, 0x00181818,
122 0x32323232, 0x003e3232, 0x32323232, 0x00183432,
123 0x32323232, 0x00143a32, 0x0c323232, 0x00323232,
124 0x32323232, 0x001e203c, 0x1c30303e, 0x003e0606,
125 0x00000000, 0x00000000, 0x00000000, 0x00000000,
126 0x00000000, 0x00000000, 0x00000000, 0x00000000,
127 0x00000000, 0x00000000, 0x00000000, 0x00000000,
128 0x00000000, 0x00000000, 0x00000000, 0x00000000,
129 0x00000000, 0x00000000, 0x00000000, 0x00000000,
130};
diff --git a/src/text/posprintf.h b/src/gba/text/posprintf.h
index a560db5..a560db5 100644
--- a/src/text/posprintf.h
+++ b/src/gba/text/posprintf.h
diff --git a/src/text/posprintf.s b/src/gba/text/posprintf.s
index f3ef1e6..f3ef1e6 100644
--- a/src/text/posprintf.s
+++ b/src/gba/text/posprintf.s
diff --git a/src/text/text.h b/src/gba/text/text.h
index 24573f8..87db439 100644
--- a/src/text/text.h
+++ b/src/gba/text/text.h
@@ -92,6 +92,12 @@ txt_color(u8 clr) {
92 text_engine.color = clr; 92 text_engine.color = clr;
93} 93}
94 94
95static inline
96void
97txt_spacing(u8 spacing) {
98 text_engine.spacing = spacing;
99}
100
95// Renders the contents of the scrollback buffer to the screen. 101// Renders the contents of the scrollback buffer to the screen.
96void 102void
97txt_render(void) { 103txt_render(void) {
@@ -119,7 +125,9 @@ txt_draws(char *msg, size_t x, size_t y, u8 clr) {
119 size_t i = 0; 125 size_t i = 0;
120 while (*msg) { 126 while (*msg) {
121 char c = *msg++; 127 char c = *msg++;
122 text_engine.drawc(c, x + i++ * text_engine.spacing, y, clr); 128 if (c != 0) {
129 text_engine.drawc(c, x + i++ * text_engine.spacing, y, clr);
130 }
123 } 131 }
124} 132}
125 133
diff --git a/src/gba/utils.s b/src/gba/utils.s
new file mode 100755
index 0000000..d70d7ef
--- /dev/null
+++ b/src/gba/utils.s
@@ -0,0 +1,82 @@
1.file "utils.s"
2.section .iwram, "ax", %progbits
3.arm
4.align
5
@ Efficient memcpy32 function (borrowed from TONC). It uses a two step
@ approach: blocks of eight 32-bit words are moved with ldmia/stmia, then the
@ remaining 0-7 words are copied one at a time.
@
@ In:
@   r0: destination address
@   r1: source address
@   r2: number of 32-bit words to copy (0 is allowed and copies nothing)
@ Clobbers: r0-r3, r12 and the flags. r4-r10 are saved and restored on the
@           stack around the block loop.
@ NOTE(review): the word-sized loads/stores presumably expect both pointers
@ to be 4-byte aligned -- confirm against callers.
@
    .global copy32
    .type   copy32, %function       @ mark as a function symbol for the linker
copy32:
    cmp     r2, #0
    beq     .Lcopy32_end

    and     r12, r2, #7             @ r12 = r2 % 8 (leftover words)
    movs    r2, r2, lsr #3          @ r2 = r2 / 8 (blocks of 8 words)
    beq     .Lcopy32_residual

    @ Copy 8 words (32 bytes) per iteration.
    push    {r4-r10}
.Lcopy32_chunks:
    ldmia   r1!, {r3-r10}
    stmia   r0!, {r3-r10}
    subs    r2, r2, #1
    bhi     .Lcopy32_chunks         @ loop while blocks remain
    pop     {r4-r10}

    @ Copy the leftover words (0-7). When r12 is already 0 the subtraction
    @ borrows, so the conditional load/store/branch are all skipped.
.Lcopy32_residual:
    subs    r12, r12, #1
    ldrhs   r3, [r1], #4
    strhs   r3, [r0], #4
    bhi     .Lcopy32_residual

.Lcopy32_end:
    bx      lr
    .size   copy32, . - copy32
42
43
@ Efficient memset32 function (borrowed from TONC). Same two step approach as
@ copy32, without the loads: blocks of eight 32-bit words are written with
@ stmia, then the remaining 0-7 words are stored one at a time.
@
@ In:
@   r0: destination address
@   r1: u32 value to set
@   r2: number of 32-bit words to set (0 is allowed and writes nothing)
@ Clobbers: r0, r2, r3, r12 and the flags. r4-r9 are saved and restored on
@           the stack around the block loop.
@ NOTE(review): the word-sized stores presumably expect the destination to be
@ 4-byte aligned -- confirm against callers.
@
    .global set32
    .type   set32, %function        @ mark as a function symbol for the linker
set32:
    cmp     r2, #0
    beq     .Lset32_end

    and     r12, r2, #7             @ r12 = r2 % 8 (leftover words)
    movs    r2, r2, lsr #3          @ r2 = r2 / 8 (blocks of 8 words)
    beq     .Lset32_residual

    @ Set 8 words (32 bytes) per iteration: replicate the value into seven
    @ more registers so that a single stmia writes the whole block.
    push    {r4-r9}
    mov     r3, r1
    mov     r4, r1
    mov     r5, r1
    mov     r6, r1
    mov     r7, r1
    mov     r8, r1
    mov     r9, r1
.Lset32_chunks:
    stmia   r0!, {r1, r3-r9}
    subs    r2, r2, #1
    bhi     .Lset32_chunks          @ loop while blocks remain
    pop     {r4-r9}

    @ Set the leftover words (0-7). When r12 is already 0 the subtraction
    @ borrows, so the conditional store/branch are both skipped.
.Lset32_residual:
    subs    r12, r12, #1
    strhs   r1, [r0], #4
    bhi     .Lset32_residual

.Lset32_end:
    bx      lr
    .size   set32, . - set32
diff --git a/src/main.c b/src/main.c
index 0a78367..7ca8370 100644
--- a/src/main.c
+++ b/src/main.c
@@ -17,7 +17,7 @@ WITH REGARD TO THIS SOFTWARE.
17// Config parameters. 17// Config parameters.
18// 18//
19 19
20#define PROF_ENABLE 1 20#define PROF_ENABLE 30
21#include "profiling.c" 21#include "profiling.c"
22 22
23void 23void
@@ -42,20 +42,6 @@ test_fill_rect(void) {
42} 42}
43 43
44void 44void
45test_chr(void) {
46 u8 tile[16] = {
47 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0x00, 0x00, 0x00,
48 0x00, 0x00, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x00
49 };
50
51 for (size_t y = 10; y < 20; y++) {
52 for (size_t x = 20; x < 30; x++) {
53 draw_chr(8 + x * 8, 2 + y * 8, tile, 0, 0, 0);
54 }
55 }
56}
57
58void
59test_icn(void) { 45test_icn(void) {
60 u8 tile[8] = { 46 u8 tile[8] = {
61 0xf8, 0xf8, 0xf8, 0xf8 | 0x3e, 0xf8 | 0x3e, 0x3e, 0x3e, 0x00, 47 0xf8, 0xf8, 0xf8, 0xf8 | 0x3e, 0xf8 | 0x3e, 0x3e, 0x3e, 0x00,
@@ -63,7 +49,7 @@ test_icn(void) {
63 49
64 for (size_t y = 10; y < 20; y++) { 50 for (size_t y = 10; y < 20; y++) {
65 for (size_t x = 10; x < 20; x++) { 51 for (size_t x = 10; x < 20; x++) {
66 draw_icn(8 + x * 8, 2 + y * 8, tile, 7, 0, 1); 52 draw_1bpp(8 + x * 8, 2 + y * 8, tile, 7, 0, 1);
67 } 53 }
68 } 54 }
69} 55}
@@ -96,13 +82,16 @@ test_moving_line(void) {
96 } else { 82 } else {
97 should_move = 1; 83 should_move = 1;
98 } 84 }
85 if (key_hold(KEY_SELECT) && key_tap(KEY_START)) {
86 PROF_SHOW();
87 }
99 bios_vblank_wait(); 88 bios_vblank_wait();
100 FRAME_START(); 89 FRAME_START();
101 PROF(flip_buffer(), flip_cycles); 90 PROF(flip_buffer(), PROF_FLIP);
102#if FLIP_TYPE == 0 91#if FLIP_TYPE == 0
103 PROF(screen_fill(3), clear_cycles); 92 PROF(screen_fill(3), PROF_FILL);
104#endif 93#endif
105 PROF(draw_line(x, y, 239 - x, 159 - y, 3), line_cycles); 94 PROF(draw_line(x, y, 239 - x, 159 - y, 3), PROF_LINE);
106 x += inc_x * should_move; 95 x += inc_x * should_move;
107 y += inc_y * should_move; 96 y += inc_y * should_move;
108 if (x == 239 && inc_x == 1) { 97 if (x == 239 && inc_x == 1) {
@@ -115,9 +104,8 @@ test_moving_line(void) {
115 inc_x = 1; 104 inc_x = 1;
116 inc_y = 0; 105 inc_y = 0;
117 } 106 }
118 PROF(draw_line(x, y, 239 - x, 159 - y, 2), line_cycles); 107 PROF(draw_line(x, y, 239 - x, 159 - y, 2), PROF_LINE);
119 FRAME_END(); 108 FRAME_END();
120 PROF_SHOW();
121 } 109 }
122} 110}
123 111
@@ -128,17 +116,18 @@ test_all_static(void) {
128 if (key_tap(KEY_A)) { 116 if (key_tap(KEY_A)) {
129 break; 117 break;
130 } 118 }
119 if (key_hold(KEY_SELECT) && key_tap(KEY_START)) {
120 PROF_SHOW();
121 }
131 bios_vblank_wait(); 122 bios_vblank_wait();
132 FRAME_START(); 123 FRAME_START();
133 PROF(flip_buffer(), flip_cycles); 124 PROF(flip_buffer(), PROF_FLIP);
134 PROF(test_clear(), clear_cycles); 125 PROF(test_clear(), PROF_FILL);
135 PROF(test_lines(), line_cycles); 126 PROF(test_lines(), PROF_LINE);
136 PROF(test_rect(), rect_cycles); 127 PROF(test_rect(), PROF_RECT);
137 PROF(test_fill_rect(), fill_rect_cycles); 128 PROF(test_fill_rect(), PROF_FRECT);
138 PROF(test_chr(), chr_cycles); 129 PROF(test_icn(), PROF_ICN);
139 PROF(test_icn(), icn_cycles);
140 FRAME_END(); 130 FRAME_END();
141 PROF_SHOW();
142 } 131 }
143} 132}
144 133
@@ -169,6 +158,9 @@ test_growing_rects(void) {
169 if (key_tap(KEY_A)) { 158 if (key_tap(KEY_A)) {
170 break; 159 break;
171 } 160 }
161 if (key_hold(KEY_SELECT) && key_tap(KEY_START)) {
162 PROF_SHOW();
163 }
172 if (key_pressed(KEY_B)) { 164 if (key_pressed(KEY_B)) {
173 if (key_pressed(KEY_LEFT)) { 165 if (key_pressed(KEY_LEFT)) {
174 rects[0].x0 = CLAMP(rects[0].x0 - 1, 0, SCREEN_WIDTH - 1); 166 rects[0].x0 = CLAMP(rects[0].x0 - 1, 0, SCREEN_WIDTH - 1);
@@ -200,22 +192,21 @@ test_growing_rects(void) {
200 } 192 }
201 bios_vblank_wait(); 193 bios_vblank_wait();
202 FRAME_START(); 194 FRAME_START();
203 PROF(flip_buffer(), flip_cycles); 195 PROF(flip_buffer(), PROF_FLIP);
204 PROF(screen_fill(0), clear_cycles); 196 PROF(screen_fill(0), PROF_FILL);
205 PROF(draw_filled_rect( 197 PROF(draw_filled_rect(
206 rects[0].x0, 198 rects[0].x0,
207 rects[0].y0, 199 rects[0].y0,
208 rects[0].x1, 200 rects[0].x1,
209 rects[0].y1, 201 rects[0].y1,
210 2), rect_cycles); 202 2), PROF_RECT);
211 PROF(draw_rect( 203 PROF(draw_rect(
212 rects[1].x0, 204 rects[1].x0,
213 rects[1].y0, 205 rects[1].y0,
214 rects[1].x1, 206 rects[1].x1,
215 rects[1].y1, 207 rects[1].y1,
216 3), fill_rect_cycles); 208 3), PROF_FRECT);
217 FRAME_END(); 209 FRAME_END();
218 PROF_SHOW();
219 } 210 }
220} 211}
221 212
@@ -224,10 +215,6 @@ test_sprites_bounce(void) {
224 u8 sprite_icn[8] = { 215 u8 sprite_icn[8] = {
225 0xf8, 0xf8, 0xf8, 0xf8 | 0x3e, 0xf8 | 0x3e, 0x3e, 0x3e, 0x00, 216 0xf8, 0xf8, 0xf8, 0xf8 | 0x3e, 0xf8 | 0x3e, 0x3e, 0x3e, 0x00,
226 }; 217 };
227 u8 sprite_chr[16] = {
228 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0x00, 0x00, 0x00,
229 0x00, 0x00, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x00,
230 };
231 typedef struct Sprite { 218 typedef struct Sprite {
232 int x; 219 int x;
233 int y; 220 int y;
@@ -260,36 +247,29 @@ test_sprites_bounce(void) {
260 } else { 247 } else {
261 should_move = 1; 248 should_move = 1;
262 } 249 }
250 if (key_hold(KEY_SELECT) && key_tap(KEY_START)) {
251 PROF_SHOW();
252 }
263 bios_vblank_wait(); 253 bios_vblank_wait();
264 FRAME_START(); 254 FRAME_START();
265 PROF(flip_buffer(), flip_cycles); 255 PROF(flip_buffer(), PROF_FLIP);
266#if FLIP_TYPE == 0 256#if FLIP_TYPE == 0
267 PROF(screen_fill(8), clear_cycles); 257 PROF(screen_fill(8), PROF_FILL);
268#endif 258#endif
269 for (size_t i = 0; i < LEN(sprites) * should_move; i++) { 259 for (size_t i = 0; i < LEN(sprites); i++) {
270 Sprite *s = &sprites[i]; 260 Sprite *s = &sprites[i];
271 PROF(draw_icn( 261 PROF(draw_1bpp(
272 s->x, s->y, 262 s->x, s->y,
273 sprite_icn, 263 sprite_icn,
274 8, 264 8,
275 s->flip_x, s->flip_y), icn_cycles); 265 s->flip_x, s->flip_y), PROF_ICN);
276 PROF(draw_chr( 266 s->x += s->inc_x * should_move;
277 (240 - s->x - 8), (160 - s->y - 8), 267 s->y += s->inc_y * should_move;
278 sprite_chr, 268 PROF(draw_1bpp(
279 8,
280 s->flip_x, s->flip_y), chr_cycles);
281 s->x += s->inc_x;
282 s->y += s->inc_y;
283 PROF(draw_icn(
284 s->x, s->y, 269 s->x, s->y,
285 sprite_icn, 270 sprite_icn,
286 s->clr, 271 s->clr,
287 s->flip_x, s->flip_y), icn_cycles); 272 s->flip_x, s->flip_y), PROF_ICN);
288 PROF(draw_chr(
289 (240 - s->x - 8), (160 - s->y - 8),
290 sprite_chr,
291 s->clr,
292 s->flip_x, s->flip_y), chr_cycles);
293 if (s->x >= (240 - 8) && s->inc_x > 0) { 273 if (s->x >= (240 - 8) && s->inc_x > 0) {
294 s->inc_x *= -1; 274 s->inc_x *= -1;
295 } else if (s->x <= 0 && s->inc_x < 0){ 275 } else if (s->x <= 0 && s->inc_x < 0){
@@ -302,7 +282,6 @@ test_sprites_bounce(void) {
302 } 282 }
303 } 283 }
304 FRAME_END(); 284 FRAME_END();
305 PROF_SHOW();
306 } 285 }
307} 286}
308 287
@@ -313,27 +292,29 @@ test_text_rendering(void) {
313 if (key_tap(KEY_A)) { 292 if (key_tap(KEY_A)) {
314 break; 293 break;
315 } 294 }
295 if (key_hold(KEY_SELECT) && key_tap(KEY_START)) {
296 PROF_SHOW();
297 }
316 bios_vblank_wait(); 298 bios_vblank_wait();
317 FRAME_START(); 299 FRAME_START();
318 PROF(flip_buffer(), flip_cycles); 300 PROF(flip_buffer(), PROF_FLIP);
319 PROF(screen_fill(0), clear_cycles); 301 PROF(screen_fill(0), PROF_FILL);
320 txt_color(2); 302 txt_color(2);
321 PROF(txt_drawf("The strongest bulwark of", 4, 8 * 2, 3), txt_drawf_cycles); 303 PROF(txt_drawf("The strongest bulwark of", 4, 8 * 2, 3), PROF_DRAWTXT);
322 PROF(txt_drawf("authority is uniformity;", 2, 8 * 3, 3), txt_drawf_cycles); 304 PROF(txt_drawf("authority is uniformity;", 2, 8 * 3, 3), PROF_DRAWTXT);
323 PROF(txt_drawf("the least divergence from it", 8, 8 * 4, 3), txt_drawf_cycles); 305 PROF(txt_drawf("the least divergence from it", 8, 8 * 4, 3), PROF_DRAWTXT);
324 PROF(txt_drawf("it's the greatest crime", 6, 8 * 5, 3), txt_drawf_cycles); 306 PROF(txt_drawf("it's the greatest crime", 6, 8 * 5, 3), PROF_DRAWTXT);
325 PROF(txt_drawf("- Emma Goldman", 100, 8 * 6 + 3, 3), txt_drawf_cycles); 307 PROF(txt_drawf("- Emma Goldman", 100, 8 * 6 + 3, 3), PROF_DRAWTXT);
326 txt_position(0, 10); 308 txt_position(0, 10);
327 PROF(txt_printf("The only way to deal with an\n" 309 PROF(txt_printf("The only way to deal with an\n"
328 "unfree world is to become\n" 310 "unfree world is to become\n"
329 "so absolutely free,\n" 311 "so absolutely free,\n"
330 "that your very existence\n" 312 "that your very existence\n"
331 "is an act of rebellion.\n"), txt_printf_cycles); 313 "is an act of rebellion.\n"), PROF_PRINTXT);
332 PROF(txt_printf("\n - Albert Camus\n"), txt_printf_cycles); 314 PROF(txt_printf("\n - Albert Camus\n"), PROF_PRINTXT);
333 PROF(txt_render(), txt_render_cycles); 315 PROF(txt_render(), PROF_TXTREND);
334 PROF(txt_clear(), txt_clear_cycles); 316 PROF(txt_clear(), PROF_TXTCLEAR);
335 FRAME_END(); 317 FRAME_END();
336 PROF_SHOW();
337 } 318 }
338} 319}
339 320
@@ -348,6 +329,7 @@ int main(void) {
348 irq_init(); 329 irq_init();
349 irs_set(IRQ_VBLANK, irs_stub); 330 irs_set(IRQ_VBLANK, irs_stub);
350 331
332 PROF_INIT();
351 while (true) { 333 while (true) {
352 test_sprites_bounce(); 334 test_sprites_bounce();
353 test_text_rendering(); 335 test_text_rendering();
diff --git a/src/profiling.c b/src/profiling.c
index 90215c8..e1f4feb 100644
--- a/src/profiling.c
+++ b/src/profiling.c
@@ -2,199 +2,143 @@
2// Profiling macros. 2// Profiling macros.
3// 3//
4 4
5// NOTE: Profiling uses the last two timers to count cycles, and thus can't be
6// used for measuring applications that need those timers for their own purposes.
7
8
9static u32 frame_time = 0;
10
5#ifndef PROF_ENABLE 11#ifndef PROF_ENABLE
6#define PROF_ENABLE 0 12#define PROF_ENABLE 0
7#endif 13#endif
8 14
9#if PROF_ENABLE > 0 && PROF_ENABLE < 3 15#if PROF_ENABLE > 0
10 16
11#ifndef PROF_N_FRAMES 17#ifndef PROF_RESET_MINMAX
12#define PROF_N_FRAMES 30 18#define PROF_RESET_MINMAX false
13#endif 19#endif
14 20
15// Profile method 1: Average per N frames. 21// Profiling categories that can be monitored; PROF_NUM is the number of entries.
16#if PROF_ENABLE == 1 22typedef enum ProfType {
17#define TEXT_ENABLE 1 23 PROF_FLIP,
18#define PROF(F,VAR) \ 24 PROF_FILL,
19 do { \ 25 PROF_LINE,
20 u32 __tmp_prof = profile_measure();\ 26 PROF_RECT,
21 F;\ 27 PROF_FRECT,
22 (VAR) += profile_measure() - __tmp_prof;\ 28 PROF_ICN,
23 } while (0) 29 PROF_DRAWTXT,
24 30 PROF_PRINTXT,
25// Profile method 2: Maximum in N frames. 31 PROF_TXTREND,
26#elif PROF_ENABLE == 2 32 PROF_TXTCLEAR,
27#define TEXT_ENABLE 1 33 PROF_NUM,
28#define PROF(F,VAR) \ 34} ProfType;
29 do { \
30 u32 __tmp_prof = profile_measure();\
31 (F);\
32 (VAR) = MAX(profile_measure() - __tmp_prof, (VAR));\
33 } while (0)
34#endif
35 35
36#ifndef PROF_SHOW_X 36char *prof_type_str[PROF_NUM] = {
37#define PROF_SHOW_X 0 37 "FLIPBUF ",
38#endif 38 "SCRFILL ",
39#ifndef PROF_SHOW_Y 39 "LINE ",
40#define PROF_SHOW_Y 0 40 "RECT ",
41#endif 41 "FRECT ",
42 "ICN ",
43 "DRAWTXT ",
44 "PRINTXT ",
45 "TXTREND ",
46 "TXTCLEAR ",
47};
42 48
43static bool profile_show = true; 49u32 prof_frame_time = 0;
44static bool profile_bg_show = true; 50u32 prof_frame_count = 0;
45 51u32 prof_frame_avg = 0;
46#define PROF_SHOW() \ 52u32 prof_frame_time_max = 0;
47 do { \ 53u32 prof_times[PROF_NUM] = {0};
48 if (key_tap(KEY_START)) {\ 54u32 prof_count[PROF_NUM] = {0};
49 profile_show ^= 1;\ 55u32 prof_avg[PROF_NUM] = {0};
50 }\ 56u32 prof_max[PROF_NUM] = {0};
51 if (key_tap(KEY_SELECT)) {\ 57u32 prof_min[PROF_NUM] = {0};
52 profile_bg_show ^= 1;\ 58
53 }\ 59bool prof_reset_minmax = PROF_RESET_MINMAX;
54 if (profile_show) {\ 60bool prof_show = true;
55 txt_color(1);\ 61
56 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ 62#define PROF_INIT() do { \
57 if (profile_bg_show) {\ 63 for (size_t i = 0; i < PROF_NUM; i++) { \
58 draw_filled_rect((PROF_SHOW_X), (PROF_SHOW_X), 8 * 18, 8 * 14, 0);\ 64 prof_min[i] = -1; \
59 }\ 65 } \
60 txt_printf("VIDEO\n");\ 66} while(0);
61 txt_printf(">CLEAR %.8lu\n", avg_clear_cycles);\ 67
62 txt_printf(">LINES %.8lu\n", avg_line_cycles);\ 68#define PROF(func, idx) do { \
63 txt_printf(">RECT %.8lu\n", avg_rect_cycles);\ 69 u32 time_before = profile_measure(); \
64 txt_printf(">FRECT %.8lu\n", avg_fill_rect_cycles);\ 70 (func); \
65 txt_printf(">1BPP %.8lu\n", avg_icn_cycles);\ 71 u32 time_after = profile_measure(); \
66 txt_printf(">2BPP %.8lu\n", avg_chr_cycles);\ 72 u32 time_current = time_after - time_before; \
67 txt_printf(">FLIP %.8lu\n", avg_flip_cycles);\ 73 prof_times[idx] += time_current; \
68 txt_printf("TEXT\n");\ 74 prof_count[idx]++; \
69 txt_printf(">DRAWF %.8lu\n", avg_txt_drawf_cycles);\ 75 prof_max[idx] = MAX(time_current, prof_max[idx]);\
70 txt_printf(">PRINTF %.8lu\n", avg_txt_printf_cycles);\ 76 prof_min[idx] = MIN(time_current, prof_min[idx]);\
71 txt_printf(">RENDER %.8lu\n", avg_txt_render_cycles);\ 77} while(0);
72 txt_printf(">CLEAR %.8lu\n", avg_txt_clear_cycles);\ 78
73 txt_printf("TOTAL %.8lu\n", avg_frame_cycles);\ 79#define FRAME_START() do { \
74 txt_render();\ 80 profile_start();\
75 }\ 81} while(0)
76 if (profile_bg_show) {\ 82
77 u32 frame_time =\ 83
78 FP_DIV(\ 84#define FRAME_END() do { \
79 FP_NUM(avg_frame_cycles + 1, 2),\ 85 prof_frame_count++;\
80 FP_NUM(2809, 2),\ 86 frame_time = profile_measure();\
81 2) * 166;\ 87 prof_frame_time_max = MAX(prof_frame_time_max, frame_time);\
82 u32 fps =\ 88 prof_frame_time += profile_stop();\
83 FP_DIV(\ 89 if (prof_show) { \
84 FP_NUM(280896 * 60, 2),\ 90 draw_filled_rect(0, 0, SCREEN_WIDTH - 1, 8 * (PROF_NUM + 1), 2); \
85 FP_NUM(avg_frame_cycles + 1, 2),\ 91 u32 fps = (u64)280896 * 60 / (prof_frame_avg + 1); \
86 2);\ 92 if (prof_frame_avg == 0) { \
87 draw_filled_rect(8 * 18, 0, 239, 16, 0);\ 93 fps = 0; \
88 txt_drawf("TIME: %.6lu", 8 * 18, 0, 1, frame_time >> 2);\ 94 } \
89 txt_drawf("MAX FPS:%.4lu", 8 * 18, 8, 1, (fps >> 2) + 1);\ 95 txt_drawf_small("FRAME TIME/FPS: %.9l/%.2l", 0, 0, COL_FG, \
90 }\ 96 prof_frame_avg, fps);\
91 } while (0) 97 txt_drawf_small("MAX: %.9l/%l", 8 * 19, 0, COL_FG, \
92 98 prof_frame_time_max, 280896);\
93static u32 prof_frame_counter = 0; 99 for (size_t idx = 0; idx < PROF_NUM; idx++) { \
94 100 txt_drawf_small("%s %.9l (%.9l %.9l) %08x:%08x", 0, 8 * (idx + 1), COL_FG, \
95static u32 frame_cycles = 0; 101 prof_type_str[idx], \
96static u32 flip_cycles = 0; 102 prof_avg[idx], \
97static u32 clear_cycles = 0; 103 prof_min[idx], \
98static u32 line_cycles = 0; 104 prof_max[idx], \
99static u32 rect_cycles = 0; 105 prof_avg[idx], \
100static u32 fill_rect_cycles = 0; 106 prof_max[idx]);\
101static u32 chr_cycles = 0; 107 }; \
102static u32 icn_cycles = 0; 108 draw_filled_rect(0, SCREEN_HEIGHT - 9, 58, SCREEN_HEIGHT - 1, 2); \
103static u32 txt_drawf_cycles = 0; 109 txt_drawf_small("CPU USAGE: %.3l", 0, SCREEN_HEIGHT - 9, COL_FG, \
104static u32 txt_printf_cycles = 0; 110 (u64)prof_frame_avg * 100 / 280896);\
105static u32 txt_render_cycles = 0; 111 } \
106static u32 txt_clear_cycles = 0; 112 if (prof_frame_count >= PROF_ENABLE) { \
107 113 for (size_t idx = 0; idx < PROF_NUM; idx++) { \
108static u32 avg_frame_cycles = 0; 114 prof_avg[idx] = prof_times[idx] / prof_frame_count; \
109static u32 avg_flip_cycles = 0; 115 if (prof_reset_minmax) { \
110static u32 avg_clear_cycles = 0; 116 prof_min[idx] = -1; \
111static u32 avg_line_cycles = 0; 117 prof_max[idx] = 0; \
112static u32 avg_rect_cycles = 0; 118 } \
113static u32 avg_fill_rect_cycles = 0; 119 prof_times[idx] = 0; \
114static u32 avg_chr_cycles = 0; 120 prof_count[idx] = 0; \
115static u32 avg_icn_cycles = 0; 121 }; \
116static u32 avg_txt_drawf_cycles = 0; 122 prof_frame_avg = prof_frame_time / prof_frame_count; \
117static u32 avg_txt_printf_cycles = 0; 123 prof_frame_count = 0; \
118static u32 avg_txt_render_cycles = 0; 124 prof_frame_time = 0; \
119static u32 avg_txt_clear_cycles = 0; 125 } \
120 126 } while(0)
121#if PROF_ENABLE == 1
122#define FRAME_START()\
123 do { \
124 if (prof_frame_counter == PROF_N_FRAMES) {\
125 avg_frame_cycles = frame_cycles / prof_frame_counter;\
126 avg_flip_cycles = flip_cycles / prof_frame_counter;\
127 avg_clear_cycles = clear_cycles / prof_frame_counter;\
128 avg_line_cycles = line_cycles / prof_frame_counter;\
129 avg_rect_cycles = rect_cycles / prof_frame_counter;\
130 avg_fill_rect_cycles = fill_rect_cycles / prof_frame_counter;\
131 avg_chr_cycles = chr_cycles / prof_frame_counter;\
132 avg_icn_cycles = icn_cycles / prof_frame_counter;\
133 avg_txt_drawf_cycles = txt_drawf_cycles / prof_frame_counter;\
134 avg_txt_printf_cycles = txt_printf_cycles / prof_frame_counter;\
135 avg_txt_render_cycles = txt_render_cycles / prof_frame_counter;\
136 avg_txt_clear_cycles = txt_clear_cycles / prof_frame_counter;\
137 frame_cycles = 0;\
138 flip_cycles = 0;\
139 clear_cycles = 0;\
140 line_cycles = 0;\
141 rect_cycles = 0;\
142 fill_rect_cycles = 0;\
143 chr_cycles = 0;\
144 icn_cycles = 0;\
145 txt_drawf_cycles = 0;\
146 txt_printf_cycles = 0;\
147 txt_render_cycles = 0;\
148 txt_clear_cycles = 0;\
149 prof_frame_counter = 0;\
150 }\
151 profile_start();\
152 } while (0)
153#elif PROF_ENABLE == 2
154#define FRAME_START()\
155 do { \
156 if (prof_frame_counter == PROF_N_FRAMES) {\
157 avg_frame_cycles = frame_cycles;\
158 avg_flip_cycles = flip_cycles;\
159 avg_clear_cycles = clear_cycles;\
160 avg_line_cycles = line_cycles;\
161 avg_rect_cycles = rect_cycles;\
162 avg_fill_rect_cycles = fill_rect_cycles;\
163 avg_chr_cycles = chr_cycles;\
164 avg_icn_cycles = icn_cycles;\
165 avg_txt_drawf_cycles = txt_drawf_cycles;\
166 avg_txt_printf_cycles = txt_printf_cycles;\
167 avg_txt_render_cycles = txt_render_cycles;\
168 avg_txt_clear_cycles = txt_clear_cycles;\
169 frame_cycles = 0;\
170 flip_cycles = 0;\
171 clear_cycles = 0;\
172 line_cycles = 0;\
173 rect_cycles = 0;\
174 fill_rect_cycles = 0;\
175 chr_cycles = 0;\
176 icn_cycles = 0;\
177 txt_drawf_cycles = 0;\
178 txt_printf_cycles = 0;\
179 txt_render_cycles = 0;\
180 txt_clear_cycles = 0;\
181 prof_frame_counter = 0;\
182 }\
183 profile_start();\
184 } while (0)
185#endif
186 127
187#define FRAME_END() \ 128#define PROF_SHOW() do { \
188 do { \ 129 prof_show ^= 1; \
189 prof_frame_counter++;\ 130} while(0)
190 frame_cycles += profile_stop();\
191 } while (0)
192 131
193#else 132#else
194 133
195// No profiling. 134// No profiling.
196#define PROF(F,VAR) (F) 135#define PROF_INIT()
136#define PROF(F,VAR) do {F;} while(0)
137#define FRAME_START() do { \
138 profile_start();\
139} while(0)
140#define FRAME_END() do { \
141 frame_time = profile_stop();\
142} while(0)
197#define PROF_SHOW() 143#define PROF_SHOW()
198#define FRAME_START()
199#define FRAME_END()
200#endif 144#endif
diff --git a/src/renderer.c b/src/renderer.c
deleted file mode 100644
index 5ef9cab..0000000
--- a/src/renderer.c
+++ /dev/null
@@ -1,653 +0,0 @@
1//
2// This Mode 0 renderer provides a way of drawing directly to a framebuffer
3// (similar to Mode 3 and 4) while retaining the flexibility of using other
4// backgrounds if needed. It also performs double buffering to avoid tearing
5// artifacts and tries to only draw tiles that changed on each frame.
6//
7// In addition to the frontbuffer (displayed on background 0), a tiled text
8// layer is displayed on background 1, which can be used for application
9// development or for debug information.
10//
11// These two layers occupy the first and second background charblocks, leaving
12// the remaining two available for other background layers. There are 14KB of
13// sprite memory available, since the backbuffer is located at the end of the
14// VRAM, but if more space is needed it can be moved to the end of the BG
15// charblocks instead.
16//
17
18#include "renderer.h"
19#include "text.h"
20
21// The frontbuffer is located at the beginning of the VRAM, and requires 20KB of
22// video memory for 32 * 20 tiles at 4bpp.
23#define FRONTBUF ((u32*)(MEM_VRAM))
24
25// Adjust both of these if the location of the map changes. Each screenblock
26// requires less than 2KB.
27#define FRONTBUF_TILEMAP ((u16*)(MEM_VRAM + KB(20)))
28#define FRONTBUF_SB 10
29
30// The backbuffer is located at the end of the VRAM. This can allow us to use
31// more backgrounds but eats into the available memory for sprites. This should
32// be fine for non sprite intensive applications. If more sprite memory is
33// needed, the backbuffer can be located at the end of the background memory
34// instead (64KB - 20KB).
35#define BACKBUF ((u32*)(MEM_VRAM + KB(96) - KB(20)))
36
37// Keep track of which tiles need to be copied to the frontbuffer.
38static u32 dirty_tiles[21] = {0};
39
40// Bounds checks can be disabled at compile time, but this will not always improve
41// the performance and can in fact make it worse. It is possible that this is
42// due to some aliasing optimizations, but this has not been confirmed yet.
43#ifdef DISABLE_BOUNDCHECK_SCREEN
44#define BOUNDCHECK_SCREEN(X,Y)
45#else
46#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
47#endif
48
49// Swap A and B values without a tmp variable.
50#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))
51
52// Swap A and B values to make sure A <= B.
53#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
54
55IWRAM_CODE
56void screen_fill(u8 clr) {
57// #if 0
58// u32 *dst = backbuf;
59// for(int i = 0; i < KB(75) / 8; i++) {
60// *dst++ = 0x01010101 * clr;
61// }
62// #else
63// dma_fill(backbuf, 0x01010101 * clr, KB(75) / 2, 3);
64// screen_updated = true;
65// #endif
66}
67
68IWRAM_CODE
69void
70draw_pixel(size_t x, size_t y, u8 clr) {
71 BOUNDCHECK_SCREEN(x, y);
72
73 // Find row position for the given x/y coordinates.
74 size_t tile_x = x / 8;
75 size_t tile_y = y / 8;
76 size_t start_col = x % 8;
77 size_t start_row = y % 8;
78 size_t pos = start_row + (tile_x + tile_y * 32) * 8;
79
80 // Update backbuffer.
81 size_t shift = start_col * sizeof(u32);
82 BACKBUF[pos] = (BACKBUF[pos] & ~(0xF << shift)) | clr << shift;
83
84 // Mark tile as dirty.
85 dirty_tiles[tile_y] |= 1 << tile_x;
86}
87
88IWRAM_CODE
89static inline
90void
91draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
92 // TODO
93}
94
95IWRAM_CODE
96UNROLL_LOOPS
97static inline
98void
99draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
100 // TODO
101}
102
103IWRAM_CODE
104void
105draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
106 // BOUNDCHECK_SCREEN(x0, y0);
107 // BOUNDCHECK_SCREEN(x1, y1);
108 if (y0 == y1) {
109 MAYBE_SWAP(x0, x1);
110 draw_hline(x0, x1, y0, clr);
111 } else if (x0 == x1) {
112 MAYBE_SWAP(y0, y1);
113 draw_vline(x0, y0, y1, clr);
114 } else {
115 // // Diagonal line.
116 // int dx = x0 > x1 ? x0 - x1 : x1 - x0;
117 // int dy = y0 > y1 ? y0 - y1 : y1 - y0;
118 // int x_step = x0 > x1 ? -1 : 1;
119 // int y_step = y0 > y1 ? -SCREEN_WIDTH : SCREEN_WIDTH;
120
121 // u16 *dst = NULL;
122 // uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0);
123 // u32 mask = x0 & 1 ? ~0xFF : 0xFF;
124 // u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8);
125 // if (dx >= dy) {
126 // int diff = 2 * dy - dx;
127 // for (int i = 0; i < dx + 1; i++) {
128 // dst = (u16 *)(addr - (mask >> 31));
129 // *dst = (*dst & ~mask) | (color & mask);
130 // if (diff >= 0) {
131 // diff -= 2 * dx;
132 // addr += y_step;
133 // }
134 // diff += 2 * dy;
135 // addr += x_step;
136 // mask = ~mask;
137 // }
138 // } else {
139 // int diff = 2 * dx - dy;
140 // for (int i = 0; i < dy + 1; i++) {
141 // dst = (u16 *)(addr - (mask >> 31));
142 // *dst = (*dst & ~mask) | (color & mask);
143 // if (diff >= 0) {
144 // diff -= 2 * dy;
145 // addr += x_step;
146 // mask = ~mask;
147 // }
148 // diff += 2 * dx;
149 // addr += y_step;
150 // }
151 // }
152 }
153
154 // // Find row positions for the given x/y coordinates.
155 // size_t tile_x0 = x0 / 8;
156 // size_t tile_y0 = y0 / 8;
157 // size_t tile_x1 = x1 / 8;
158 // size_t tile_y1 = y1 / 8;
159 // size_t start_col0 = x0 % 8;
160 // size_t start_col1 = x1 % 8;
161 // size_t start_row0 = y0 % 8;
162 // size_t start_row1 = y1 % 8;
163
164 // // Get a pointer to the backbuffer and the tile row.
165 // u32 *backbuffer = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8];
166
167 // if (y0 == y1) {
168 // // Horizontal line. There are 3 cases:
169 // // 1. Lines fit on a single tile.
170 // // 2. Lines go through 2 tiles, both require partial row updates.
171 // // 3. Lines go through 3 or more tiles, first and last tiles use
172 // // partial row updates, rows in the middle can write the entire row.
173 // size_t dx = tile_x1 - tile_x0;
174 // if (dx < 1) {
175 // u32 row_mask = 0xFFFFFFFF;
176 // row_mask >>= (7 - start_col1 - dx) * 4;
177 // row_mask &= 0xFFFFFFFF << start_col0 * 4;
178 // u32 row = (0x11111111 * clr) & row_mask;
179 // backbuffer[0] = (backbuffer[0] & ~row_mask) | row;
180 // dirty_tiles[tile_y0] |= 1 << tile_x0;
181 // } else {
182 // size_t shift_left = start_col0 * 4;
183 // size_t shift_right = (7 - start_col1) * 4;
184 // u32 row_mask = 0xFFFFFFFF;
185 // u32 row = 0x11111111 * clr;
186 // backbuffer[0] = backbuffer[0] & ~(row_mask << shift_left);
187 // backbuffer[0] |= row << shift_left;
188 // dirty_tiles[tile_y0] |= 1 << tile_x0;
189 // for (size_t i = 1; i < dx; i++) {
190 // backbuffer[i * 8] = row;
191 // dirty_tiles[tile_y0] |= 1 << (tile_x0 + i);
192 // }
193 // backbuffer[dx * 8] = backbuffer[dx * 8] & ~(row_mask >> shift_right);
194 // backbuffer[dx * 8] |= row >> shift_right;
195 // dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx);
196 // }
197 // } else if (x0 == x1) {
198 // // Vertical line. The cases are analogous to the horizontal ones.
199 // size_t dy = tile_y1 - tile_y0;
200 // u32 row_mask = 0xF << start_col0 * 4;
201 // u32 row_left = (0x11111111 * clr) & row_mask;
202 // if (dy < 1) {
203 // for (size_t i = 0; i <= y1 - y0; i++, backbuffer++) {
204 // backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left;
205 // }
206 // } else {
207 // for (size_t i = 0; i < (8 - start_row0); i++, backbuffer++) {
208 // backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left;
209 // }
210 // dirty_tiles[tile_y0] |= 1 << tile_x0;
211 // backbuffer += 8 * 31;
212 // for (size_t j = 1; j < dy; j++) {
213 // for (size_t i = 0; i < 8; i++, backbuffer++) {
214 // backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left;
215 // }
216 // backbuffer += 8 * 31;
217 // dirty_tiles[tile_y0 + j] |= 1 << tile_x0;
218 // }
219 // for (size_t i = 0; i <= start_row1; i++, backbuffer++) {
220 // backbuffer[0] = (backbuffer[0] & ~row_mask) | row_left;
221 // }
222 // dirty_tiles[tile_y1] |= 1 << tile_x0;
223 // }
224 // } else {
225 // // Diagonal line.
226 // int dx = x0 > x1 ? x0 - x1 : x1 - x0;
227 // int dy = y0 > y1 ? y1 - y0 : y0 - y1;
228 // int x_step = x0 < x1 ? 1 : -1;
229 // int y_step = y0 < y1 ? 1 : -1;
230 // int err = dx + dy;
231 // while (!(x0 == x1 && y0 == y1)) {
232 // draw_pixel(x0, y0, clr);
233 // int diff = 2 * err;
234 // if (diff >= dy) {
235 // err += dy;
236 // x0 += x_step;
237 // }
238 // if (diff <= dx) {
239 // err += dx;
240 // y0 += y_step;
241 // }
242 // }
243 // }
244}
245
246IWRAM_CODE
247void
248draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
249 BOUNDCHECK_SCREEN(x0, y0);
250 BOUNDCHECK_SCREEN(x1, y1);
251 MAYBE_SWAP(x0, x1);
252 MAYBE_SWAP(y0, y1);
253
254 draw_hline(x0, x1, y0, clr);
255 draw_hline(x0, x1, y1, clr);
256 draw_vline(x0, y0, y1, clr);
257 draw_vline(x1, y0, y1, clr);
258
259 // TODO: check if this is better.
260// BOUNDCHECK_SCREEN(x0, y0);
261// BOUNDCHECK_SCREEN(x1, y1);
262
263// // Find row positions for the given x/y coordinates.
264// size_t tile_x0 = x0 / 8;
265// size_t tile_y0 = y0 / 8;
266// size_t tile_x1 = x1 / 8;
267// size_t tile_y1 = y1 / 8;
268// size_t start_col0 = x0 % 8;
269// size_t start_col1 = x1 % 8;
270// size_t start_row0 = y0 % 8;
271// size_t start_row1 = y1 % 8;
272
273// // Get a pointer to the backbuffer and the tile row.
274// u32 *buf_top = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8];
275// u32 *buf_bot = &BACKBUF[start_row1 + (tile_x0 + tile_y1 * 32) * 8];
276
277// size_t dx = tile_x1 - tile_x0;
278// size_t dy = tile_y1 - tile_y0;
279
280// memset(buf_top, 3, x1 - x0);
281// memset(buf_bot, 3, x1 - x0);
282
283// if (dx < 1) {
284// dirty_tiles[tile_y0] |= 1 << tile_x0;
285// dirty_tiles[tile_y1] |= 1 << tile_x0;
286// } else {
287// dirty_tiles[tile_y0] |= 1 << tile_x0;
288// dirty_tiles[tile_y1] |= 1 << tile_x0;
289// for (size_t i = 1; i < dx; i++) {
290// dirty_tiles[tile_y0] |= 1 << (tile_x0 + i);
291// dirty_tiles[tile_y1] |= 1 << (tile_x0 + i);
292// }
293// dirty_tiles[tile_y0] |= 1 << (tile_x0 + dx);
294// dirty_tiles[tile_y1] |= 1 << (tile_x0 + dx);
295// }
296// if (dy < 1) {
297// } else {
298// for (size_t j = 1; j < dy; j++) {
299// dirty_tiles[tile_y0 + j] |= 1 << tile_x0;
300// dirty_tiles[tile_y0 + j] |= 1 << (tile_x0 + dx);
301// }
302// }
303}
304
305IWRAM_CODE
306void
307draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
308 BOUNDCHECK_SCREEN(x0, y0);
309 BOUNDCHECK_SCREEN(x1, y1);
310 MAYBE_SWAP(x0, x1);
311 MAYBE_SWAP(y0, y1);
312
313 // Special condition. If the screen is to be completely filled, use the DMA
314 // instead.
315 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) {
316 screen_fill(clr);
317 return;
318 }
319
320 // Drawline implementation.
321 for (size_t y = y0; y <= y1; y++) {
322 // NOTE: Unclear why here draw_hline is faster than draw_line.
323 draw_hline(x0, x1, y, clr);
324 }
325
326 // TODO: check if this is better.
327 // BOUNDCHECK_SCREEN(x0, y0);
328 // BOUNDCHECK_SCREEN(x1, y1);
329
330 // size_t dx = x1 - x0;
331 // size_t dy = y1 - y0;
332 // u8 *buf = &BACKBUF[0];
333 // memset(buf, 0x11 * clr, 16);
334 //for (size_t j = 0; j < 1; j++) {
335 // // for (size_t i = 0; i < dx; i++) {
336 // // buf[i + j * 16] = clr;
337 // // }
338 // //
339 // // BACKBUF[j + 0] = 0x11111111 * clr;
340 // // BACKBUF[j + 1] = 0x11111111 * clr;
341 // // BACKBUF[j + 2] = 0x11111111 * clr;
342 // // BACKBUF[j + 3] = 0x11111111 * clr;
343 // // BACKBUF[j + 4] = 0x11111111 * clr;
344 // // BACKBUF[j + 5] = 0x11111111 * clr;
345 // // BACKBUF[j + 6] = 0x11111111 * clr;
346 // // BACKBUF[j + 7] = 0x11111111 * clr;
347
348 // buf[j + 0] = 0x1 * clr;
349 // buf[j + 1] = 0x1 * clr;
350 // buf[j + 2] = 0x1 * clr;
351 // buf[j + 3] = 0x1 * clr;
352 // // buf[j + 4] = 0x1111 * clr;
353 // // buf[j + 5] = 0x1111 * clr;
354 // // buf[j + 6] = 0x1111 * clr;
355 // // buf[j + 7] = 0x1111 * clr;
356 //}
357 // u8 *buf = &BACKBUF[0];
358 // buf[8 * 16 + 0] = clr;
359 // buf[8 * 16 + 1] = clr;
360 // buf[8 * 16 + 2] = clr;
361 // buf[8 * 16 + 3] = clr;
362 // buf[8 * 16 + 4] = clr;
363 // buf[8 * 16 + 5] = clr;
364 // buf[8 * 16 + 6] = clr;
365 // buf[8 * 16 + 7] = clr;
366 // for (size_t j = 0; j < dy; j++) {
367 // for (size_t i = 0; i < dx; i++) {
368 // buf[i + j * 16] = clr;
369 // }
370 // }
371 // size_t n_rect = MIN(dx, dy);
372 // n_rect = n_rect / 2 + 1;
373 // for (size_t i = 0; i < n_rect; i++) {
374 // draw_rect(x0 + i, y0 + i, x1 - i, y1 - i, clr);
375 // }
376}
377
378// IWRAM_CODE
379// void
380// draw_tile(size_t x, size_t y, Tile *tile, u8 clr) {
381// BOUNDCHECK_SCREEN(x, y);
382
383// // Find row position for the given x/y coordinates.
384// size_t tile_x = x / 8;
385// size_t tile_y = y / 8;
386// size_t start_col = x % 8;
387// size_t start_row = y % 8;
388
389// // Get a pointer to the backbuffer and the tile row.
390// size_t pos = start_row + (tile_x + tile_y * 32) * 8;
391// u32 *backbuffer = &BACKBUF[pos];
392// u32 *row = tile;
393
394// // This will blend all colors weirdly if using tiles that contain colors
395// // higher than 1.
396// size_t shift_left = start_col * 4;
397// size_t shift_right = (8 - start_col) * 4;
398// // u32 row_mask_left = merge ? 0 : 0xFFFFFFFF << shift_left;
399// // u32 row_mask_right = merge ? 0 : 0xFFFFFFFF >> shift_right;
400
401// // Draw the tiles. There are 4 possible cases:
402// // 1. The tile is exactly at the tile boundary.
403// // 2. The tile spans 2 tiles horizontally.
404// // 3. The tile spans 2 tiles vertically.
405// // 4. The tile spans 4 tiles.
406// if (start_col == 0 && start_row == 0) {
407// for (size_t i = 0; i < (8 - start_row); i++, backbuffer++) {
408// BOUNDCHECK_SCREEN(x, y + i);
409// backbuffer[0] = (backbuffer[0] & ~row_mask_left) | row[i] * clr;
410// }
411// dirty_tiles[tile_y] |= 1 << tile_x;
412// } else if (start_row == 0) {
413// for (size_t i = 0; i < 8; i++, backbuffer++) {
414// BOUNDCHECK_SCREEN(x, y + i);
415// backbuffer[0] = (backbuffer[0] & ~row_mask_left) | (row[i] * clr << shift_left);
416// backbuffer[8] = (backbuffer[8] & ~row_mask_right) | (row[i] * clr >> shift_right);
417// }
418// dirty_tiles[tile_y] |= 1 << tile_x;
419// dirty_tiles[tile_y] |= 1 << (tile_x + 1);
420// } else if (start_col == 0) {
421// for (size_t i = 0; i < (8 - start_row); i++, backbuffer++) {
422// BOUNDCHECK_SCREEN(x, y + i);
423// backbuffer[0] = (backbuffer[0] & ~row_mask_left) | row[i] * clr;
424// }
425// backbuffer += 8 * 31;
426// for (size_t i = (8 - start_row); i < 8; i++, backbuffer++) {
427// BOUNDCHECK_SCREEN(x, y + i);
428// backbuffer[0] = (backbuffer[0] & ~row_mask_left) | row[i] * clr;
429// }
430// dirty_tiles[tile_y] |= 1 << tile_x;
431// dirty_tiles[tile_y + 1] |= 1 << tile_x;
432// } else {
433// for (size_t i = 0; i < (8 - start_row); i++, backbuffer++) {
434// BOUNDCHECK_SCREEN(x, y + i);
435// backbuffer[0] = (backbuffer[0] & ~row_mask_left) | (row[i] * clr << shift_left);
436// backbuffer[8] = (backbuffer[8] & ~row_mask_right) | (row[i] * clr >> shift_right);
437// }
438// backbuffer += 8 * 31;
439// for (size_t i = (8 - start_row); i < 8; i++, backbuffer++) {
440// BOUNDCHECK_SCREEN(x, y + i);
441// backbuffer[0] = (backbuffer[0] & ~row_mask_left) | (row[i] * clr << shift_left);
442// backbuffer[8] = (backbuffer[8] & ~row_mask_right) | (row[i] * clr >> shift_right);
443// }
444// dirty_tiles[tile_y] |= 1 << tile_x;
445// dirty_tiles[tile_y] |= 1 << (tile_x + 1);
446// dirty_tiles[tile_y + 1] |= 1 << tile_x;
447// dirty_tiles[tile_y + 1] |= 1 << (tile_x + 1);
448// }
449// }
450
451// void
452// clear_screen(void) {
453// dma_fill(FRONTBUF, 0, KB(20), 3);
454// }
455
456IWRAM_CODE
457void
458flip_buffer(void) {
459 // TODO: Copying all tiles for now. Study if it's better to use dirty_tiles
460 // or dirty_lines.
461 // Copy dirty tiles from the backbuffer to the frontbuffer.
462 Tile *dst = FRONTBUF;
463 Tile *src = BACKBUF;
464 for (size_t j = 0; j < 20; ++j) {
465 // if (dirty_tiles[j] == 0) {
466 // continue;
467 // }
468 for (size_t i = 0, k = 1; i < 30; ++i, k <<= 1) {
469 // if (dirty_tiles[j] & k) {
470 dst[i + j * 32] = src[i + j * 32];
471 // }
472 }
473 // dirty_tiles[j] = 0;
474 }
475}
476
477static u32 dec_nibble[] = {
478 0x00000000, 0x01000000, 0x00010000, 0x01010000,
479 0x00000100, 0x01000100, 0x00010100, 0x01010100,
480 0x00000001, 0x01000001, 0x00010001, 0x01010001,
481 0x00000101, 0x01000101, 0x00010101, 0x01010101,
482};
483
484static u32 dec_nibble_flip_x[] = {
485 0x00000000, 0x00000001, 0x00000100, 0x00000101,
486 0x00010000, 0x00010001, 0x00010100, 0x00010101,
487 0x01000000, 0x01000001, 0x01000100, 0x01000101,
488 0x01010000, 0x01010001, 0x01010100, 0x01010101,
489};
490
491IWRAM_CODE
492static inline
493u64
494decode_1bpp(u8 row, u8 flip_x) {
495 if (flip_x) {
496 u32 *lut = dec_nibble_flip_x;
497 return (u64)lut[(row >> 4) & 0xF] << 32 | (u64)lut[(row >> 0) & 0xF];
498 }
499 u32 *lut = dec_nibble;
500 return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF];
501}
502
503IWRAM_CODE
504static inline
505void
506draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 flip_x) {
507 // BOUNDCHECK_SCREEN(x, y);
508
509 // size_t tile_x = x / 8;
510 // size_t start_col = x % 8;
511 // size_t shift_left = start_col * 8;
512 // size_t shift_right = (8 - start_col) * 8;
513
514 // u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2];
515 // if (start_col == 0) {
516 // u64 clr_a = decode_1bpp(a, flip_x);
517 // u64 clr_b = decode_1bpp(b, flip_x);
518 // u64 mask_a = (clr_a * 0xF);
519 // u64 mask_b = (clr_b * 0xF);
520 // u64 mask = (mask_a | mask_b);
521 // u64 color = clr_a + (clr_b << 1);
522 // dst[0] = (dst[0] & ~mask) | color;
523 // } else {
524 // u64 clr_a = decode_1bpp(a, flip_x);
525 // u64 clr_b = decode_1bpp(b, flip_x);
526 // u64 mask_a = (clr_a * 0xF);
527 // u64 mask_b = (clr_b * 0xF);
528 // u64 mask = (mask_a | mask_b);
529 // u64 color = clr_a + (clr_b << 1);
530 // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
531 // if ((x + 7) > (SCREEN_WIDTH)) {
532 // return;
533 // }
534 // dst[1] = (dst[1] & ~(mask >> shift_right)) | (color >> shift_right);
535 // }
536
537 // TODO: different blend modes?
538}
539
540IWRAM_CODE
541static inline
542void
543draw_1bpp_row(size_t x, size_t y, u8 a, u8 clr, u8 flip_x) {
544 // BOUNDCHECK_SCREEN(x, y);
545
546 // size_t tile_x = x / 8;
547 // size_t start_col = x % 8;
548 // size_t shift_left = start_col * 8;
549 // size_t shift_right = (8 - start_col) * 8;
550
551 // u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2];
552 // if (start_col == 0) {
553 // u64 color = decode_1bpp(a, flip_x);
554 // u64 mask = (color * 0xF);
555 // color *= clr;
556 // dst[0] = (dst[0] & ~mask) | color;
557 // } else {
558 // u64 color = decode_1bpp(a, flip_x);
559 // u64 mask = (color * 0xF);
560 // color *= clr;
561 // dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
562 // if ((x + 7) > (SCREEN_WIDTH)) {
563 // return;
564 // }
565 // dst[1] = (dst[1] & ~(mask >> shift_right)) | (color >> shift_right);
566 // }
567
568 // TODO: different blend modes?
569}
570
571IWRAM_CODE
572void
573draw_chr(size_t x, size_t y, u8 *sprite, u8 flip_x, u8 flip_y) {
574 // BOUNDCHECK_SCREEN(x, y);
575 // if (!flip_y) {
576 // for(size_t v = 0; v < 8; v++) {
577 // if ((y + v) >= SCREEN_HEIGHT) break;
578 // u8 ch1 = sprite[v + 0];
579 // u8 ch2 = sprite[v + 8];
580 // draw_2bpp_row(x, y + v, ch1, ch2, flip_x);
581 // }
582 // } else {
583 // for(size_t v = 0; v < 8; v++) {
584 // if ((y + v) >= SCREEN_HEIGHT) break;
585 // u8 ch1 = sprite[(7 - v) + 0];
586 // u8 ch2 = sprite[(7 - v) + 8];
587 // draw_2bpp_row(x, y + v, ch1, ch2, flip_x);
588 // }
589 // }
590}
591
592IWRAM_CODE
593void
594draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
595 // BOUNDCHECK_SCREEN(x, y);
596 // if (!flip_y) {
597 // for(size_t v = 0; v < 8; v++) {
598 // if ((y + v) >= SCREEN_HEIGHT) break;
599 // u8 ch1 = sprite[v];
600 // draw_1bpp_row(x, y + v, ch1, clr, flip_x);
601 // }
602 // } else {
603 // for(size_t v = 0; v < 8; v++) {
604 // if ((y + v) >= SCREEN_HEIGHT) break;
605 // u8 ch1 = sprite[(7 - v)];
606 // draw_1bpp_row(x, y + v, ch1, clr, flip_x);
607 // }
608 // }
609}
610
611#include "font.h"
612
613// Font rendering function for the text engine.
614void
615txt_drawc(char c, size_t x, size_t y, u8 clr) {
616 u8 *tile = font_icn;
617 draw_icn(x, y, tile + 8 * c, clr, 1, 0);
618}
619
620void
621renderer_init(void) {
622 // Initialize display mode and bg palette.
623 DISP_CTRL = DISP_MODE_3 | DISP_BG_2;
624
625 // Clear VRAM.
626 dma_fill(MEM_VRAM, 0, KB(96), 3);
627
628 // Initialize display mode and bg palette.
629 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ;
630
631 // Initialize backgrounds.
632 BG_CTRL(0) = BG_CHARBLOCK(0) | BG_SCREENBLOCK(FRONTBUF_SB) | BG_PRIORITY(1);
633 // BG_CTRL(1) = BG_CHARBLOCK(1) | BG_SCREENBLOCK(FONT_SB) | BG_PRIORITY(0);
634
635 // Initialize background memory map for frontbuffer.
636 for (size_t i = 0; i < 32 * 20; ++i) {
637 FRONTBUF_TILEMAP[i] = i;
638 }
639
640 // Initialize default palette.
641 PAL_BUFFER_BG[0] = COLOR_BLACK;
642 PAL_BUFFER_BG[1] = COLOR_WHITE;
643 PAL_BUFFER_BG[2] = COLOR_RED;
644 PAL_BUFFER_BG[3] = COLOR_BLUE;
645 PAL_BUFFER_BG[4] = COLOR_CYAN;
646 PAL_BUFFER_BG[5] = COLOR_PURPLE;
647 PAL_BUFFER_BG[6] = COLOR_YELLOW;
648 PAL_BUFFER_BG[7] = COLOR_GREEN;
649 PAL_BUFFER_BG[8] = COLOR_GREY;
650
651 // Initialize text engine.
652 txt_init(txt_drawc);
653}
diff --git a/src/renderer_m4.c b/src/renderer_m4.c
deleted file mode 100644
index 657bb73..0000000
--- a/src/renderer_m4.c
+++ /dev/null
@@ -1,582 +0,0 @@
1#include "renderer.h"
2#include "text.h"
3
4//
5// Parameters.
6//
7
8#define SUBPIXEL_LINES 0
9#define DEC_BIG_LUT 1
10
11// Front/back buffers for double buffering.
12#define BUF_0 ((u32*)(MEM_VRAM))
13#define BUF_1 ((u32*)(MEM_VRAM ^ 0x0A000))
14
15// Pointer to the backbuffer.
16static u16 *backbuf = BUF_1;
17
18// Boundchecks can be disabled at compile time but this will not always improve
19// the performance and can in fact make it worse. It is possible that this is
20// due to some aliasing optimizations but not sure at this moment.
21#ifdef DISABLE_BOUNDCHECK_SCREEN
22#define BOUNDCHECK_SCREEN(X,Y)
23#else
24#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
25#endif
26
27// Swap A and B values without a tmp variable.
28#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))
29
30// Swap A and B values to make sure A <= B.
31#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
32
33//
34// Basic primitives.
35//
36
37IWRAM_CODE
38void screen_fill(u8 clr) {
39 dma_fill(backbuf, 0x01010101 * clr, KB(75) / 2, 3);
40}
41
42IWRAM_CODE
43void
44draw_pixel(size_t x, size_t y, u8 clr) {
45 BOUNDCHECK_SCREEN(x, y);
46 u16 *dst = &backbuf[(x + y * SCREEN_WIDTH) / 2];
47 if(x & 1) {
48 *dst = (*dst & 0xF) | (clr << 8);
49 } else {
50 *dst = (*dst & ~0xF) | clr;
51 }
52}
53
54IWRAM_CODE
55static inline
56void
57draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
58 BOUNDCHECK_SCREEN(x0, y0);
59 BOUNDCHECK_SCREEN(x1, y0);
60 // Find row positions for the given x/y coordinates.
61 size_t tile_x0 = x0 / 8;
62 size_t tile_x1 = x1 / 8;
63 size_t start_col = x0 % 8;
64 size_t end_col = x1 % 8;
65 // Horizontal line. There are 3 cases:
66 // 1. Lines fit on a single tile.
67 // 2. Lines go through 2 tiles, both require partial row updates.
68 // 3. Lines go through 3 or more tiles, first and last tiles use
69 // partial row updates, rows in the middle can write the entire
70 // row.
71 size_t dx = tile_x1 - tile_x0;
72 u64 *dst = &backbuf[(tile_x0 * 8 + y0 * SCREEN_WIDTH) / 2];
73 if (dx < 1) {
74 u64 row_mask = 0xFFFFFFFFFFFFFFFF;
75 row_mask >>= (7 - end_col - dx) * 8;
76 row_mask &= 0xFFFFFFFFFFFFFFFF << start_col * 8;
77 u64 row = (0x0101010101010101 * clr) & row_mask;
78 *dst = (*dst & ~row_mask) | row;
79 } else {
80 size_t shift_left = start_col * 8;
81 size_t shift_right = (7 - end_col) * 8;
82 u64 row_mask = 0xFFFFFFFFFFFFFFFF;
83 u64 row = 0x0101010101010101 * clr;
84 dst[0] = (dst[0] & ~(row_mask << shift_left)) | row << shift_left;
85 if (dx != 1) {
86 dma_fill(&dst[1], 0x01010101 * clr, (dx - 1) * 8, 3);
87 }
88 dst[dx] = dst[dx] & ~(row_mask >> shift_right);
89 dst[dx] |= row >> shift_right;
90 }
91}
92
93IWRAM_CODE
94UNROLL_LOOPS
95static inline
96void
97draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
98 BOUNDCHECK_SCREEN(x0, y0);
99 BOUNDCHECK_SCREEN(x0, y1);
100 size_t tile_x0 = x0 / 8;
101 size_t start_col = x0 % 8;
102 u16 *dst = &backbuf[(start_col + tile_x0 * 8 + y0 * SCREEN_WIDTH) / 2];
103 if(x0 & 1) {
104 for (size_t i = 0; i <= y1 - y0; i++, dst += SCREEN_WIDTH / 2) {
105 *dst = (*dst & 0xF) | (clr << 8);
106 }
107 } else {
108 for (size_t i = 0; i <= y1 - y0; i++, dst += SCREEN_WIDTH / 2) {
109 *dst = (*dst & ~0xF) | clr;
110 }
111 }
112}
113
// Draw a line between (x0, y0) and (x1, y1), both inclusive, using palette
// index `clr`.
//
// Both endpoints go through BOUNDCHECK_SCREEN, so (unless bound checking is
// compiled out) nothing is drawn if either endpoint is off-screen. Horizontal
// and vertical lines are forwarded to draw_hline()/draw_vline() after ordering
// the coordinates. Every other line is rasterised one pixel at a time with an
// error-accumulating stepper: a fixed-point variant when SUBPIXEL_LINES == 1,
// otherwise a Bresenham-style integer variant.
//
// Shared conventions of both steppers:
//   - `addr` is the byte address of the current pixel inside the backbuffer.
//   - `mask` is 0xFF for a pixel in the low byte of its 16-bit word and ~0xFF
//     for one in the high byte; it is inverted on every step in x.
//   - `mask >> 31` is 1 only for the high-byte mask, so `addr - (mask >> 31)`
//     is the address of the 16-bit word containing the pixel.
//   - `color` carries `clr` in both bytes so `color & mask` selects the right
//     one.
114IWRAM_CODE
115UNROLL_LOOPS
116void
117draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
118 BOUNDCHECK_SCREEN(x0, y0);
119 BOUNDCHECK_SCREEN(x1, y1);
120
121 if (y0 == y1) {
// Horizontal: draw_hline expects x0 <= x1.
122 MAYBE_SWAP(x0, x1);
123 draw_hline(x0, x1, y0, clr);
124 } else if (x0 == x1) {
// Vertical: draw_vline expects y0 <= y1.
125 MAYBE_SWAP(y0, y1);
126 draw_vline(x0, y0, y1, clr);
127 } else {
128#if SUBPIXEL_LINES == 1
129 // Fixed Precision constants.
130 const int fp_bit = 6;
131 const int fp_one = FP_NUM(1, fp_bit);
132 const int fp_half = fp_one >> 1;
133
134 int dx = x0 > x1 ? x0 - x1 : x1 - x0;
135 int dy = y0 > y1 ? y0 - y1 : y1 - y0;
136 int dxf = (dx << fp_bit);
137 int dyf = (dy << fp_bit);
138
// Reorder the endpoints so the walk advances in the positive direction of
// the major axis.
139 if ((dx >= dy && x0 > x1) || (dx < dy && y0 > y1)) {
140 SWAP(x0, x1);
141 SWAP(y0, y1);
142 }
143
144 int frac_x = x0 > x1 ? FP_NUM(x0 - x1, fp_bit) : FP_NUM(x1 - x0, fp_bit);
145 int frac_y = y0 > y1 ? FP_NUM(y0 - y1, fp_bit) : FP_NUM(y1 - y0, fp_bit);
// Steps are expressed in bytes: one pixel in x, one full row in y.
146 int x_step = x0 > x1 ? -1 : 1;
147 int y_step = y0 > y1 ? -SCREEN_WIDTH : SCREEN_WIDTH;
148
149 u16 *dst = NULL;
150 uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0);
151 u32 mask = x0 & 1 ? ~0xFF : 0xFF;
152 u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8);
153 if (dx >= dy) {
// X-major line: x advances on every plotted pixel.
154 int distance = (frac_y - fp_one) * dx - (frac_x - fp_half) * dy;
155 int remaining = dx;
// First run of pixels, plotted before the first step in y.
156 while (distance <= 0 && remaining > 0) {
157 dst = (u16*)(addr - (mask >> 31));
158 *dst = (*dst & ~mask) | (color & mask);
159 distance += 2 * dyf;
160 addr += x_step;
161 remaining--;
162 mask = ~mask;
163 }
164 distance -= 2 * dxf;
165 addr += y_step;
166
// Remaining pixels: step in y whenever the error term is non-negative.
167 while (remaining >= 0) {
168 dst = (u16*)(addr - (mask >> 31));
169 *dst = (*dst & ~mask) | (color & mask);
170 if (distance >= 0) {
171 distance -= 2 * dxf;
172 addr += y_step;
173 }
174 distance += 2 * dyf;
175 addr += x_step;
176 mask = ~mask;
177 remaining--;
178 }
179 } else {
// Y-major line: y advances on every plotted pixel; the byte mask only
// flips when x steps.
180 int distance = (frac_x - fp_one) * dy - (frac_y - fp_half) * dx;
181 int remaining = dy;
182 while (distance <= 0 && remaining > 0) {
183 dst = (u16*)(addr - (mask >> 31));
184 *dst = (*dst & ~mask) | (color & mask);
185 distance += 2 * dxf;
186 addr += y_step;
187 remaining--;
188 }
189 distance -= 2 * dyf;
190 addr += x_step;
191 mask = ~mask;
192
193 while (remaining >= 0) {
194 dst = (u16*)(addr - (mask >> 31));
195 *dst = (*dst & ~mask) | (color & mask);
196 if (distance >= 0) {
197 distance -= 2 * dyf;
198 addr += x_step;
199 mask = ~mask;
200 }
201 distance += 2 * dxf;
202 addr += y_step;
203 remaining--;
204 }
205 }
206#else
207 // Diagonal line.
208 int dx = x0 > x1 ? x0 - x1 : x1 - x0;
209 int dy = y0 > y1 ? y0 - y1 : y1 - y0;
// Steps are expressed in bytes: one pixel in x, one full row in y.
210 int x_step = x0 > x1 ? -1 : 1;
211 int y_step = y0 > y1 ? -SCREEN_WIDTH : SCREEN_WIDTH;
212
213 u16 *dst = NULL;
214 uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0);
215 u32 mask = x0 & 1 ? ~0xFF : 0xFF;
216 u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8);
217 if (dx >= dy) {
// X-major: plot dx + 1 pixels, stepping in x every iteration and in y
// whenever the error term `diff` is non-negative.
218 int diff = 2 * dy - dx;
219 for (int i = 0; i < dx + 1; i++) {
220 dst = (u16*)(addr - (mask >> 31));
221 *dst = (*dst & ~mask) | (color & mask);
222 if (diff >= 0) {
223 diff -= 2 * dx;
224 addr += y_step;
225 }
226 diff += 2 * dy;
227 addr += x_step;
228 mask = ~mask;
229 }
230 } else {
// Y-major: plot dy + 1 pixels, stepping in y every iteration and in x
// (flipping the byte mask) whenever `diff` is non-negative.
231 int diff = 2 * dx - dy;
232 for (int i = 0; i < dy + 1; i++) {
233 dst = (u16*)(addr - (mask >> 31));
234 *dst = (*dst & ~mask) | (color & mask);
235 if (diff >= 0) {
236 diff -= 2 * dy;
237 addr += x_step;
238 mask = ~mask;
239 }
240 diff += 2 * dx;
241 addr += y_step;
242 }
243 }
244#endif
245 }
246}
247
248IWRAM_CODE
249void
250draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
251 BOUNDCHECK_SCREEN(x0, y0);
252 BOUNDCHECK_SCREEN(x1, y1);
253 MAYBE_SWAP(x0, x1);
254 MAYBE_SWAP(y0, y1);
255
256 draw_hline(x0, x1, y0, clr);
257 draw_hline(x0, x1, y1, clr);
258 draw_vline(x0, y0, y1, clr);
259 draw_vline(x1, y0, y1, clr);
260}
261
262IWRAM_CODE
263void
264draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
265 BOUNDCHECK_SCREEN(x0, y0);
266 BOUNDCHECK_SCREEN(x1, y1);
267 MAYBE_SWAP(x0, x1);
268 MAYBE_SWAP(y0, y1);
269
270 // Special condition. If the screen is to be completely filled, use the DMA
271 // instead.
272 if (x0 == 0 && x1 >= (SCREEN_WIDTH - 1) && y0 == 0 && y1 >= (SCREEN_HEIGHT - 1)) {
273 screen_fill(clr);
274 return;
275 }
276
277 // Drawline implementation.
278 for (size_t y = y0; y <= y1; y++) {
279 draw_hline(x0, x1, y, clr);
280 }
281}
282
283//
284// Sprites (chr/icn).
285//
286
287#if DEC_BIG_LUT == 1
288
289static u64 dec_byte_flip_x[256] = {
290 0x0000000000000000, 0x0000000000000001, 0x0000000000000100, 0x0000000000000101, 0x0000000000010000,
291 0x0000000000010001, 0x0000000000010100, 0x0000000000010101, 0x0000000001000000, 0x0000000001000001,
292 0x0000000001000100, 0x0000000001000101, 0x0000000001010000, 0x0000000001010001, 0x0000000001010100,
293 0x0000000001010101, 0x0000000100000000, 0x0000000100000001, 0x0000000100000100, 0x0000000100000101,
294 0x0000000100010000, 0x0000000100010001, 0x0000000100010100, 0x0000000100010101, 0x0000000101000000,
295 0x0000000101000001, 0x0000000101000100, 0x0000000101000101, 0x0000000101010000, 0x0000000101010001,
296 0x0000000101010100, 0x0000000101010101, 0x0000010000000000, 0x0000010000000001, 0x0000010000000100,
297 0x0000010000000101, 0x0000010000010000, 0x0000010000010001, 0x0000010000010100, 0x0000010000010101,
298 0x0000010001000000, 0x0000010001000001, 0x0000010001000100, 0x0000010001000101, 0x0000010001010000,
299 0x0000010001010001, 0x0000010001010100, 0x0000010001010101, 0x0000010100000000, 0x0000010100000001,
300 0x0000010100000100, 0x0000010100000101, 0x0000010100010000, 0x0000010100010001, 0x0000010100010100,
301 0x0000010100010101, 0x0000010101000000, 0x0000010101000001, 0x0000010101000100, 0x0000010101000101,
302 0x0000010101010000, 0x0000010101010001, 0x0000010101010100, 0x0000010101010101, 0x0001000000000000,
303 0x0001000000000001, 0x0001000000000100, 0x0001000000000101, 0x0001000000010000, 0x0001000000010001,
304 0x0001000000010100, 0x0001000000010101, 0x0001000001000000, 0x0001000001000001, 0x0001000001000100,
305 0x0001000001000101, 0x0001000001010000, 0x0001000001010001, 0x0001000001010100, 0x0001000001010101,
306 0x0001000100000000, 0x0001000100000001, 0x0001000100000100, 0x0001000100000101, 0x0001000100010000,
307 0x0001000100010001, 0x0001000100010100, 0x0001000100010101, 0x0001000101000000, 0x0001000101000001,
308 0x0001000101000100, 0x0001000101000101, 0x0001000101010000, 0x0001000101010001, 0x0001000101010100,
309 0x0001000101010101, 0x0001010000000000, 0x0001010000000001, 0x0001010000000100, 0x0001010000000101,
310 0x0001010000010000, 0x0001010000010001, 0x0001010000010100, 0x0001010000010101, 0x0001010001000000,
311 0x0001010001000001, 0x0001010001000100, 0x0001010001000101, 0x0001010001010000, 0x0001010001010001,
312 0x0001010001010100, 0x0001010001010101, 0x0001010100000000, 0x0001010100000001, 0x0001010100000100,
313 0x0001010100000101, 0x0001010100010000, 0x0001010100010001, 0x0001010100010100, 0x0001010100010101,
314 0x0001010101000000, 0x0001010101000001, 0x0001010101000100, 0x0001010101000101, 0x0001010101010000,
315 0x0001010101010001, 0x0001010101010100, 0x0001010101010101, 0x0100000000000000, 0x0100000000000001,
316 0x0100000000000100, 0x0100000000000101, 0x0100000000010000, 0x0100000000010001, 0x0100000000010100,
317 0x0100000000010101, 0x0100000001000000, 0x0100000001000001, 0x0100000001000100, 0x0100000001000101,
318 0x0100000001010000, 0x0100000001010001, 0x0100000001010100, 0x0100000001010101, 0x0100000100000000,
319 0x0100000100000001, 0x0100000100000100, 0x0100000100000101, 0x0100000100010000, 0x0100000100010001,
320 0x0100000100010100, 0x0100000100010101, 0x0100000101000000, 0x0100000101000001, 0x0100000101000100,
321 0x0100000101000101, 0x0100000101010000, 0x0100000101010001, 0x0100000101010100, 0x0100000101010101,
322 0x0100010000000000, 0x0100010000000001, 0x0100010000000100, 0x0100010000000101, 0x0100010000010000,
323 0x0100010000010001, 0x0100010000010100, 0x0100010000010101, 0x0100010001000000, 0x0100010001000001,
324 0x0100010001000100, 0x0100010001000101, 0x0100010001010000, 0x0100010001010001, 0x0100010001010100,
325 0x0100010001010101, 0x0100010100000000, 0x0100010100000001, 0x0100010100000100, 0x0100010100000101,
326 0x0100010100010000, 0x0100010100010001, 0x0100010100010100, 0x0100010100010101, 0x0100010101000000,
327 0x0100010101000001, 0x0100010101000100, 0x0100010101000101, 0x0100010101010000, 0x0100010101010001,
328 0x0100010101010100, 0x0100010101010101, 0x0101000000000000, 0x0101000000000001, 0x0101000000000100,
329 0x0101000000000101, 0x0101000000010000, 0x0101000000010001, 0x0101000000010100, 0x0101000000010101,
330 0x0101000001000000, 0x0101000001000001, 0x0101000001000100, 0x0101000001000101, 0x0101000001010000,
331 0x0101000001010001, 0x0101000001010100, 0x0101000001010101, 0x0101000100000000, 0x0101000100000001,
332 0x0101000100000100, 0x0101000100000101, 0x0101000100010000, 0x0101000100010001, 0x0101000100010100,
333 0x0101000100010101, 0x0101000101000000, 0x0101000101000001, 0x0101000101000100, 0x0101000101000101,
334 0x0101000101010000, 0x0101000101010001, 0x0101000101010100, 0x0101000101010101, 0x0101010000000000,
335 0x0101010000000001, 0x0101010000000100, 0x0101010000000101, 0x0101010000010000, 0x0101010000010001,
336 0x0101010000010100, 0x0101010000010101, 0x0101010001000000, 0x0101010001000001, 0x0101010001000100,
337 0x0101010001000101, 0x0101010001010000, 0x0101010001010001, 0x0101010001010100, 0x0101010001010101,
338 0x0101010100000000, 0x0101010100000001, 0x0101010100000100, 0x0101010100000101, 0x0101010100010000,
339 0x0101010100010001, 0x0101010100010100, 0x0101010100010101, 0x0101010101000000, 0x0101010101000001,
340 0x0101010101000100, 0x0101010101000101, 0x0101010101010000, 0x0101010101010001, 0x0101010101010100,
341 0x0101010101010101
342};
343
344static u64 dec_byte[256] = {
345 0x0000000000000000, 0x0100000000000000, 0x0001000000000000, 0x0101000000000000, 0x0000010000000000,
346 0x0100010000000000, 0x0001010000000000, 0x0101010000000000, 0x0000000100000000, 0x0100000100000000,
347 0x0001000100000000, 0x0101000100000000, 0x0000010100000000, 0x0100010100000000, 0x0001010100000000,
348 0x0101010100000000, 0x0000000001000000, 0x0100000001000000, 0x0001000001000000, 0x0101000001000000,
349 0x0000010001000000, 0x0100010001000000, 0x0001010001000000, 0x0101010001000000, 0x0000000101000000,
350 0x0100000101000000, 0x0001000101000000, 0x0101000101000000, 0x0000010101000000, 0x0100010101000000,
351 0x0001010101000000, 0x0101010101000000, 0x0000000000010000, 0x0100000000010000, 0x0001000000010000,
352 0x0101000000010000, 0x0000010000010000, 0x0100010000010000, 0x0001010000010000, 0x0101010000010000,
353 0x0000000100010000, 0x0100000100010000, 0x0001000100010000, 0x0101000100010000, 0x0000010100010000,
354 0x0100010100010000, 0x0001010100010000, 0x0101010100010000, 0x0000000001010000, 0x0100000001010000,
355 0x0001000001010000, 0x0101000001010000, 0x0000010001010000, 0x0100010001010000, 0x0001010001010000,
356 0x0101010001010000, 0x0000000101010000, 0x0100000101010000, 0x0001000101010000, 0x0101000101010000,
357 0x0000010101010000, 0x0100010101010000, 0x0001010101010000, 0x0101010101010000, 0x0000000000000100,
358 0x0100000000000100, 0x0001000000000100, 0x0101000000000100, 0x0000010000000100, 0x0100010000000100,
359 0x0001010000000100, 0x0101010000000100, 0x0000000100000100, 0x0100000100000100, 0x0001000100000100,
360 0x0101000100000100, 0x0000010100000100, 0x0100010100000100, 0x0001010100000100, 0x0101010100000100,
361 0x0000000001000100, 0x0100000001000100, 0x0001000001000100, 0x0101000001000100, 0x0000010001000100,
362 0x0100010001000100, 0x0001010001000100, 0x0101010001000100, 0x0000000101000100, 0x0100000101000100,
363 0x0001000101000100, 0x0101000101000100, 0x0000010101000100, 0x0100010101000100, 0x0001010101000100,
364 0x0101010101000100, 0x0000000000010100, 0x0100000000010100, 0x0001000000010100, 0x0101000000010100,
365 0x0000010000010100, 0x0100010000010100, 0x0001010000010100, 0x0101010000010100, 0x0000000100010100,
366 0x0100000100010100, 0x0001000100010100, 0x0101000100010100, 0x0000010100010100, 0x0100010100010100,
367 0x0001010100010100, 0x0101010100010100, 0x0000000001010100, 0x0100000001010100, 0x0001000001010100,
368 0x0101000001010100, 0x0000010001010100, 0x0100010001010100, 0x0001010001010100, 0x0101010001010100,
369 0x0000000101010100, 0x0100000101010100, 0x0001000101010100, 0x0101000101010100, 0x0000010101010100,
370 0x0100010101010100, 0x0001010101010100, 0x0101010101010100, 0x0000000000000001, 0x0100000000000001,
371 0x0001000000000001, 0x0101000000000001, 0x0000010000000001, 0x0100010000000001, 0x0001010000000001,
372 0x0101010000000001, 0x0000000100000001, 0x0100000100000001, 0x0001000100000001, 0x0101000100000001,
373 0x0000010100000001, 0x0100010100000001, 0x0001010100000001, 0x0101010100000001, 0x0000000001000001,
374 0x0100000001000001, 0x0001000001000001, 0x0101000001000001, 0x0000010001000001, 0x0100010001000001,
375 0x0001010001000001, 0x0101010001000001, 0x0000000101000001, 0x0100000101000001, 0x0001000101000001,
376 0x0101000101000001, 0x0000010101000001, 0x0100010101000001, 0x0001010101000001, 0x0101010101000001,
377 0x0000000000010001, 0x0100000000010001, 0x0001000000010001, 0x0101000000010001, 0x0000010000010001,
378 0x0100010000010001, 0x0001010000010001, 0x0101010000010001, 0x0000000100010001, 0x0100000100010001,
379 0x0001000100010001, 0x0101000100010001, 0x0000010100010001, 0x0100010100010001, 0x0001010100010001,
380 0x0101010100010001, 0x0000000001010001, 0x0100000001010001, 0x0001000001010001, 0x0101000001010001,
381 0x0000010001010001, 0x0100010001010001, 0x0001010001010001, 0x0101010001010001, 0x0000000101010001,
382 0x0100000101010001, 0x0001000101010001, 0x0101000101010001, 0x0000010101010001, 0x0100010101010001,
383 0x0001010101010001, 0x0101010101010001, 0x0000000000000101, 0x0100000000000101, 0x0001000000000101,
384 0x0101000000000101, 0x0000010000000101, 0x0100010000000101, 0x0001010000000101, 0x0101010000000101,
385 0x0000000100000101, 0x0100000100000101, 0x0001000100000101, 0x0101000100000101, 0x0000010100000101,
386 0x0100010100000101, 0x0001010100000101, 0x0101010100000101, 0x0000000001000101, 0x0100000001000101,
387 0x0001000001000101, 0x0101000001000101, 0x0000010001000101, 0x0100010001000101, 0x0001010001000101,
388 0x0101010001000101, 0x0000000101000101, 0x0100000101000101, 0x0001000101000101, 0x0101000101000101,
389 0x0000010101000101, 0x0100010101000101, 0x0001010101000101, 0x0101010101000101, 0x0000000000010101,
390 0x0100000000010101, 0x0001000000010101, 0x0101000000010101, 0x0000010000010101, 0x0100010000010101,
391 0x0001010000010101, 0x0101010000010101, 0x0000000100010101, 0x0100000100010101, 0x0001000100010101,
392 0x0101000100010101, 0x0000010100010101, 0x0100010100010101, 0x0001010100010101, 0x0101010100010101,
393 0x0000000001010101, 0x0100000001010101, 0x0001000001010101, 0x0101000001010101, 0x0000010001010101,
394 0x0100010001010101, 0x0001010001010101, 0x0101010001010101, 0x0000000101010101, 0x0100000101010101,
395 0x0001000101010101, 0x0101000101010101, 0x0000010101010101, 0x0100010101010101, 0x0001010101010101,
396 0x0101010101010101
397};
398
399IWRAM_CODE
400static inline
401u64
402decode_1bpp(u8 row, u8 flip_x) {
403 if (flip_x) {
404 return dec_byte_flip_x[row];
405 }
406 return dec_byte[row];
407}
408
409#else
410
411static u32 dec_nibble[] = {
412 0x00000000, 0x01000000, 0x00010000, 0x01010000,
413 0x00000100, 0x01000100, 0x00010100, 0x01010100,
414 0x00000001, 0x01000001, 0x00010001, 0x01010001,
415 0x00000101, 0x01000101, 0x00010101, 0x01010101,
416};
417
418static u32 dec_nibble_flip_x[] = {
419 0x00000000, 0x00000001, 0x00000100, 0x00000101,
420 0x00010000, 0x00010001, 0x00010100, 0x00010101,
421 0x01000000, 0x01000001, 0x01000100, 0x01000101,
422 0x01010000, 0x01010001, 0x01010100, 0x01010101,
423};
424
425IWRAM_CODE
426static inline
427u64
428decode_1bpp(u8 row, u8 flip_x) {
429 if (flip_x) {
430 u32 *lut = dec_nibble_flip_x;
431 return (u64)lut[(row >> 4) & 0xF] << 32 | (u64)lut[(row >> 0) & 0xF];
432 }
433 u32 *lut = dec_nibble;
434 return (u64)lut[(row >> 0) & 0xF] << 32 | (u64)lut[(row >> 4) & 0xF];
435}
436
437#endif
438
439IWRAM_CODE
440static inline
441void
442draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 clr, u8 flip_x) {
443 BOUNDCHECK_SCREEN(x, y);
444
445 size_t tile_x = x / 8;
446 size_t start_col = x % 8;
447 size_t shift_left = start_col * 8;
448 size_t shift_right = (8 - start_col) * 8;
449
450 u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2];
451#if DEC_BIG_LUT
452 u64 *lut = flip_x ? dec_byte_flip_x : dec_byte;
453 u64 clr_a = lut[a];
454 u64 clr_b = lut[b];
455#else
456 u64 clr_a = decode_1bpp(a, flip_x);
457 u64 clr_b = decode_1bpp(b, flip_x);
458#endif
459 u64 mask_a = (clr_a * 0xF);
460 u64 mask_b = (clr_b * 0xF);
461 u64 mask = (mask_a | mask_b);
462 u64 color;
463 if (clr == 0) {
464 color = clr_a + (clr_b << 1);
465 } else if (clr == 15) {
466 color = 0;
467 } else {
468 color = (clr_a | clr_b) * clr;
469 }
470 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
471 dst[1] = (dst[1] & ~(mask >> shift_right)) | (color >> shift_right);
472}
473
474IWRAM_CODE
475static inline
476void
477draw_1bpp_row(size_t x, size_t y, u8 a, u8 clr, u8 flip_x) {
478 BOUNDCHECK_SCREEN(x, y);
479
480 size_t tile_x = x / 8;
481 size_t start_col = x % 8;
482 size_t shift_left = start_col * 8;
483 size_t shift_right = (8 - start_col) * 8;
484
485 u64 *dst = &backbuf[(y * 30 + tile_x) * 8 / 2];
486 u64 color = decode_1bpp(a, flip_x);
487 u64 mask = (color * 0xF);
488 color *= clr;
489 dst[0] = (dst[0] & ~(mask << shift_left)) | (color << shift_left);
490 dst[1] = (dst[1] & ~(mask >> shift_right)) | (color >> shift_right);
491}
492
493IWRAM_CODE
494void
495draw_chr(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
496 BOUNDCHECK_SCREEN(x, y);
497 if (!flip_y) {
498 for(size_t v = 0; v < 8; v++) {
499 if ((y + v) >= SCREEN_HEIGHT) break;
500 u8 ch1 = sprite[v + 0];
501 u8 ch2 = sprite[v + 8];
502 draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x);
503 }
504 } else {
505 for(size_t v = 0; v < 8; v++) {
506 if ((y + v) >= SCREEN_HEIGHT) break;
507 u8 ch1 = sprite[(7 - v) + 0];
508 u8 ch2 = sprite[(7 - v) + 8];
509 draw_2bpp_row(x, y + v, ch1, ch2, clr, flip_x);
510 }
511 }
512}
513
514IWRAM_CODE
515void
516draw_icn(size_t x, size_t y, u8 *sprite, u8 clr, u8 flip_x, u8 flip_y) {
517 BOUNDCHECK_SCREEN(x, y);
518 if (!flip_y) {
519 for(size_t v = 0; v < 8; v++) {
520 if ((y + v) >= SCREEN_HEIGHT) break;
521 u8 ch1 = sprite[v];
522 draw_1bpp_row(x, y + v, ch1, clr, flip_x);
523 }
524 } else {
525 for(size_t v = 0; v < 8; v++) {
526 if ((y + v) >= SCREEN_HEIGHT) break;
527 u8 ch1 = sprite[(7 - v)];
528 draw_1bpp_row(x, y + v, ch1, clr, flip_x);
529 }
530 }
531}
532
533//
534// Flipping buffers/copying memory.
535//
536
537IWRAM_CODE
538void
539flip_buffer(void) {
540 backbuf = (u16*)((u32)backbuf ^ 0x0A000);
541 DISP_CTRL ^= DISP_PAGE;
542}
543
544//
545// Text rendering.
546//
547
548#include "font.h"
549
550// Font rendering function for the text engine.
551void
552txt_drawc(char c, size_t x, size_t y, u8 clr) {
553 u8 *tile = font_icn;
554 draw_icn(x, y, tile + 8 * c, clr, 1, 0);
555}
556
557//
558// Initialization.
559//
560
561void
562renderer_init(void) {
563 // Initialize display mode and bg palette.
564 DISP_CTRL = DISP_MODE_4 | DISP_BG_2;
565
566 // Clear VRAM.
567 dma_fill((u16*)MEM_VRAM, 0, KB(96), 3);
568
569 // Initialize default palette.
570 PAL_BUFFER_BG[0] = COLOR_BLACK;
571 PAL_BUFFER_BG[1] = COLOR_WHITE;
572 PAL_BUFFER_BG[2] = COLOR_RED;
573 PAL_BUFFER_BG[3] = COLOR_BLUE;
574 PAL_BUFFER_BG[4] = COLOR_CYAN;
575 PAL_BUFFER_BG[5] = COLOR_PURPLE;
576 PAL_BUFFER_BG[6] = COLOR_YELLOW;
577 PAL_BUFFER_BG[7] = COLOR_GREEN;
578 PAL_BUFFER_BG[8] = COLOR_GREY;
579
580 // Initialize text engine.
581 txt_init(txt_drawc);
582}
diff --git a/src/text/font.h b/src/text/font.h
deleted file mode 100644
index e4b089c..0000000
--- a/src/text/font.h
+++ /dev/null
@@ -1,261 +0,0 @@
1static const u32 font[] = {
2 0x00000000, 0x00000000, 0x00002400, 0x423c0000,
3 0x00002400, 0x3c420000, 0x0000363e, 0x3e1c0800,
4 0x00081c3e, 0x3e1c0800, 0x001c1c3e, 0x363e081c,
5 0x00081c3e, 0x3e3e081c, 0x00000018, 0x18000000,
6 0x7e7e7e66, 0x667e7e7e, 0x00001824, 0x24180000,
7 0x7e7e665a, 0x5a667e7e, 0x00081c3e, 0x081c221c,
8 0x001c221c, 0x08083e08, 0x00183828, 0x08080c0c,
9 0x003c2424, 0x24343606, 0x00082208, 0x1c082208,
10 0x040c1c3c, 0x1c0c0400, 0x2030383c, 0x38302000,
11 0x081c3e08, 0x083e1c08, 0x00141414, 0x14001400,
12 0x003c2a2a, 0x2c282828, 0x0038043c, 0x423c201e,
13 0x00000000, 0x7e000000, 0x081c3e08, 0x3e1c083e,
14 0x00081c3e, 0x08080800, 0x00080808, 0x3e1c0800,
15 0x00001030, 0x7e301000, 0x0000080c, 0x7e0c0800,
16 0x00000000, 0x0002023e, 0x00001436, 0x7f361400,
17 0x0008081c, 0x1c3e3e00, 0x003e3e1c, 0x1c080800,
18 0x00000000, 0x00000000, 0x00080808, 0x08000800,
19 0x00141414, 0x00000000, 0x0000143e, 0x143e1400,
20 0x00081c02, 0x1c201e08, 0x00002616, 0x08343200,
21 0x00081408, 0x34122c00, 0x00080808, 0x00000000,
22 0x00000804, 0x04040800, 0x00000810, 0x10100800,
23 0x00001408, 0x3e081400, 0x00000808, 0x3e080800,
24 0x00000000, 0x000c0804, 0x00000000, 0x3e000000,
25 0x00000000, 0x000c0c00, 0x00203018, 0x0c060200,
26 0x00001c22, 0x2a221c00, 0x00000c0a, 0x08083e00,
27 0x00001c20, 0x1c023e00, 0x00001c22, 0x18221c00,
28 0x00001212, 0x123e1000, 0x00003e02, 0x1e201e00,
29 0x00001c02, 0x1e221c00, 0x00003e20, 0x10080400,
30 0x00001c22, 0x1c221c00, 0x00001c22, 0x3c201c00,
31 0x00000c0c, 0x000c0c00, 0x00000c0c, 0x000c0804,
32 0x00001008, 0x04081000, 0x0000003e, 0x003e0000,
33 0x00000408, 0x10080400, 0x001c2210, 0x08000800,
34 0x00001c2a, 0x3a021c00, 0x00001c22, 0x223e2200,
35 0x00001e22, 0x1e221e00, 0x00001c22, 0x02221c00,
36 0x00001e22, 0x22221e00, 0x00003e02, 0x1e023e00,
37 0x00003e02, 0x1e020200, 0x00001c02, 0x32221c00,
38 0x00002222, 0x3e222200, 0x00003e08, 0x08083e00,
39 0x00002020, 0x20221c00, 0x00002212, 0x0e122200,
40 0x00000202, 0x02023e00, 0x00002236, 0x2a222200,
41 0x0000262a, 0x32222200, 0x00001c22, 0x22221c00,
42 0x00001e22, 0x221e0200, 0x00001c22, 0x22122c00,
43 0x00001e22, 0x221e2200, 0x00001c02, 0x1c201e00,
44 0x00003e08, 0x08080800, 0x00002222, 0x22221c00,
45 0x00002222, 0x22140800, 0x0000222a, 0x2a2a1400,
46 0x00002214, 0x08142200, 0x00002222, 0x14080800,
47 0x00003e10, 0x08043e00, 0x00001c04, 0x04041c00,
48 0x0002060c, 0x18302000, 0x00001c10, 0x10101c00,
49 0x00081422, 0x00000000, 0x00000000, 0x00003e00,
50 0x00040810, 0x00000000, 0x00001c20, 0x3c223c00,
51 0x0002021e, 0x22221e00, 0x00001c02, 0x02021c00,
52 0x0020203c, 0x22223c00, 0x00001c22, 0x1e021c00,
53 0x00003c02, 0x1e020200, 0x00003c22, 0x223c201c,
54 0x0002021e, 0x22222200, 0x0008000c, 0x08083e00,
55 0x00200020, 0x2020221c, 0x0002120a, 0x060a3200,
56 0x000c0808, 0x08083e00, 0x0000162a, 0x2a2a2a00,
57 0x00000e32, 0x22222200, 0x00001c22, 0x22221c00,
58 0x00001e22, 0x221e0202, 0x00003c22, 0x223c2070,
59 0x00001a26, 0x02020200, 0x00001c02, 0x1c201e00,
60 0x00043e04, 0x04041800, 0x00002222, 0x22221c00,
61 0x00002222, 0x22140800, 0x00002222, 0x2a2a1400,
62 0x00002214, 0x08142200, 0x00002222, 0x223c201c,
63 0x00003e10, 0x08043e00, 0x00001c04, 0x06041c00,
64 0x00000808, 0x08080800, 0x00001c10, 0x30101c00,
65 0x00002c1a, 0x00000000, 0x00000814, 0x22223e00,
66 0x00000000, 0x00000000, 0x00000808, 0x08000800,
67 0x00001414, 0x00000000, 0x0000143e, 0x143e1400,
68 0x00081c04, 0x1c101c08, 0x00000410, 0x08041000,
69 0x00000814, 0x38143800, 0x00000808, 0x00000000,
70 0x00000804, 0x04040800, 0x00000810, 0x10100800,
71 0x00001408, 0x1c081400, 0x00000008, 0x1c080000,
72 0x00000000, 0x00000804, 0x00000000, 0x1c000000,
73 0x00000000, 0x00000800, 0x00001018, 0x080c0400,
74 0x00001c14, 0x14141c00, 0x0000080c, 0x08081c00,
75 0x00001c10, 0x1c041c00, 0x00001c10, 0x18101c00,
76 0x00001414, 0x1c101000, 0x00001c04, 0x1c101c00,
77 0x00001c04, 0x1c141c00, 0x00001c10, 0x08040400,
78 0x00001c14, 0x1c141c00, 0x00001c14, 0x1c101000,
79 0x00000008, 0x00000800, 0x00000008, 0x00000804,
80 0x00001008, 0x04081000, 0x0000001c, 0x001c0000,
81 0x00000408, 0x10080400, 0x00001c10, 0x08000800,
82 0x00001c14, 0x14041c00, 0x00001c14, 0x141c1400,
83 0x00001c14, 0x0c141c00, 0x00001c04, 0x04041c00,
84 0x00000c14, 0x14140c00, 0x00001c04, 0x0c041c00,
85 0x00001c04, 0x0c040400, 0x00001c04, 0x14141c00,
86 0x00001414, 0x1c141400, 0x00001c08, 0x08081c00,
87 0x00001010, 0x10141c00, 0x00001414, 0x0c141400,
88 0x00000404, 0x04041c00, 0x0000141c, 0x14141400,
89 0x00000c14, 0x14141400, 0x00000c14, 0x14141c00,
90 0x00001c14, 0x1c040400, 0x00001c14, 0x141c1000,
91 0x00001c14, 0x0c141400, 0x00001804, 0x1c100c00,
92 0x00001c08, 0x08080800, 0x00001414, 0x14141c00,
93 0x00001414, 0x14141800, 0x00001414, 0x141c1400,
94 0x00001414, 0x08141400, 0x00001414, 0x1c080800,
95 0x00001c10, 0x08041c00, 0x00001c04, 0x04041c00,
96 0x0000040c, 0x08181000, 0x00001c10, 0x10101c00,
97 0x00000814, 0x00000000, 0x00000000, 0x00001c00,
98 0x00000000, 0x00000000, 0x00000000, 0x00000000,
99 0x00000000, 0x00000000, 0x00000000, 0x00000000,
100 0x00000000, 0x00000000, 0x00000000, 0x00000000,
101 0x00000000, 0x00000000, 0x00000000, 0x00000000,
102 0x00000000, 0x00000000, 0x00000000, 0x00000000,
103 0x00000000, 0x00000000, 0x00000000, 0x00000000,
104 0x00000000, 0x00000000, 0x00000000, 0x00000000,
105 0x00000000, 0x00000000, 0x00000000, 0x00000000,
106 0x3e323232, 0x32323e00, 0x18181818, 0x18181800,
107 0x3e30303e, 0x06063e00, 0x3e30303c, 0x30303e00,
108 0x32323232, 0x3e303000, 0x3e06063e, 0x30303e00,
109 0x3e06063e, 0x26263e00, 0x3e303018, 0x0c060600,
110 0x3e32323e, 0x32323e00, 0x3e32323e, 0x30303e00,
111 0x00000000, 0x00060600, 0x00263618, 0x0c363200,
112 0x1c3e3232, 0x3e323200, 0x1e26261e, 0x26261e00,
113 0x3c060606, 0x06063c00, 0x1e323232, 0x32321e00,
114 0x3e06061e, 0x06063e00, 0x3e06061e, 0x06060600,
115 0x3c060636, 0x26261c00, 0x3232323e, 0x32323200,
116 0x1e0c0c0c, 0x0c0c1e00, 0x30303030, 0x32323e00,
117 0x2626160e, 0x16262600, 0x06060606, 0x06063e00,
118 0x62767e6a, 0x62626200, 0x363e3a32, 0x32323200,
119 0x1c323232, 0x32321c00, 0x3e32323e, 0x02020200,
120 0x1c323232, 0x3a1a2c00, 0x1e26261e, 0x26262600,
121 0x3c06061e, 0x30301e00, 0x7e181818, 0x18181800,
122 0x32323232, 0x32323e00, 0x32323232, 0x32341800,
123 0x32323232, 0x323a1400, 0x3232320c, 0x32323200,
124 0x32323232, 0x3c201e00, 0x3e30301c, 0x06063e00,
125 0x00000000, 0x00000000, 0x00000000, 0x00000000,
126 0x00000000, 0x00000000, 0x00000000, 0x00000000,
127 0x00000000, 0x00000000, 0x00000000, 0x00000000,
128 0x00000000, 0x00000000, 0x00000000, 0x00000000,
129 0x00000000, 0x00000000, 0x00000000, 0x00000000,
130};
131
132u32 font_icn[] = {
133 0x00000000, 0x00000000, 0x00240000, 0x00003c42,
134 0x00240000, 0x0000423c, 0x3e360000, 0x00081c3e,
135 0x3e1c0800, 0x00081c3e, 0x3e1c1c00, 0x1c083e36,
136 0x3e1c0800, 0x1c083e3e, 0x18000000, 0x00000018,
137 0x667e7e7e, 0x7e7e7e66, 0x24180000, 0x00001824,
138 0x5a667e7e, 0x7e7e665a, 0x3e1c0800, 0x1c221c08,
139 0x1c221c00, 0x083e0808, 0x28381800, 0x0c0c0808,
140 0x24243c00, 0x06363424, 0x08220800, 0x0822081c,
141 0x3c1c0c04, 0x00040c1c, 0x3c383020, 0x00203038,
142 0x083e1c08, 0x081c3e08, 0x14141400, 0x00140014,
143 0x2a2a3c00, 0x2828282c, 0x3c043800, 0x1e203c42,
144 0x00000000, 0x0000007e, 0x083e1c08, 0x3e081c3e,
145 0x3e1c0800, 0x00080808, 0x08080800, 0x00081c3e,
146 0x30100000, 0x0010307e, 0x0c080000, 0x00080c7e,
147 0x00000000, 0x3e020200, 0x36140000, 0x0014367f,
148 0x1c080800, 0x003e3e1c, 0x1c3e3e00, 0x0008081c,
149 0x00000000, 0x00000000, 0x08080800, 0x00080008,
150 0x14141400, 0x00000000, 0x3e140000, 0x00143e14,
151 0x021c0800, 0x081e201c, 0x16260000, 0x00323408,
152 0x08140800, 0x002c1234, 0x08080800, 0x00000000,
153 0x04080000, 0x00080404, 0x10080000, 0x00081010,
154 0x08140000, 0x0014083e, 0x08080000, 0x0008083e,
155 0x00000000, 0x04080c00, 0x00000000, 0x0000003e,
156 0x00000000, 0x000c0c00, 0x18302000, 0x0002060c,
157 0x221c0000, 0x001c222a, 0x0a0c0000, 0x003e0808,
158 0x201c0000, 0x003e021c, 0x221c0000, 0x001c2218,
159 0x12120000, 0x00103e12, 0x023e0000, 0x001e201e,
160 0x021c0000, 0x001c221e, 0x203e0000, 0x00040810,
161 0x221c0000, 0x001c221c, 0x221c0000, 0x001c203c,
162 0x0c0c0000, 0x000c0c00, 0x0c0c0000, 0x04080c00,
163 0x08100000, 0x00100804, 0x3e000000, 0x00003e00,
164 0x08040000, 0x00040810, 0x10221c00, 0x00080008,
165 0x2a1c0000, 0x001c023a, 0x221c0000, 0x00223e22,
166 0x221e0000, 0x001e221e, 0x221c0000, 0x001c2202,
167 0x221e0000, 0x001e2222, 0x023e0000, 0x003e021e,
168 0x023e0000, 0x0002021e, 0x021c0000, 0x001c2232,
169 0x22220000, 0x0022223e, 0x083e0000, 0x003e0808,
170 0x20200000, 0x001c2220, 0x12220000, 0x0022120e,
171 0x02020000, 0x003e0202, 0x36220000, 0x0022222a,
172 0x2a260000, 0x00222232, 0x221c0000, 0x001c2222,
173 0x221e0000, 0x00021e22, 0x221c0000, 0x002c1222,
174 0x221e0000, 0x00221e22, 0x021c0000, 0x001e201c,
175 0x083e0000, 0x00080808, 0x22220000, 0x001c2222,
176 0x22220000, 0x00081422, 0x2a220000, 0x00142a2a,
177 0x14220000, 0x00221408, 0x22220000, 0x00080814,
178 0x103e0000, 0x003e0408, 0x041c0000, 0x001c0404,
179 0x0c060200, 0x00203018, 0x101c0000, 0x001c1010,
180 0x22140800, 0x00000000, 0x00000000, 0x003e0000,
181 0x10080400, 0x00000000, 0x201c0000, 0x003c223c,
182 0x1e020200, 0x001e2222, 0x021c0000, 0x001c0202,
183 0x3c202000, 0x003c2222, 0x221c0000, 0x001c021e,
184 0x023c0000, 0x0002021e, 0x223c0000, 0x1c203c22,
185 0x1e020200, 0x00222222, 0x0c000800, 0x003e0808,
186 0x20002000, 0x1c222020, 0x0a120200, 0x00320a06,
187 0x08080c00, 0x003e0808, 0x2a160000, 0x002a2a2a,
188 0x320e0000, 0x00222222, 0x221c0000, 0x001c2222,
189 0x221e0000, 0x02021e22, 0x223c0000, 0x70203c22,
190 0x261a0000, 0x00020202, 0x021c0000, 0x001e201c,
191 0x043e0400, 0x00180404, 0x22220000, 0x001c2222,
192 0x22220000, 0x00081422, 0x22220000, 0x00142a2a,
193 0x14220000, 0x00221408, 0x22220000, 0x1c203c22,
194 0x103e0000, 0x003e0408, 0x041c0000, 0x001c0406,
195 0x08080000, 0x00080808, 0x101c0000, 0x001c1030,
196 0x1a2c0000, 0x00000000, 0x14080000, 0x003e2222,
197 0x00000000, 0x00000000, 0x08080000, 0x00080008,
198 0x14140000, 0x00000000, 0x3e140000, 0x00143e14,
199 0x041c0800, 0x081c101c, 0x10040000, 0x00100408,
200 0x14080000, 0x00381438, 0x08080000, 0x00000000,
201 0x04080000, 0x00080404, 0x10080000, 0x00081010,
202 0x08140000, 0x0014081c, 0x08000000, 0x0000081c,
203 0x00000000, 0x04080000, 0x00000000, 0x0000001c,
204 0x00000000, 0x00080000, 0x18100000, 0x00040c08,
205 0x141c0000, 0x001c1414, 0x0c080000, 0x001c0808,
206 0x101c0000, 0x001c041c, 0x101c0000, 0x001c1018,
207 0x14140000, 0x0010101c, 0x041c0000, 0x001c101c,
208 0x041c0000, 0x001c141c, 0x101c0000, 0x00040408,
209 0x141c0000, 0x001c141c, 0x141c0000, 0x0010101c,
210 0x08000000, 0x00080000, 0x08000000, 0x04080000,
211 0x08100000, 0x00100804, 0x1c000000, 0x00001c00,
212 0x08040000, 0x00040810, 0x101c0000, 0x00080008,
213 0x141c0000, 0x001c0414, 0x141c0000, 0x00141c14,
214 0x141c0000, 0x001c140c, 0x041c0000, 0x001c0404,
215 0x140c0000, 0x000c1414, 0x041c0000, 0x001c040c,
216 0x041c0000, 0x0004040c, 0x041c0000, 0x001c1414,
217 0x14140000, 0x0014141c, 0x081c0000, 0x001c0808,
218 0x10100000, 0x001c1410, 0x14140000, 0x0014140c,
219 0x04040000, 0x001c0404, 0x1c140000, 0x00141414,
220 0x140c0000, 0x00141414, 0x140c0000, 0x001c1414,
221 0x141c0000, 0x0004041c, 0x141c0000, 0x00101c14,
222 0x141c0000, 0x0014140c, 0x04180000, 0x000c101c,
223 0x081c0000, 0x00080808, 0x14140000, 0x001c1414,
224 0x14140000, 0x00181414, 0x14140000, 0x00141c14,
225 0x14140000, 0x00141408, 0x14140000, 0x0008081c,
226 0x101c0000, 0x001c0408, 0x041c0000, 0x001c0404,
227 0x0c040000, 0x00101808, 0x101c0000, 0x001c1010,
228 0x14080000, 0x00000000, 0x00000000, 0x001c0000,
229 0x00000000, 0x00000000, 0x00000000, 0x00000000,
230 0x00000000, 0x00000000, 0x00000000, 0x00000000,
231 0x00000000, 0x00000000, 0x00000000, 0x00000000,
232 0x00000000, 0x00000000, 0x00000000, 0x00000000,
233 0x00000000, 0x00000000, 0x00000000, 0x00000000,
234 0x00000000, 0x00000000, 0x00000000, 0x00000000,
235 0x00000000, 0x00000000, 0x00000000, 0x00000000,
236 0x00000000, 0x00000000, 0x00000000, 0x00000000,
237 0x3232323e, 0x003e3232, 0x18181818, 0x00181818,
238 0x3e30303e, 0x003e0606, 0x3c30303e, 0x003e3030,
239 0x32323232, 0x0030303e, 0x3e06063e, 0x003e3030,
240 0x3e06063e, 0x003e2626, 0x1830303e, 0x0006060c,
241 0x3e32323e, 0x003e3232, 0x3e32323e, 0x003e3030,
242 0x00000000, 0x00060600, 0x18362600, 0x0032360c,
243 0x32323e1c, 0x0032323e, 0x1e26261e, 0x001e2626,
244 0x0606063c, 0x003c0606, 0x3232321e, 0x001e3232,
245 0x1e06063e, 0x003e0606, 0x1e06063e, 0x00060606,
246 0x3606063c, 0x001c2626, 0x3e323232, 0x00323232,
247 0x0c0c0c1e, 0x001e0c0c, 0x30303030, 0x003e3232,
248 0x0e162626, 0x00262616, 0x06060606, 0x003e0606,
249 0x6a7e7662, 0x00626262, 0x323a3e36, 0x00323232,
250 0x3232321c, 0x001c3232, 0x3e32323e, 0x00020202,
251 0x3232321c, 0x002c1a3a, 0x1e26261e, 0x00262626,
252 0x1e06063c, 0x001e3030, 0x1818187e, 0x00181818,
253 0x32323232, 0x003e3232, 0x32323232, 0x00183432,
254 0x32323232, 0x00143a32, 0x0c323232, 0x00323232,
255 0x32323232, 0x001e203c, 0x1c30303e, 0x003e0606,
256 0x00000000, 0x00000000, 0x00000000, 0x00000000,
257 0x00000000, 0x00000000, 0x00000000, 0x00000000,
258 0x00000000, 0x00000000, 0x00000000, 0x00000000,
259 0x00000000, 0x00000000, 0x00000000, 0x00000000,
260 0x00000000, 0x00000000, 0x00000000, 0x00000000,
261};