diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-22 18:45:35 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-22 18:45:35 +0200 |
commit | d4fe4d95f105d8b9b47d26264c4876cbf4095a5d (patch) | |
tree | c85cee096603ab54301537add8f7d6e231bdbd81 | |
parent | 320690edaff025d4d446e2f67d23304e1810196e (diff) | |
download | stepper-d4fe4d95f105d8b9b47d26264c4876cbf4095a5d.tar.gz stepper-d4fe4d95f105d8b9b47d26264c4876cbf4095a5d.zip |
Prepare profiling macros
-rw-r--r-- | src/gba/gba.h | 31 | ||||
-rw-r--r-- | src/main.c | 44 | ||||
-rw-r--r-- | src/profiling.c | 201 | ||||
-rw-r--r-- | src/sequencer.c | 2 |
4 files changed, 229 insertions, 49 deletions
diff --git a/src/gba/gba.h b/src/gba/gba.h index b02d745..27a6a9a 100644 --- a/src/gba/gba.h +++ b/src/gba/gba.h | |||
@@ -98,18 +98,18 @@ typedef u16 Color; | |||
98 | typedef Color Palette[16]; | 98 | typedef Color Palette[16]; |
99 | 99 | ||
100 | // Inline function to calculate the 15 bit color value. | 100 | // Inline function to calculate the 15 bit color value. |
101 | #define RGB15(R,G,B) (u16)(((B) << 10) | ((G) << 5) | (R)); | 101 | #define RGB15(R,G,B) (u16)(((B) << 10) | ((G) << 5) | (R)) |
102 | 102 | ||
103 | // Some nice default colors. | 103 | // Some nice default colors. |
104 | #define COLOR_BLACK RGB15(0, 0, 0) | 104 | #define COLOR_BLACK RGB15( 0, 0, 0) |
105 | #define COLOR_RED RGB15(31, 0,10) | 105 | #define COLOR_RED RGB15(31, 0, 10) |
106 | #define COLOR_GREEN RGB15(31, 0,10) | 106 | #define COLOR_GREEN RGB15( 0, 31, 18) |
107 | #define COLOR_YELLOW RGB15(31, 0,10) | 107 | #define COLOR_YELLOW RGB15(31, 31, 0) |
108 | #define COLOR_BLUE RGB15(2, 17,31) | 108 | #define COLOR_BLUE RGB15( 2, 17, 31) |
109 | #define COLOR_PURPLE RGB15(2, 17,31) | 109 | #define COLOR_PURPLE RGB15(15, 7, 31) |
110 | #define COLOR_CYAN RGB15(0, 27,30) | 110 | #define COLOR_CYAN RGB15( 0, 27, 30) |
111 | #define COLOR_GREY RGB15(16,17,19) | 111 | #define COLOR_GREY RGB15(16, 17, 19) |
112 | #define COLOR_WHITE RGB15(28,28,28) | 112 | #define COLOR_WHITE RGB15(28, 28, 28) |
113 | 113 | ||
114 | // | 114 | // |
115 | // Tile memory access. | 115 | // Tile memory access. |
@@ -419,6 +419,8 @@ inline | |||
419 | void | 419 | void |
420 | dma_copy(void *dst, const void *src, u32 size, int channel) { | 420 | dma_copy(void *dst, const void *src, u32 size, int channel) { |
421 | dma_transfer_copy(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE); | 421 | dma_transfer_copy(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE); |
422 | // Stall for 2 cycles in case we call this function more than once. | ||
423 | asm("nop"); asm("nop"); | ||
422 | } | 424 | } |
423 | 425 | ||
424 | // Fill the dst location with the word set at src. | 426 | // Fill the dst location with the word set at src. |
@@ -426,6 +428,8 @@ inline | |||
426 | void | 428 | void |
427 | dma_fill(void *dst, vu32 src, u32 size, int channel) { | 429 | dma_fill(void *dst, vu32 src, u32 size, int channel) { |
428 | dma_transfer_fill(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE); | 430 | dma_transfer_fill(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE); |
431 | // Stall for 2 cycles in case we call this function more than once. | ||
432 | asm("nop"); asm("nop"); | ||
429 | } | 433 | } |
430 | 434 | ||
431 | // | 435 | // |
@@ -661,6 +665,7 @@ wait_vsync(void) { | |||
661 | #define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0])) | 665 | #define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0])) |
662 | 666 | ||
663 | // Fixed-point arithmetic for (i.P) numbers. | 667 | // Fixed-point arithmetic for (i.P) numbers. |
668 | #define FP_NUM(A,P) ((A) << (P)) | ||
664 | #define FP_MUL(A,B,P) (((A) * (B)) >> (P)) | 669 | #define FP_MUL(A,B,P) (((A) * (B)) >> (P)) |
665 | #define FP_DIV(A,B,P) (((A) << (P)) / (B)) | 670 | #define FP_DIV(A,B,P) (((A) << (P)) / (B)) |
666 | #define FP_LERP(Y0,Y1,X,P) ((Y0) + FP_MUL((X), ((Y1) - (Y0)), P)) | 671 | #define FP_LERP(Y0,Y1,X,P) ((Y0) + FP_MUL((X), ((Y1) - (Y0)), P)) |
@@ -683,4 +688,10 @@ memcpy32(u32 *dst, const u32 *src, u32 size) { | |||
683 | } | 688 | } |
684 | } | 689 | } |
685 | 690 | ||
691 | // | ||
692 | // Compiler hints. | ||
693 | // | ||
694 | |||
695 | #define UNROLL_LOOPS __attribute__((optimize("unroll-loops"))) | ||
696 | |||
686 | #endif // GBA_H | 697 | #endif // GBA_H |
@@ -15,45 +15,13 @@ WITH REGARD TO THIS SOFTWARE. | |||
15 | #include "renderer.c" | 15 | #include "renderer.c" |
16 | #include "sequencer.c" | 16 | #include "sequencer.c" |
17 | 17 | ||
18 | #define PROF_ENABLE 0 | ||
19 | #include "profiling.c" | ||
20 | |||
18 | // | 21 | // |
19 | // Config parameters. | 22 | // Config parameters. |
20 | // | 23 | // |
21 | 24 | ||
22 | #ifdef PROF_ENABLE | ||
23 | #if PROF_ENABLE == 0 | ||
24 | #define PROF(F,VAR) (profile_start(),(F),(VAR) = profile_stop()) | ||
25 | #elif PROF_ENABLE == 1 | ||
26 | #define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR))) | ||
27 | #endif | ||
28 | #ifndef PROF_SHOW_X | ||
29 | #define PROF_SHOW_X 0 | ||
30 | #endif | ||
31 | #ifndef PROF_SHOW_Y | ||
32 | #define PROF_SHOW_Y 0 | ||
33 | #endif | ||
34 | #define PROF_SHOW() \ | ||
35 | do {\ | ||
36 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ | ||
37 | txt_printf("EVAL: %lu ", eval_cycles);\ | ||
38 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\ | ||
39 | txt_printf("FLIP: %lu ", flip_cycles);\ | ||
40 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\ | ||
41 | txt_printf("INPUT: %lu ", input_cycles);\ | ||
42 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\ | ||
43 | txt_printf("FRAME: %lu ", frame_counter);\ | ||
44 | frame_counter++;\ | ||
45 | } while (0) | ||
46 | #define PROF_INIT() \ | ||
47 | u32 frame_counter = 0;\ | ||
48 | u32 input_cycles = 0;\ | ||
49 | u32 eval_cycles = 0;\ | ||
50 | u32 flip_cycles = 0; | ||
51 | #else | ||
52 | #define PROF(F,VAR) (F) | ||
53 | #define PROF_SHOW() | ||
54 | #define PROF_INIT() | ||
55 | #endif | ||
56 | |||
57 | int main(void) { | 25 | int main(void) { |
58 | // Adjust system wait times. | 26 | // Adjust system wait times. |
59 | SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE; | 27 | SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE; |
@@ -72,12 +40,14 @@ int main(void) { | |||
72 | sequencer_init(); | 40 | sequencer_init(); |
73 | 41 | ||
74 | // Main loop. | 42 | // Main loop. |
75 | PROF_INIT(); | ||
76 | while (true) { | 43 | while (true) { |
44 | poll_keys(); | ||
77 | bios_vblank_wait(); | 45 | bios_vblank_wait(); |
78 | PROF(flip_buffer(), flip_cycles); | 46 | FRAME_START(); |
79 | PROF(handle_sequencer_input(), input_cycles); | 47 | PROF(handle_sequencer_input(), input_cycles); |
80 | PROF_SHOW(); | 48 | PROF_SHOW(); |
49 | PROF(flip_buffer(), flip_cycles); | ||
50 | FRAME_END(); | ||
81 | } | 51 | } |
82 | 52 | ||
83 | return 0; | 53 | return 0; |
diff --git a/src/profiling.c b/src/profiling.c new file mode 100644 index 0000000..de969d2 --- /dev/null +++ b/src/profiling.c | |||
@@ -0,0 +1,201 @@ | |||
1 | // | ||
2 | // Profiling macros. | ||
3 | // | ||
4 | |||
5 | #ifndef PROF_ENABLE | ||
6 | #define PROF_ENABLE 0 | ||
7 | #endif | ||
8 | |||
9 | #if PROF_ENABLE > 0 && PROF_ENABLE < 3 | ||
10 | |||
11 | #ifndef PROF_N_FRAMES | ||
12 | #define PROF_N_FRAMES 30 | ||
13 | #endif | ||
14 | |||
15 | // Profile method 1: Average per N frames. | ||
16 | #if PROF_ENABLE == 1 | ||
17 | #define TEXT_ENABLE 1 | ||
18 | #define PROF(F,VAR) \ | ||
19 | do { \ | ||
20 | u32 __tmp_prof = profile_measure();\ | ||
21 | F;\ | ||
22 | (VAR) += profile_measure() - __tmp_prof;\ | ||
23 | } while (0) | ||
24 | |||
25 | // Profile method 2: Maximum in N frames. | ||
26 | #elif PROF_ENABLE == 2 | ||
27 | #define TEXT_ENABLE 1 | ||
28 | #define PROF(F,VAR) \ | ||
29 | do { \ | ||
30 | u32 __tmp_prof = profile_measure();\ | ||
31 | (F);\ | ||
32 | (VAR) = MAX(profile_measure() - __tmp_prof, (VAR));\ | ||
33 | } while (0) | ||
34 | #endif | ||
35 | |||
36 | #ifndef PROF_SHOW_X | ||
37 | #define PROF_SHOW_X 0 | ||
38 | #endif | ||
39 | #ifndef PROF_SHOW_Y | ||
40 | #define PROF_SHOW_Y 0 | ||
41 | #endif | ||
42 | |||
43 | static bool profile_show = true; | ||
44 | static bool profile_bg_show = true; | ||
45 | |||
46 | #define PROF_SHOW() \ | ||
47 | do { \ | ||
48 | if (key_tap(KEY_START)) {\ | ||
49 | profile_show ^= 1;\ | ||
50 | }\ | ||
51 | if (key_tap(KEY_SELECT)) {\ | ||
52 | profile_bg_show ^= 1;\ | ||
53 | }\ | ||
54 | if (profile_show) {\ | ||
55 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ | ||
56 | draw_filled_rect((PROF_SHOW_X), (PROF_SHOW_X), 8 * 18, 8 * 16, 0);\ | ||
57 | txt_printf("VIDEO\n");\ | ||
58 | txt_printf(">CLEAR %.8lu\n", avg_clear_cycles);\ | ||
59 | txt_printf(">LINES %.8lu\n", avg_line_cycles);\ | ||
60 | txt_printf(">RECT %.8lu\n", avg_rect_cycles);\ | ||
61 | txt_printf(">FRECT %.8lu\n", avg_fill_rect_cycles);\ | ||
62 | txt_printf(">1BPP %.8lu\n", avg_icn_cycles);\ | ||
63 | txt_printf(">2BPP %.8lu\n", avg_chr_cycles);\ | ||
64 | txt_printf(">FLIP %.8lu\n", avg_flip_cycles);\ | ||
65 | txt_printf("TEXT\n");\ | ||
66 | txt_printf(">DRAWF %.8lu\n", avg_txt_drawf_cycles);\ | ||
67 | txt_printf(">PRINTF %.8lu\n", avg_txt_printf_cycles);\ | ||
68 | txt_printf(">RENDER %.8lu\n", avg_txt_render_cycles);\ | ||
69 | txt_printf(">CLEAR %.8lu\n", avg_txt_clear_cycles);\ | ||
70 | txt_printf("TOTAL %.8lu\n", avg_frame_cycles);\ | ||
71 | }\ | ||
72 | if (profile_bg_show) {\ | ||
73 | u32 frame_time =\ | ||
74 | FP_DIV(\ | ||
75 | FP_NUM(avg_frame_cycles + 1, 2),\ | ||
76 | FP_NUM(2809, 2),\ | ||
77 | 2) * 166;\ | ||
78 | u32 fps =\ | ||
79 | FP_DIV(\ | ||
80 | FP_NUM(280896 * 60, 2),\ | ||
81 | FP_NUM(avg_frame_cycles + 1, 2),\ | ||
82 | 2);\ | ||
83 | txt_printf("TIME %.8lu\n", frame_time >> 2);\ | ||
84 | txt_printf("FPS %.8lu\n", (fps >> 2) + 1);\ | ||
85 | }\ | ||
86 | } while (0) | ||
87 | |||
88 | static u32 prof_frame_counter = 0; | ||
89 | |||
90 | static u32 frame_cycles = 0; | ||
91 | static u32 flip_cycles = 0; | ||
92 | static u32 clear_cycles = 0; | ||
93 | static u32 line_cycles = 0; | ||
94 | static u32 rect_cycles = 0; | ||
95 | static u32 fill_rect_cycles = 0; | ||
96 | static u32 chr_cycles = 0; | ||
97 | static u32 icn_cycles = 0; | ||
98 | static u32 txt_drawf_cycles = 0; | ||
99 | static u32 txt_printf_cycles = 0; | ||
100 | static u32 txt_render_cycles = 0; | ||
101 | static u32 txt_clear_cycles = 0; | ||
102 | static u32 input_cycles = 0; | ||
103 | |||
104 | static u32 avg_frame_cycles = 0; | ||
105 | static u32 avg_flip_cycles = 0; | ||
106 | static u32 avg_clear_cycles = 0; | ||
107 | static u32 avg_line_cycles = 0; | ||
108 | static u32 avg_rect_cycles = 0; | ||
109 | static u32 avg_fill_rect_cycles = 0; | ||
110 | static u32 avg_chr_cycles = 0; | ||
111 | static u32 avg_icn_cycles = 0; | ||
112 | static u32 avg_txt_drawf_cycles = 0; | ||
113 | static u32 avg_txt_printf_cycles = 0; | ||
114 | static u32 avg_txt_render_cycles = 0; | ||
115 | static u32 avg_txt_clear_cycles = 0; | ||
116 | static u32 avg_input_cycles = 0; | ||
117 | |||
118 | #if PROF_ENABLE == 1 | ||
119 | #define FRAME_START()\ | ||
120 | do { \ | ||
121 | if (prof_frame_counter == PROF_N_FRAMES) {\ | ||
122 | avg_frame_cycles = frame_cycles / prof_frame_counter;\ | ||
123 | avg_flip_cycles = flip_cycles / prof_frame_counter;\ | ||
124 | avg_clear_cycles = clear_cycles / prof_frame_counter;\ | ||
125 | avg_line_cycles = line_cycles / prof_frame_counter;\ | ||
126 | avg_rect_cycles = rect_cycles / prof_frame_counter;\ | ||
127 | avg_fill_rect_cycles = fill_rect_cycles / prof_frame_counter;\ | ||
128 | avg_chr_cycles = chr_cycles / prof_frame_counter;\ | ||
129 | avg_icn_cycles = icn_cycles / prof_frame_counter;\ | ||
130 | avg_txt_drawf_cycles = txt_drawf_cycles / prof_frame_counter;\ | ||
131 | avg_txt_printf_cycles = txt_printf_cycles / prof_frame_counter;\ | ||
132 | avg_txt_render_cycles = txt_render_cycles / prof_frame_counter;\ | ||
133 | avg_txt_clear_cycles = txt_clear_cycles / prof_frame_counter;\ | ||
134 | avg_input_cycles = input_cycles / prof_frame_counter;\ | ||
135 | frame_cycles = 0;\ | ||
136 | flip_cycles = 0;\ | ||
137 | clear_cycles = 0;\ | ||
138 | line_cycles = 0;\ | ||
139 | rect_cycles = 0;\ | ||
140 | fill_rect_cycles = 0;\ | ||
141 | chr_cycles = 0;\ | ||
142 | icn_cycles = 0;\ | ||
143 | txt_drawf_cycles = 0;\ | ||
144 | txt_printf_cycles = 0;\ | ||
145 | txt_render_cycles = 0;\ | ||
146 | txt_clear_cycles = 0;\ | ||
147 | input_cycles = 0;\ | ||
148 | prof_frame_counter = 0;\ | ||
149 | }\ | ||
150 | profile_start();\ | ||
151 | } while (0) | ||
152 | #elif PROF_ENABLE == 2 | ||
153 | #define FRAME_START()\ | ||
154 | do { \ | ||
155 | if (prof_frame_counter == PROF_N_FRAMES) {\ | ||
156 | avg_frame_cycles = frame_cycles;\ | ||
157 | avg_flip_cycles = flip_cycles;\ | ||
158 | avg_clear_cycles = clear_cycles;\ | ||
159 | avg_line_cycles = line_cycles;\ | ||
160 | avg_rect_cycles = rect_cycles;\ | ||
161 | avg_fill_rect_cycles = fill_rect_cycles;\ | ||
162 | avg_chr_cycles = chr_cycles;\ | ||
163 | avg_icn_cycles = icn_cycles;\ | ||
164 | avg_txt_drawf_cycles = txt_drawf_cycles;\ | ||
165 | avg_txt_printf_cycles = txt_printf_cycles;\ | ||
166 | avg_txt_render_cycles = txt_render_cycles;\ | ||
167 | avg_txt_clear_cycles = txt_clear_cycles;\ | ||
168 | avg_input_cycles = input_cycles;\ | ||
169 | frame_cycles = 0;\ | ||
170 | flip_cycles = 0;\ | ||
171 | clear_cycles = 0;\ | ||
172 | line_cycles = 0;\ | ||
173 | rect_cycles = 0;\ | ||
174 | fill_rect_cycles = 0;\ | ||
175 | chr_cycles = 0;\ | ||
176 | icn_cycles = 0;\ | ||
177 | txt_drawf_cycles = 0;\ | ||
178 | txt_printf_cycles = 0;\ | ||
179 | txt_render_cycles = 0;\ | ||
180 | txt_clear_cycles = 0;\ | ||
181 | input_cycles = 0;\ | ||
182 | prof_frame_counter = 0;\ | ||
183 | }\ | ||
184 | profile_start();\ | ||
185 | } while (0) | ||
186 | #endif | ||
187 | |||
188 | #define FRAME_END() \ | ||
189 | do { \ | ||
190 | prof_frame_counter++;\ | ||
191 | frame_cycles += profile_stop();\ | ||
192 | } while (0) | ||
193 | |||
194 | #else | ||
195 | |||
196 | // No profiling. | ||
197 | #define PROF(F,VAR) (F) | ||
198 | #define PROF_SHOW() | ||
199 | #define FRAME_START() | ||
200 | #define FRAME_END() | ||
201 | #endif | ||
diff --git a/src/sequencer.c b/src/sequencer.c index 2d2e2d3..b582cf4 100644 --- a/src/sequencer.c +++ b/src/sequencer.c | |||
@@ -2921,8 +2921,6 @@ clipboard_copy(void) { | |||
2921 | 2921 | ||
2922 | void | 2922 | void |
2923 | handle_sequencer_input(void) { | 2923 | handle_sequencer_input(void) { |
2924 | poll_keys(); | ||
2925 | |||
2926 | if (key_tap(KEY_START)) { | 2924 | if (key_tap(KEY_START)) { |
2927 | // Stop the sequencer or start playing from the beginning. | 2925 | // Stop the sequencer or start playing from the beginning. |
2928 | toggle_playing(); | 2926 | toggle_playing(); |