aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-22 18:45:35 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-22 18:45:35 +0200
commit d4fe4d95f105d8b9b47d26264c4876cbf4095a5d (patch)
tree c85cee096603ab54301537add8f7d6e231bdbd81
parent 320690edaff025d4d446e2f67d23304e1810196e (diff)
downloadstepper-d4fe4d95f105d8b9b47d26264c4876cbf4095a5d.tar.gz
stepper-d4fe4d95f105d8b9b47d26264c4876cbf4095a5d.zip
Prepare profiling macros
-rw-r--r-- src/gba/gba.h 31
-rw-r--r-- src/main.c 44
-rw-r--r-- src/profiling.c 201
-rw-r--r-- src/sequencer.c 2
4 files changed, 229 insertions, 49 deletions
diff --git a/src/gba/gba.h b/src/gba/gba.h
index b02d745..27a6a9a 100644
--- a/src/gba/gba.h
+++ b/src/gba/gba.h
@@ -98,18 +98,18 @@ typedef u16 Color;
98typedef Color Palette[16]; 98typedef Color Palette[16];
99 99
100// Inline function to calculate the 15 bit color value. 100// Inline function to calculate the 15 bit color value.
101#define RGB15(R,G,B) (u16)(((B) << 10) | ((G) << 5) | (R)); 101#define RGB15(R,G,B) (u16)(((B) << 10) | ((G) << 5) | (R))
102 102
103// Some nice default colors. 103// Some nice default colors.
104#define COLOR_BLACK RGB15(0, 0, 0) 104#define COLOR_BLACK RGB15( 0, 0, 0)
105#define COLOR_RED RGB15(31, 0,10) 105#define COLOR_RED RGB15(31, 0, 10)
106#define COLOR_GREEN RGB15(31, 0,10) 106#define COLOR_GREEN RGB15( 0, 31, 18)
107#define COLOR_YELLOW RGB15(31, 0,10) 107#define COLOR_YELLOW RGB15(31, 31, 0)
108#define COLOR_BLUE RGB15(2, 17,31) 108#define COLOR_BLUE RGB15( 2, 17, 31)
109#define COLOR_PURPLE RGB15(2, 17,31) 109#define COLOR_PURPLE RGB15(15, 7, 31)
110#define COLOR_CYAN RGB15(0, 27,30) 110#define COLOR_CYAN RGB15( 0, 27, 30)
111#define COLOR_GREY RGB15(16,17,19) 111#define COLOR_GREY RGB15(16, 17, 19)
112#define COLOR_WHITE RGB15(28,28,28) 112#define COLOR_WHITE RGB15(28, 28, 28)
113 113
114// 114//
115// Tile memory access. 115// Tile memory access.
@@ -419,6 +419,8 @@ inline
419void 419void
420dma_copy(void *dst, const void *src, u32 size, int channel) { 420dma_copy(void *dst, const void *src, u32 size, int channel) {
421 dma_transfer_copy(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE); 421 dma_transfer_copy(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE);
422 // Stall for 2 cycles in case we call this function more than once.
423 asm("nop"); asm("nop");
422} 424}
423 425
424// Fill the dst location with the word set at src. 426// Fill the dst location with the word set at src.
@@ -426,6 +428,8 @@ inline
426void 428void
427dma_fill(void *dst, vu32 src, u32 size, int channel) { 429dma_fill(void *dst, vu32 src, u32 size, int channel) {
428 dma_transfer_fill(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE); 430 dma_transfer_fill(dst, src, size / 4, channel, DMA_CHUNK_32 | DMA_ENABLE);
431 // Stall for 2 cycles in case we call this function more than once.
432 asm("nop"); asm("nop");
429} 433}
430 434
431// 435//
@@ -661,6 +665,7 @@ wait_vsync(void) {
661#define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0])) 665#define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0]))
662 666
663// Fixed-point arithmetic for (i.P) numbers. 667// Fixed-point arithmetic for (i.P) numbers.
668#define FP_NUM(A,P) ((A) << (P))
664#define FP_MUL(A,B,P) (((A) * (B)) >> (P)) 669#define FP_MUL(A,B,P) (((A) * (B)) >> (P))
665#define FP_DIV(A,B,P) (((A) << (P)) / (B)) 670#define FP_DIV(A,B,P) (((A) << (P)) / (B))
666#define FP_LERP(Y0,Y1,X,P) ((Y0) + FP_MUL((X), ((Y1) - (Y0)), P)) 671#define FP_LERP(Y0,Y1,X,P) ((Y0) + FP_MUL((X), ((Y1) - (Y0)), P))
@@ -683,4 +688,10 @@ memcpy32(u32 *dst, const u32 *src, u32 size) {
683 } 688 }
684} 689}
685 690
691//
692// Compiler hints.
693//
694
695#define UNROLL_LOOPS __attribute__((optimize("unroll-loops")))
696
686#endif // GBA_H 697#endif // GBA_H
diff --git a/src/main.c b/src/main.c
index a322df2..e694057 100644
--- a/src/main.c
+++ b/src/main.c
@@ -15,45 +15,13 @@ WITH REGARD TO THIS SOFTWARE.
15#include "renderer.c" 15#include "renderer.c"
16#include "sequencer.c" 16#include "sequencer.c"
17 17
18#define PROF_ENABLE 0
19#include "profiling.c"
20
18// 21//
19// Config parameters. 22// Config parameters.
20// 23//
21 24
22#ifdef PROF_ENABLE
23#if PROF_ENABLE == 0
24#define PROF(F,VAR) (profile_start(),(F),(VAR) = profile_stop())
25#elif PROF_ENABLE == 1
26#define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR)))
27#endif
28#ifndef PROF_SHOW_X
29#define PROF_SHOW_X 0
30#endif
31#ifndef PROF_SHOW_Y
32#define PROF_SHOW_Y 0
33#endif
34#define PROF_SHOW() \
35 do {\
36 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
37 txt_printf("EVAL: %lu ", eval_cycles);\
38 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\
39 txt_printf("FLIP: %lu ", flip_cycles);\
40 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\
41 txt_printf("INPUT: %lu ", input_cycles);\
42 txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\
43 txt_printf("FRAME: %lu ", frame_counter);\
44 frame_counter++;\
45 } while (0)
46#define PROF_INIT() \
47 u32 frame_counter = 0;\
48 u32 input_cycles = 0;\
49 u32 eval_cycles = 0;\
50 u32 flip_cycles = 0;
51#else
52#define PROF(F,VAR) (F)
53#define PROF_SHOW()
54#define PROF_INIT()
55#endif
56
57int main(void) { 25int main(void) {
58 // Adjust system wait times. 26 // Adjust system wait times.
59 SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE; 27 SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE;
@@ -72,12 +40,14 @@ int main(void) {
72 sequencer_init(); 40 sequencer_init();
73 41
74 // Main loop. 42 // Main loop.
75 PROF_INIT();
76 while (true) { 43 while (true) {
44 poll_keys();
77 bios_vblank_wait(); 45 bios_vblank_wait();
78 PROF(flip_buffer(), flip_cycles); 46 FRAME_START();
79 PROF(handle_sequencer_input(), input_cycles); 47 PROF(handle_sequencer_input(), input_cycles);
80 PROF_SHOW(); 48 PROF_SHOW();
49 PROF(flip_buffer(), flip_cycles);
50 FRAME_END();
81 } 51 }
82 52
83 return 0; 53 return 0;
diff --git a/src/profiling.c b/src/profiling.c
new file mode 100644
index 0000000..de969d2
--- /dev/null
+++ b/src/profiling.c
@@ -0,0 +1,201 @@
//
// Profiling macros.
//
// Define PROF_ENABLE before including this file to select a mode:
//
//   1      Every counter reports the average over PROF_N_FRAMES frames.
//   2      Every counter reports the maximum seen in PROF_N_FRAMES frames.
//   other  Profiling is compiled out (this is the default, PROF_ENABLE == 0).
//
// Intended use inside the frame loop:
//
//   FRAME_START();
//   PROF(some_call(), some_cycles);
//   PROF_SHOW();
//   FRAME_END();
//
// The enabled modes use names that must already be declared by the including
// translation unit: u32, bool, FP_NUM, FP_DIV, key_tap, KEY_START, KEY_SELECT,
// txt_position, txt_printf, draw_filled_rect, profile_start, profile_stop and
// profile_measure.
//

#ifndef PROF_ENABLE
#define PROF_ENABLE 0
#endif

#if PROF_ENABLE > 0 && PROF_ENABLE < 3

// Number of frames in one measurement window.
#ifndef PROF_N_FRAMES
#define PROF_N_FRAMES 30
#endif

// A window of zero frames would divide by zero when averaging, and a negative
// length can never match the unsigned frame counter.
#if PROF_N_FRAMES < 1
#error "PROF_N_FRAMES must be at least 1"
#endif

// Defined by both profiling modes.
// NOTE(review): presumably switches on the text renderer that PROF_SHOW needs;
// confirm against the code that tests TEXT_ENABLE.
#define TEXT_ENABLE 1

// PROF(F, VAR): evaluate the expression F and charge the cycles it took,
// measured with profile_measure(), to the counter VAR.
#if PROF_ENABLE == 1

// Profile method 1: Average per N frames. VAR accumulates the elapsed cycles;
// FRAME_START divides by the frame count when the window closes.
#define PROF(F,VAR) \
    do { \
        u32 prof_t0_ = profile_measure(); \
        (F); \
        (VAR) += profile_measure() - prof_t0_; \
    } while (0)

#else

// Profile method 2: Maximum in N frames. The elapsed time is stored in a local
// before it is compared, so the timer is read exactly once after F regardless
// of how a MAX() helper would evaluate its arguments.
#define PROF(F,VAR) \
    do { \
        u32 prof_t0_ = profile_measure(); \
        (F); \
        u32 prof_dt_ = profile_measure() - prof_t0_; \
        if (prof_dt_ > (VAR)) { \
            (VAR) = prof_dt_; \
        } \
    } while (0)

#endif

// Position of the on-screen report, passed to txt_position().
#ifndef PROF_SHOW_X
#define PROF_SHOW_X 0
#endif
#ifndef PROF_SHOW_Y
#define PROF_SHOW_Y 0
#endif

// Report visibility, toggled from PROF_SHOW with START and SELECT.
static bool profile_show = true;
static bool profile_bg_show = true;

// Frames completed in the current window.
static u32 prof_frame_counter = 0;

// Counters for the window in progress. These are the VAR arguments to PROF.
static u32 frame_cycles = 0;
static u32 flip_cycles = 0;
static u32 clear_cycles = 0;
static u32 line_cycles = 0;
static u32 rect_cycles = 0;
static u32 fill_rect_cycles = 0;
static u32 chr_cycles = 0;
static u32 icn_cycles = 0;
static u32 txt_drawf_cycles = 0;
static u32 txt_printf_cycles = 0;
static u32 txt_render_cycles = 0;
static u32 txt_clear_cycles = 0;
static u32 input_cycles = 0;

// Results of the last completed window: averages in mode 1, maxima in mode 2
// (the avg_ prefix is kept in both modes).
// NOTE(review): avg_input_cycles is computed but PROF_SHOW never prints it.
static u32 avg_frame_cycles = 0;
static u32 avg_flip_cycles = 0;
static u32 avg_clear_cycles = 0;
static u32 avg_line_cycles = 0;
static u32 avg_rect_cycles = 0;
static u32 avg_fill_rect_cycles = 0;
static u32 avg_chr_cycles = 0;
static u32 avg_icn_cycles = 0;
static u32 avg_txt_drawf_cycles = 0;
static u32 avg_txt_printf_cycles = 0;
static u32 avg_txt_render_cycles = 0;
static u32 avg_txt_clear_cycles = 0;
static u32 avg_input_cycles = 0;

// PROF_SHOW(): handle the visibility toggles and print the report.
// START toggles the per-counter table, SELECT toggles the TIME/FPS lines.
// The cleared rectangle starts at (PROF_SHOW_X, PROF_SHOW_Y).
// NOTE(review): the rectangle is 8*18 by 8*16 while txt_position receives the
// same X/Y unscaled; confirm both calls use the same units when the origin is
// not (0, 0).
// TIME and FPS are derived from the frame total in fixed point with 2
// fractional bits; 280896 is used as the cycle count of one full frame.
#define PROF_SHOW() \
    do { \
        if (key_tap(KEY_START)) { \
            profile_show ^= 1; \
        } \
        if (key_tap(KEY_SELECT)) { \
            profile_bg_show ^= 1; \
        } \
        if (profile_show) { \
            txt_position((PROF_SHOW_X), (PROF_SHOW_Y)); \
            draw_filled_rect( \
                (PROF_SHOW_X), (PROF_SHOW_Y), 8 * 18, 8 * 16, 0); \
            txt_printf("VIDEO\n"); \
            txt_printf(">CLEAR %.8lu\n", avg_clear_cycles); \
            txt_printf(">LINES %.8lu\n", avg_line_cycles); \
            txt_printf(">RECT %.8lu\n", avg_rect_cycles); \
            txt_printf(">FRECT %.8lu\n", avg_fill_rect_cycles); \
            txt_printf(">1BPP %.8lu\n", avg_icn_cycles); \
            txt_printf(">2BPP %.8lu\n", avg_chr_cycles); \
            txt_printf(">FLIP %.8lu\n", avg_flip_cycles); \
            txt_printf("TEXT\n"); \
            txt_printf(">DRAWF %.8lu\n", avg_txt_drawf_cycles); \
            txt_printf(">PRINTF %.8lu\n", avg_txt_printf_cycles); \
            txt_printf(">RENDER %.8lu\n", avg_txt_render_cycles); \
            txt_printf(">CLEAR %.8lu\n", avg_txt_clear_cycles); \
            txt_printf("TOTAL %.8lu\n", avg_frame_cycles); \
        } \
        if (profile_bg_show) { \
            u32 frame_time = \
                FP_DIV( \
                    FP_NUM(avg_frame_cycles + 1, 2), \
                    FP_NUM(2809, 2), \
                    2) * 166; \
            u32 fps = \
                FP_DIV( \
                    FP_NUM(280896 * 60, 2), \
                    FP_NUM(avg_frame_cycles + 1, 2), \
                    2); \
            txt_printf("TIME %.8lu\n", frame_time >> 2); \
            txt_printf("FPS %.8lu\n", (fps >> 2) + 1); \
        } \
    } while (0)

// How a finished window turns a counter into its published value:
// mode 1 divides by the number of frames, mode 2 publishes it unchanged.
#if PROF_ENABLE == 1
#define PROF_REDUCE_(TOTAL) ((TOTAL) / prof_frame_counter)
#else
#define PROF_REDUCE_(TOTAL) (TOTAL)
#endif

// Publish counter NAME into avg_NAME and clear it for the next window.
#define PROF_LATCH_(NAME) \
    do { \
        avg_##NAME = PROF_REDUCE_(NAME); \
        NAME = 0; \
    } while (0)

// FRAME_START(): once PROF_N_FRAMES frames have been recorded, publish and
// reset every counter; then start timing the new frame with profile_start().
#define FRAME_START() \
    do { \
        if (prof_frame_counter == PROF_N_FRAMES) { \
            PROF_LATCH_(frame_cycles); \
            PROF_LATCH_(flip_cycles); \
            PROF_LATCH_(clear_cycles); \
            PROF_LATCH_(line_cycles); \
            PROF_LATCH_(rect_cycles); \
            PROF_LATCH_(fill_rect_cycles); \
            PROF_LATCH_(chr_cycles); \
            PROF_LATCH_(icn_cycles); \
            PROF_LATCH_(txt_drawf_cycles); \
            PROF_LATCH_(txt_printf_cycles); \
            PROF_LATCH_(txt_render_cycles); \
            PROF_LATCH_(txt_clear_cycles); \
            PROF_LATCH_(input_cycles); \
            prof_frame_counter = 0; \
        } \
        profile_start(); \
    } while (0)

// FRAME_END(): count the frame and record the value of profile_stop().
#if PROF_ENABLE == 1

// Averaging mode: sum the frame totals; FRAME_START divides by the count.
#define FRAME_END() \
    do { \
        prof_frame_counter++; \
        frame_cycles += profile_stop(); \
    } while (0)

#else

// Maximum mode: keep the slowest frame of the window. Summing here would make
// TOTAL, TIME and FPS reflect PROF_N_FRAMES frames instead of the worst one.
#define FRAME_END() \
    do { \
        prof_frame_counter++; \
        u32 prof_frame_total_ = profile_stop(); \
        if (prof_frame_total_ > frame_cycles) { \
            frame_cycles = prof_frame_total_; \
        } \
    } while (0)

#endif

#else

// No profiling: PROF only evaluates F (VAR is discarded, so it does not need
// to be declared) and the other macros expand to nothing.
#define PROF(F,VAR) (F)
#define PROF_SHOW()
#define FRAME_START()
#define FRAME_END()
#endif
diff --git a/src/sequencer.c b/src/sequencer.c
index 2d2e2d3..b582cf4 100644
--- a/src/sequencer.c
+++ b/src/sequencer.c
@@ -2921,8 +2921,6 @@ clipboard_copy(void) {
2921 2921
2922void 2922void
2923handle_sequencer_input(void) { 2923handle_sequencer_input(void) {
2924 poll_keys();
2925
2926 if (key_tap(KEY_START)) { 2924 if (key_tap(KEY_START)) {
2927 // Stop the sequencer or start playing from the beginning. 2925 // Stop the sequencer or start playing from the beginning.
2928 toggle_playing(); 2926 toggle_playing();