aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2023-04-20 14:51:09 +0200
committerBad Diode <bd@badd10de.dev>2023-04-20 14:51:09 +0200
commit74eb2bf14f7e82c86419374bfd46b6cfd89f2df8 (patch)
treecb5c65f8cb851183e3959e7c193b9e6dc61a47a6
parentbe9350824404fc9eb91069bd76b798c034cafed0 (diff)
downloaduxngba-74eb2bf14f7e82c86419374bfd46b6cfd89f2df8.tar.gz
uxngba-74eb2bf14f7e82c86419374bfd46b6cfd89f2df8.zip
Add improved avg profiling method
-rw-r--r--src/main.c129
1 files changed, 104 insertions, 25 deletions
diff --git a/src/main.c b/src/main.c
index b6a979f..b1562ca 100644
--- a/src/main.c
+++ b/src/main.c
@@ -39,49 +39,59 @@
39 39
40#define PROF_ENABLE 2 40#define PROF_ENABLE 2
41 41
42#ifdef PROF_ENABLE 42#if PROF_ENABLE > 0 && PROF_ENABLE < 3
43#if PROF_ENABLE == 0 43
44#define TEXT_ENABLE 1 44#ifndef PROF_N_FRAMES
45#define PROF(F,VAR) (profile_start(),(F),(VAR) = profile_stop()) 45#define PROF_N_FRAMES 15
46#elif PROF_ENABLE == 1 46#endif
47
48#if PROF_ENABLE == 1
49// Profile method 1: Average per N frames.
47#define TEXT_ENABLE 1 50#define TEXT_ENABLE 1
48#define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR)))
49#elif PROF_ENABLE == 2
50#define PROF(F,VAR) \ 51#define PROF(F,VAR) \
51 do { \ 52 do { \
52 u32 __tmp_prof = profile_measure();\ 53 u32 __tmp_prof = profile_measure();\
53 (F);\ 54 (F);\
54 (VAR) = profile_measure() - __tmp_prof;\ 55 (VAR) += profile_measure() - __tmp_prof;\
55 } while (0) 56 } while (0)
57
58#elif PROF_ENABLE == 2
59// Profile method 2: Maximum in N frames.
56#define TEXT_ENABLE 1 60#define TEXT_ENABLE 1
57// TODO: allow measuring inside a PROF function using profile_measure to store 61#define PROF(F,VAR) \
58// temporary values and calcualting the difference. 62 do { \
59// TODO: allow per frame or per 60 second averaging of cycle measurement. 63 u32 __tmp_prof = profile_measure();\
60// TODO: calculate fps based on frame timing. 64 (F);\
65 (VAR) = MAX(profile_measure() - __tmp_prof, (VAR));\
66 } while (0)
61#endif 67#endif
68
62#ifndef PROF_SHOW_X 69#ifndef PROF_SHOW_X
63#define PROF_SHOW_X 0 70#define PROF_SHOW_X 0
64#endif 71#endif
65#ifndef PROF_SHOW_Y 72#ifndef PROF_SHOW_Y
66#define PROF_SHOW_Y 0 73#define PROF_SHOW_Y 0
67#endif 74#endif
75
68#define PROF_SHOW() \ 76#define PROF_SHOW() \
69 do { \ 77 do { \
70 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ 78 txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
71 txt_printf("INPUT %.8lu\n", input_cycles);\ 79 txt_printf("INPUT %.8lu\n", avg_input_cycles);\
72 txt_printf("EVAL %.8lu\n", eval_cycles);\ 80 txt_printf("EVAL %.8lu\n", avg_eval_cycles);\
73 txt_printf("VIDEO\n");\ 81 txt_printf("VIDEO\n");\
74 txt_printf(">PIX %.8lu\n", ppu_pixel_cycles);\ 82 txt_printf(">PIX %.8lu\n", avg_ppu_pixel_cycles);\
75 txt_printf(">FILL %.8lu\n", ppu_fill_cycles);\ 83 txt_printf(">FILL %.8lu\n", avg_ppu_fill_cycles);\
76 txt_printf(">1BPP %.8lu\n", ppu_icn_cycles);\ 84 txt_printf(">1BPP %.8lu\n", avg_ppu_icn_cycles);\
77 txt_printf(">2BPP %.8lu\n", ppu_chr_cycles);\ 85 txt_printf(">2BPP %.8lu\n", avg_ppu_chr_cycles);\
78 txt_printf(">FLIP %.8lu\n", flip_cycles);\ 86 txt_printf(">FLIP %.8lu\n", avg_flip_cycles);\
79 txt_printf("AUDIO %.8lu\n", mix_cycles);\ 87 txt_printf("AUDIO %.8lu\n", avg_mix_cycles);\
80 txt_printf("FRAME %.8lu\n", frame_cycles);\ 88 txt_printf("TOTAL %.8lu\n", avg_frame_cycles);\
81 screen_fill(BG_BACK, 0, 0, 8 * 16, 8 * 10, 0);\ 89 screen_fill(BG_BACK, 0, 0, 8 * 16, 8 * 10, 0);\
82 flipbuf();\ 90 flipbuf();\
83 } while (0) 91 } while (0)
84 92
93static u32 prof_frame_counter = 0;
94
85static u32 frame_cycles = 0; 95static u32 frame_cycles = 0;
86static u32 ppu_pixel_cycles = 0; 96static u32 ppu_pixel_cycles = 0;
87static u32 ppu_fill_cycles = 0; 97static u32 ppu_fill_cycles = 0;
@@ -91,15 +101,84 @@ static u32 flip_cycles = 0;
91static u32 eval_cycles = 0; 101static u32 eval_cycles = 0;
92static u32 input_cycles = 0; 102static u32 input_cycles = 0;
93static u32 mix_cycles = 0; 103static u32 mix_cycles = 0;
104
105static u32 avg_ppu_pixel_cycles = 0;
106static u32 avg_ppu_fill_cycles = 0;
107static u32 avg_ppu_chr_cycles = 0;
108static u32 avg_ppu_icn_cycles = 0;
109static u32 avg_flip_cycles = 0;
110static u32 avg_eval_cycles = 0;
111static u32 avg_input_cycles = 0;
112static u32 avg_mix_cycles = 0;
113static u32 avg_frame_cycles = 0;
114
115#if PROF_ENABLE == 1
94#define FRAME_START()\ 116#define FRAME_START()\
95 do { \ 117 do { \
96 frame_cycles = 0;\ 118 if (prof_frame_counter == PROF_N_FRAMES) {\
119 avg_ppu_pixel_cycles = ppu_pixel_cycles / prof_frame_counter;\
120 avg_ppu_fill_cycles = ppu_fill_cycles / prof_frame_counter;\
121 avg_ppu_chr_cycles = ppu_chr_cycles / prof_frame_counter;\
122 avg_ppu_icn_cycles = ppu_icn_cycles / prof_frame_counter;\
123 avg_flip_cycles = flip_cycles / prof_frame_counter;\
124 avg_eval_cycles = eval_cycles / prof_frame_counter;\
125 avg_input_cycles = input_cycles / prof_frame_counter;\
126 avg_mix_cycles = mix_cycles / prof_frame_counter;\
127 avg_frame_cycles = frame_cycles / prof_frame_counter;\
128 prof_frame_counter = 0;\
129 frame_cycles = 0;\
130 ppu_pixel_cycles = 0;\
131 ppu_fill_cycles = 0;\
132 ppu_chr_cycles = 0;\
133 ppu_icn_cycles = 0;\
134 flip_cycles = 0;\
135 eval_cycles = 0;\
136 input_cycles = 0;\
137 mix_cycles = 0;\
138 }\
97 profile_start();\ 139 profile_start();\
98 } while (0) 140 } while (0)
99#define FRAME_END() (frame_cycles = profile_stop()) 141#elif PROF_ENABLE == 2
142#define FRAME_START()\
143 do { \
144 if (prof_frame_counter == PROF_N_FRAMES) {\
145 avg_ppu_pixel_cycles = ppu_pixel_cycles;\
146 avg_ppu_fill_cycles = ppu_fill_cycles;\
147 avg_ppu_chr_cycles = ppu_chr_cycles;\
148 avg_ppu_icn_cycles = ppu_icn_cycles;\
149 avg_flip_cycles = flip_cycles;\
150 avg_eval_cycles = eval_cycles;\
151 avg_input_cycles = input_cycles;\
152 avg_mix_cycles = mix_cycles;\
153 avg_frame_cycles = frame_cycles / prof_frame_counter;\
154 prof_frame_counter = 0;\
155 frame_cycles = 0;\
156 ppu_pixel_cycles = 0;\
157 ppu_fill_cycles = 0;\
158 ppu_chr_cycles = 0;\
159 ppu_icn_cycles = 0;\
160 flip_cycles = 0;\
161 eval_cycles = 0;\
162 input_cycles = 0;\
163 mix_cycles = 0;\
164 }\
165 profile_start();\
166 } while (0)
167#endif
168
169#define FRAME_END() \
170 do { \
171 prof_frame_counter++;\
172 frame_cycles += profile_stop();\
173 } while (0)
174
100#else 175#else
176
177// No profiling.
101#define PROF(F,VAR) (F) 178#define PROF(F,VAR) (F)
102#define PROF_SHOW() 179#define PROF_SHOW()
180#define FRAME_START()
181#define FRAME_END()
103#endif 182#endif
104 183
105static time_t seconds = 0; 184static time_t seconds = 0;
@@ -646,8 +725,9 @@ main(void) {
646 // NOTE: A VBLANK is 83776 cycles, anything other than that will make it so 725 // NOTE: A VBLANK is 83776 cycles, anything other than that will make it so
647 // we fail to render at 60FPS. 726 // we fail to render at 60FPS.
648 while(true) { 727 while(true) {
649 FRAME_START(); 728 PROF_SHOW();
650 bios_vblank_wait(); 729 bios_vblank_wait();
730 FRAME_START();
651 PROF(handle_input(&u), input_cycles); 731 PROF(handle_input(&u), input_cycles);
652 PROF(uxn_eval(&u, PEEK2(&u.dev[0x20])), eval_cycles); 732 PROF(uxn_eval(&u, PEEK2(&u.dev[0x20])), eval_cycles);
653 PROF(sound_mix(), mix_cycles); 733 PROF(sound_mix(), mix_cycles);
@@ -660,7 +740,6 @@ main(void) {
660 frame_counter = 0; 740 frame_counter = 0;
661 } 741 }
662 FRAME_END(); 742 FRAME_END();
663 PROF_SHOW();
664 } 743 }
665 744
666 return 0; 745 return 0;