diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-20 14:51:09 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-20 14:51:09 +0200 |
commit | 74eb2bf14f7e82c86419374bfd46b6cfd89f2df8 (patch) | |
tree | cb5c65f8cb851183e3959e7c193b9e6dc61a47a6 | |
parent | be9350824404fc9eb91069bd76b798c034cafed0 (diff) | |
download | uxngba-74eb2bf14f7e82c86419374bfd46b6cfd89f2df8.tar.gz uxngba-74eb2bf14f7e82c86419374bfd46b6cfd89f2df8.zip |
Add improved avg profiling method
-rw-r--r-- | src/main.c | 129 |
1 file changed, 104 insertions, 25 deletions
@@ -39,49 +39,59 @@ | |||
39 | 39 | ||
40 | #define PROF_ENABLE 2 | 40 | #define PROF_ENABLE 2 |
41 | 41 | ||
42 | #ifdef PROF_ENABLE | 42 | #if PROF_ENABLE > 0 && PROF_ENABLE < 3 |
43 | #if PROF_ENABLE == 0 | 43 | |
44 | #define TEXT_ENABLE 1 | 44 | #ifndef PROF_N_FRAMES |
45 | #define PROF(F,VAR) (profile_start(),(F),(VAR) = profile_stop()) | 45 | #define PROF_N_FRAMES 15 |
46 | #elif PROF_ENABLE == 1 | 46 | #endif |
47 | |||
48 | #if PROF_ENABLE == 1 | ||
49 | // Profile method 1: Average per N frames. | ||
47 | #define TEXT_ENABLE 1 | 50 | #define TEXT_ENABLE 1 |
48 | #define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR))) | ||
49 | #elif PROF_ENABLE == 2 | ||
50 | #define PROF(F,VAR) \ | 51 | #define PROF(F,VAR) \ |
51 | do { \ | 52 | do { \ |
52 | u32 __tmp_prof = profile_measure();\ | 53 | u32 __tmp_prof = profile_measure();\ |
53 | (F);\ | 54 | (F);\ |
54 | (VAR) = profile_measure() - __tmp_prof;\ | 55 | (VAR) += profile_measure() - __tmp_prof;\ |
55 | } while (0) | 56 | } while (0) |
57 | |||
58 | #elif PROF_ENABLE == 2 | ||
59 | // Profile method 2: Maximum in N frames. | ||
56 | #define TEXT_ENABLE 1 | 60 | #define TEXT_ENABLE 1 |
57 | // TODO: allow measuring inside a PROF function using profile_measure to store | 61 | #define PROF(F,VAR) \ |
58 | // temporary values and calculating the difference. | 62 | do { \ |
59 | // TODO: allow per frame or per 60 second averaging of cycle measurement. | 63 | u32 __tmp_prof = profile_measure();\ |
60 | // TODO: calculate fps based on frame timing. | 64 | (F);\ |
65 | (VAR) = MAX(profile_measure() - __tmp_prof, (VAR));\ | ||
66 | } while (0) | ||
61 | #endif | 67 | #endif |
68 | |||
62 | #ifndef PROF_SHOW_X | 69 | #ifndef PROF_SHOW_X |
63 | #define PROF_SHOW_X 0 | 70 | #define PROF_SHOW_X 0 |
64 | #endif | 71 | #endif |
65 | #ifndef PROF_SHOW_Y | 72 | #ifndef PROF_SHOW_Y |
66 | #define PROF_SHOW_Y 0 | 73 | #define PROF_SHOW_Y 0 |
67 | #endif | 74 | #endif |
75 | |||
68 | #define PROF_SHOW() \ | 76 | #define PROF_SHOW() \ |
69 | do { \ | 77 | do { \ |
70 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ | 78 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ |
71 | txt_printf("INPUT %.8lu\n", input_cycles);\ | 79 | txt_printf("INPUT %.8lu\n", avg_input_cycles);\ |
72 | txt_printf("EVAL %.8lu\n", eval_cycles);\ | 80 | txt_printf("EVAL %.8lu\n", avg_eval_cycles);\ |
73 | txt_printf("VIDEO\n");\ | 81 | txt_printf("VIDEO\n");\ |
74 | txt_printf(">PIX %.8lu\n", ppu_pixel_cycles);\ | 82 | txt_printf(">PIX %.8lu\n", avg_ppu_pixel_cycles);\ |
75 | txt_printf(">FILL %.8lu\n", ppu_fill_cycles);\ | 83 | txt_printf(">FILL %.8lu\n", avg_ppu_fill_cycles);\ |
76 | txt_printf(">1BPP %.8lu\n", ppu_icn_cycles);\ | 84 | txt_printf(">1BPP %.8lu\n", avg_ppu_icn_cycles);\ |
77 | txt_printf(">2BPP %.8lu\n", ppu_chr_cycles);\ | 85 | txt_printf(">2BPP %.8lu\n", avg_ppu_chr_cycles);\ |
78 | txt_printf(">FLIP %.8lu\n", flip_cycles);\ | 86 | txt_printf(">FLIP %.8lu\n", avg_flip_cycles);\ |
79 | txt_printf("AUDIO %.8lu\n", mix_cycles);\ | 87 | txt_printf("AUDIO %.8lu\n", avg_mix_cycles);\ |
80 | txt_printf("FRAME %.8lu\n", frame_cycles);\ | 88 | txt_printf("TOTAL %.8lu\n", avg_frame_cycles);\ |
81 | screen_fill(BG_BACK, 0, 0, 8 * 16, 8 * 10, 0);\ | 89 | screen_fill(BG_BACK, 0, 0, 8 * 16, 8 * 10, 0);\ |
82 | flipbuf();\ | 90 | flipbuf();\ |
83 | } while (0) | 91 | } while (0) |
84 | 92 | ||
93 | static u32 prof_frame_counter = 0; | ||
94 | |||
85 | static u32 frame_cycles = 0; | 95 | static u32 frame_cycles = 0; |
86 | static u32 ppu_pixel_cycles = 0; | 96 | static u32 ppu_pixel_cycles = 0; |
87 | static u32 ppu_fill_cycles = 0; | 97 | static u32 ppu_fill_cycles = 0; |
@@ -91,15 +101,84 @@ static u32 flip_cycles = 0; | |||
91 | static u32 eval_cycles = 0; | 101 | static u32 eval_cycles = 0; |
92 | static u32 input_cycles = 0; | 102 | static u32 input_cycles = 0; |
93 | static u32 mix_cycles = 0; | 103 | static u32 mix_cycles = 0; |
104 | |||
105 | static u32 avg_ppu_pixel_cycles = 0; | ||
106 | static u32 avg_ppu_fill_cycles = 0; | ||
107 | static u32 avg_ppu_chr_cycles = 0; | ||
108 | static u32 avg_ppu_icn_cycles = 0; | ||
109 | static u32 avg_flip_cycles = 0; | ||
110 | static u32 avg_eval_cycles = 0; | ||
111 | static u32 avg_input_cycles = 0; | ||
112 | static u32 avg_mix_cycles = 0; | ||
113 | static u32 avg_frame_cycles = 0; | ||
114 | |||
115 | #if PROF_ENABLE == 1 | ||
94 | #define FRAME_START()\ | 116 | #define FRAME_START()\ |
95 | do { \ | 117 | do { \ |
96 | frame_cycles = 0;\ | 118 | if (prof_frame_counter == PROF_N_FRAMES) {\ |
119 | avg_ppu_pixel_cycles = ppu_pixel_cycles / prof_frame_counter;\ | ||
120 | avg_ppu_fill_cycles = ppu_fill_cycles / prof_frame_counter;\ | ||
121 | avg_ppu_chr_cycles = ppu_chr_cycles / prof_frame_counter;\ | ||
122 | avg_ppu_icn_cycles = ppu_icn_cycles / prof_frame_counter;\ | ||
123 | avg_flip_cycles = flip_cycles / prof_frame_counter;\ | ||
124 | avg_eval_cycles = eval_cycles / prof_frame_counter;\ | ||
125 | avg_input_cycles = input_cycles / prof_frame_counter;\ | ||
126 | avg_mix_cycles = mix_cycles / prof_frame_counter;\ | ||
127 | avg_frame_cycles = frame_cycles / prof_frame_counter;\ | ||
128 | prof_frame_counter = 0;\ | ||
129 | frame_cycles = 0;\ | ||
130 | ppu_pixel_cycles = 0;\ | ||
131 | ppu_fill_cycles = 0;\ | ||
132 | ppu_chr_cycles = 0;\ | ||
133 | ppu_icn_cycles = 0;\ | ||
134 | flip_cycles = 0;\ | ||
135 | eval_cycles = 0;\ | ||
136 | input_cycles = 0;\ | ||
137 | mix_cycles = 0;\ | ||
138 | }\ | ||
97 | profile_start();\ | 139 | profile_start();\ |
98 | } while (0) | 140 | } while (0) |
99 | #define FRAME_END() (frame_cycles = profile_stop()) | 141 | #elif PROF_ENABLE == 2 |
142 | #define FRAME_START()\ | ||
143 | do { \ | ||
144 | if (prof_frame_counter == PROF_N_FRAMES) {\ | ||
145 | avg_ppu_pixel_cycles = ppu_pixel_cycles;\ | ||
146 | avg_ppu_fill_cycles = ppu_fill_cycles;\ | ||
147 | avg_ppu_chr_cycles = ppu_chr_cycles;\ | ||
148 | avg_ppu_icn_cycles = ppu_icn_cycles;\ | ||
149 | avg_flip_cycles = flip_cycles;\ | ||
150 | avg_eval_cycles = eval_cycles;\ | ||
151 | avg_input_cycles = input_cycles;\ | ||
152 | avg_mix_cycles = mix_cycles;\ | ||
153 | avg_frame_cycles = frame_cycles / prof_frame_counter;\ | ||
154 | prof_frame_counter = 0;\ | ||
155 | frame_cycles = 0;\ | ||
156 | ppu_pixel_cycles = 0;\ | ||
157 | ppu_fill_cycles = 0;\ | ||
158 | ppu_chr_cycles = 0;\ | ||
159 | ppu_icn_cycles = 0;\ | ||
160 | flip_cycles = 0;\ | ||
161 | eval_cycles = 0;\ | ||
162 | input_cycles = 0;\ | ||
163 | mix_cycles = 0;\ | ||
164 | }\ | ||
165 | profile_start();\ | ||
166 | } while (0) | ||
167 | #endif | ||
168 | |||
169 | #define FRAME_END() \ | ||
170 | do { \ | ||
171 | prof_frame_counter++;\ | ||
172 | frame_cycles += profile_stop();\ | ||
173 | } while (0) | ||
174 | |||
100 | #else | 175 | #else |
176 | |||
177 | // No profiling. | ||
101 | #define PROF(F,VAR) (F) | 178 | #define PROF(F,VAR) (F) |
102 | #define PROF_SHOW() | 179 | #define PROF_SHOW() |
180 | #define FRAME_START() | ||
181 | #define FRAME_END() | ||
103 | #endif | 182 | #endif |
104 | 183 | ||
105 | static time_t seconds = 0; | 184 | static time_t seconds = 0; |
@@ -646,8 +725,9 @@ main(void) { | |||
646 | // NOTE: A VBLANK is 83776 cycles, anything other than that will make it so | 725 | // NOTE: A VBLANK is 83776 cycles, anything other than that will make it so |
647 | // we fail to render at 60FPS. | 726 | // we fail to render at 60FPS. |
648 | while(true) { | 727 | while(true) { |
649 | FRAME_START(); | 728 | PROF_SHOW(); |
650 | bios_vblank_wait(); | 729 | bios_vblank_wait(); |
730 | FRAME_START(); | ||
651 | PROF(handle_input(&u), input_cycles); | 731 | PROF(handle_input(&u), input_cycles); |
652 | PROF(uxn_eval(&u, PEEK2(&u.dev[0x20])), eval_cycles); | 732 | PROF(uxn_eval(&u, PEEK2(&u.dev[0x20])), eval_cycles); |
653 | PROF(sound_mix(), mix_cycles); | 733 | PROF(sound_mix(), mix_cycles); |
@@ -660,7 +740,6 @@ main(void) { | |||
660 | frame_counter = 0; | 740 | frame_counter = 0; |
661 | } | 741 | } |
662 | FRAME_END(); | 742 | FRAME_END(); |
663 | PROF_SHOW(); | ||
664 | } | 743 | } |
665 | 744 | ||
666 | return 0; | 745 | return 0; |