diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-20 13:33:37 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-20 13:33:37 +0200 |
commit | be9350824404fc9eb91069bd76b798c034cafed0 (patch) | |
tree | 14ab9c319578f47b6f68b35f9dcece83463584fe | |
parent | 773cd197a323d2f9b701addd63e30d54e43c74f5 (diff) | |
download | uxngba-be9350824404fc9eb91069bd76b798c034cafed0.tar.gz uxngba-be9350824404fc9eb91069bd76b798c034cafed0.zip |
Add an improved profiling method for more granularity
-rw-r--r-- | src/main.c | 66 |
1 files changed, 49 insertions, 17 deletions
@@ -37,7 +37,7 @@ | |||
37 | #define CONTROL_METHODS CONTROL_CONTROLLER,CONTROL_MOUSE,CONTROL_KEYBOARD | 37 | #define CONTROL_METHODS CONTROL_CONTROLLER,CONTROL_MOUSE,CONTROL_KEYBOARD |
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | #define PROF_ENABLE 1 | 40 | #define PROF_ENABLE 2 |
41 | 41 | ||
42 | #ifdef PROF_ENABLE | 42 | #ifdef PROF_ENABLE |
43 | #if PROF_ENABLE == 0 | 43 | #if PROF_ENABLE == 0 |
@@ -47,6 +47,12 @@ | |||
47 | #define TEXT_ENABLE 1 | 47 | #define TEXT_ENABLE 1 |
48 | #define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR))) | 48 | #define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR))) |
49 | #elif PROF_ENABLE == 2 | 49 | #elif PROF_ENABLE == 2 |
50 | #define PROF(F,VAR) \ | ||
51 | do { \ | ||
52 | u32 __tmp_prof = profile_measure();\ | ||
53 | (F);\ | ||
54 | (VAR) = profile_measure() - __tmp_prof;\ | ||
55 | } while (0) | ||
50 | #define TEXT_ENABLE 1 | 56 | #define TEXT_ENABLE 1 |
51 | // TODO: allow measuring inside a PROF function using profile_measure to store | 57 | // TODO: allow measuring inside a PROF function using profile_measure to store |
52 | // temporary values and calculating the difference. | 58 | // temporary values and calculating the difference. |
@@ -62,23 +68,35 @@ | |||
62 | #define PROF_SHOW() \ | 68 | #define PROF_SHOW() \ |
63 | do { \ | 69 | do { \ |
64 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ | 70 | txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ |
65 | txt_printf("FLIP: %.8lu\n", flip_cycles);\ | 71 | txt_printf("INPUT %.8lu\n", input_cycles);\ |
66 | txt_printf("EVAL: %.8lu\n", eval_cycles);\ | 72 | txt_printf("EVAL %.8lu\n", eval_cycles);\ |
67 | screen_fill(BG_BACK, 0, 0, 15 * 8, 2 * 8, 0);\ | 73 | txt_printf("VIDEO\n");\ |
74 | txt_printf(">PIX %.8lu\n", ppu_pixel_cycles);\ | ||
75 | txt_printf(">FILL %.8lu\n", ppu_fill_cycles);\ | ||
76 | txt_printf(">1BPP %.8lu\n", ppu_icn_cycles);\ | ||
77 | txt_printf(">2BPP %.8lu\n", ppu_chr_cycles);\ | ||
78 | txt_printf(">FLIP %.8lu\n", flip_cycles);\ | ||
79 | txt_printf("AUDIO %.8lu\n", mix_cycles);\ | ||
80 | txt_printf("FRAME %.8lu\n", frame_cycles);\ | ||
81 | screen_fill(BG_BACK, 0, 0, 8 * 16, 8 * 10, 0);\ | ||
68 | flipbuf();\ | 82 | flipbuf();\ |
69 | } while (0) | 83 | } while (0) |
70 | 84 | ||
71 | 85 | static u32 frame_cycles = 0; | |
72 | // static u32 ppu_pixel_cycles = 0; | 86 | static u32 ppu_pixel_cycles = 0; |
73 | // static u32 ppu_fill_cycles = 0; | 87 | static u32 ppu_fill_cycles = 0; |
74 | // static u32 ppu_chr_cycles = 0; | 88 | static u32 ppu_chr_cycles = 0; |
75 | // static u32 ppu_icn_cycles = 0; | 89 | static u32 ppu_icn_cycles = 0; |
76 | static u32 flip_cycles = 0; | 90 | static u32 flip_cycles = 0; |
77 | static u32 eval_cycles = 0; | 91 | static u32 eval_cycles = 0; |
78 | static u32 input_cycles = 0; | 92 | static u32 input_cycles = 0; |
79 | static u32 mix_cycles = 0; | 93 | static u32 mix_cycles = 0; |
80 | #define FRAME_START() ; // TODO | 94 | #define FRAME_START()\ |
81 | #define FRAME_END() ; // TODO | 95 | do { \ |
96 | frame_cycles = 0;\ | ||
97 | profile_start();\ | ||
98 | } while (0) | ||
99 | #define FRAME_END() (frame_cycles = profile_stop()) | ||
82 | #else | 100 | #else |
83 | #define PROF(F,VAR) (F) | 101 | #define PROF(F,VAR) (F) |
84 | #define PROF_SHOW() | 102 | #define PROF_SHOW() |
@@ -147,9 +165,9 @@ screen_deo(u8 *ram, u8 *d, u8 port) { | |||
147 | u16 y1 = SCREEN_HEIGHT - 1; | 165 | u16 y1 = SCREEN_HEIGHT - 1; |
148 | if(ctrl & 0x10) x1 = x0, x0 = 0; | 166 | if(ctrl & 0x10) x1 = x0, x0 = 0; |
149 | if(ctrl & 0x20) y1 = y0, y0 = 0; | 167 | if(ctrl & 0x20) y1 = y0, y0 = 0; |
150 | screen_fill(layer, x0, y0, x1, y1, color); | 168 | PROF(screen_fill(layer, x0, y0, x1, y1, color), ppu_fill_cycles); |
151 | } else { | 169 | } else { |
152 | ppu_pixel(layer, x0, y0, color); | 170 | PROF(ppu_pixel(layer, x0, y0, color), ppu_pixel_cycles); |
153 | if(d[0x6] & 0x1) POKE2(d + 0x8, x0 + 1); /* auto x+1 */ | 171 | if(d[0x6] & 0x1) POKE2(d + 0x8, x0 + 1); /* auto x+1 */ |
154 | if(d[0x6] & 0x2) POKE2(d + 0xa, y0 + 1); /* auto y+1 */ | 172 | if(d[0x6] & 0x2) POKE2(d + 0xa, y0 + 1); /* auto y+1 */ |
155 | } | 173 | } |
@@ -174,9 +192,19 @@ screen_deo(u8 *ram, u8 *d, u8 port) { | |||
174 | for(size_t i = 0; i <= n; i++) { | 192 | for(size_t i = 0; i <= n; i++) { |
175 | u8 *sprite = &ram[addr]; | 193 | u8 *sprite = &ram[addr]; |
176 | if (twobpp) { | 194 | if (twobpp) { |
177 | ppu_2bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy); | 195 | PROF(ppu_2bpp(layer, |
196 | x + dy * i, | ||
197 | y + dx * i, | ||
198 | sprite, | ||
199 | color, | ||
200 | flipx, flipy), ppu_chr_cycles); | ||
178 | } else { | 201 | } else { |
179 | ppu_1bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy); | 202 | PROF(ppu_1bpp(layer, |
203 | x + dy * i, | ||
204 | y + dx * i, | ||
205 | sprite, | ||
206 | color, | ||
207 | flipx, flipy), ppu_icn_cycles); | ||
180 | } | 208 | } |
181 | addr += (d[0x6] & 0x04) << (1 + twobpp); | 209 | addr += (d[0x6] & 0x04) << (1 + twobpp); |
182 | } | 210 | } |
@@ -615,20 +643,24 @@ main(void) { | |||
615 | // Main loop. | 643 | // Main loop. |
616 | uxn_eval(&u, PAGE_PROGRAM); | 644 | uxn_eval(&u, PAGE_PROGRAM); |
617 | u8 frame_counter = 0; | 645 | u8 frame_counter = 0; |
646 | // NOTE: A VBLANK is 83776 cycles; anything that takes longer than that | ||
647 | // will make us fail to render at 60FPS. | ||
618 | while(true) { | 648 | while(true) { |
619 | FRAME_START(); | 649 | FRAME_START(); |
650 | bios_vblank_wait(); | ||
620 | PROF(handle_input(&u), input_cycles); | 651 | PROF(handle_input(&u), input_cycles); |
621 | PROF(uxn_eval(&u, PEEK2(&u.dev[0x20])), eval_cycles); | 652 | PROF(uxn_eval(&u, PEEK2(&u.dev[0x20])), eval_cycles); |
622 | PROF(sound_mix(), mix_cycles); | 653 | PROF(sound_mix(), mix_cycles); |
623 | bios_vblank_wait(); | 654 | // TODO: allow configuration to do VSYNC at 15 or 30 fps to avoid too |
655 | // much memory copying on demanding uxn roms. | ||
624 | PROF(flipbuf(), flip_cycles); | 656 | PROF(flipbuf(), flip_cycles); |
625 | frame_counter++; | 657 | frame_counter++; |
626 | if (frame_counter == 60) { | 658 | if (frame_counter == 60) { |
627 | seconds++; | 659 | seconds++; |
628 | frame_counter = 0; | 660 | frame_counter = 0; |
629 | } | 661 | } |
630 | PROF_SHOW(); | ||
631 | FRAME_END(); | 662 | FRAME_END(); |
663 | PROF_SHOW(); | ||
632 | } | 664 | } |
633 | 665 | ||
634 | return 0; | 666 | return 0; |