From be9350824404fc9eb91069bd76b798c034cafed0 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Thu, 20 Apr 2023 13:33:37 +0200 Subject: Add an improved profiling method for more granularity --- src/main.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/src/main.c b/src/main.c index 86a9376..b6a979f 100644 --- a/src/main.c +++ b/src/main.c @@ -37,7 +37,7 @@ #define CONTROL_METHODS CONTROL_CONTROLLER,CONTROL_MOUSE,CONTROL_KEYBOARD #endif -#define PROF_ENABLE 1 +#define PROF_ENABLE 2 #ifdef PROF_ENABLE #if PROF_ENABLE == 0 @@ -47,6 +47,12 @@ #define TEXT_ENABLE 1 #define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR))) #elif PROF_ENABLE == 2 +#define PROF(F,VAR) \ + do { \ + u32 __tmp_prof = profile_measure();\ + (F);\ + (VAR) = profile_measure() - __tmp_prof;\ + } while (0) #define TEXT_ENABLE 1 // TODO: allow measuring inside a PROF function using profile_measure to store // temporary values and calcualting the difference. 
@@ -62,23 +68,35 @@ #define PROF_SHOW() \ do { \ txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ - txt_printf("FLIP: %.8lu\n", flip_cycles);\ - txt_printf("EVAL: %.8lu\n", eval_cycles);\ - screen_fill(BG_BACK, 0, 0, 15 * 8, 2 * 8, 0);\ + txt_printf("INPUT %.8lu\n", input_cycles);\ + txt_printf("EVAL %.8lu\n", eval_cycles);\ + txt_printf("VIDEO\n");\ + txt_printf(">PIX %.8lu\n", ppu_pixel_cycles);\ + txt_printf(">FILL %.8lu\n", ppu_fill_cycles);\ + txt_printf(">1BPP %.8lu\n", ppu_icn_cycles);\ + txt_printf(">2BPP %.8lu\n", ppu_chr_cycles);\ + txt_printf(">FLIP %.8lu\n", flip_cycles);\ + txt_printf("AUDIO %.8lu\n", mix_cycles);\ + txt_printf("FRAME %.8lu\n", frame_cycles);\ + screen_fill(BG_BACK, 0, 0, 8 * 16, 8 * 10, 0);\ flipbuf();\ } while (0) - -// static u32 ppu_pixel_cycles = 0; -// static u32 ppu_fill_cycles = 0; -// static u32 ppu_chr_cycles = 0; -// static u32 ppu_icn_cycles = 0; +static u32 frame_cycles = 0; +static u32 ppu_pixel_cycles = 0; +static u32 ppu_fill_cycles = 0; +static u32 ppu_chr_cycles = 0; +static u32 ppu_icn_cycles = 0; static u32 flip_cycles = 0; static u32 eval_cycles = 0; static u32 input_cycles = 0; static u32 mix_cycles = 0; -#define FRAME_START() ; // TODO -#define FRAME_END() ; // TODO +#define FRAME_START()\ + do { \ + frame_cycles = 0;\ + profile_start();\ + } while (0) +#define FRAME_END() (frame_cycles = profile_stop()) #else #define PROF(F,VAR) (F) #define PROF_SHOW() @@ -147,9 +165,9 @@ screen_deo(u8 *ram, u8 *d, u8 port) { u16 y1 = SCREEN_HEIGHT - 1; if(ctrl & 0x10) x1 = x0, x0 = 0; if(ctrl & 0x20) y1 = y0, y0 = 0; - screen_fill(layer, x0, y0, x1, y1, color); + PROF(screen_fill(layer, x0, y0, x1, y1, color), ppu_fill_cycles); } else { - ppu_pixel(layer, x0, y0, color); + PROF(ppu_pixel(layer, x0, y0, color), ppu_pixel_cycles); if(d[0x6] & 0x1) POKE2(d + 0x8, x0 + 1); /* auto x+1 */ if(d[0x6] & 0x2) POKE2(d + 0xa, y0 + 1); /* auto y+1 */ } @@ -174,9 +192,19 @@ screen_deo(u8 *ram, u8 *d, u8 port) { for(size_t i = 0; i <= n; i++) { 
u8 *sprite = &ram[addr]; if (twobpp) { - ppu_2bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy); + PROF(ppu_2bpp(layer, + x + dy * i, + y + dx * i, + sprite, + color, + flipx, flipy), ppu_chr_cycles); } else { - ppu_1bpp(layer, x + dy * i, y + dx * i, sprite, color, flipx, flipy); + PROF(ppu_1bpp(layer, + x + dy * i, + y + dx * i, + sprite, + color, + flipx, flipy), ppu_icn_cycles); } addr += (d[0x6] & 0x04) << (1 + twobpp); } @@ -615,20 +643,24 @@ main(void) { // Main loop. uxn_eval(&u, PAGE_PROGRAM); u8 frame_counter = 0; + // NOTE: A VBLANK is 83776 cycles; anything longer than that will make us + // fail to render at 60FPS. while(true) { FRAME_START(); + bios_vblank_wait(); PROF(handle_input(&u), input_cycles); PROF(uxn_eval(&u, PEEK2(&u.dev[0x20])), eval_cycles); PROF(sound_mix(), mix_cycles); - bios_vblank_wait(); + // TODO: allow configuration to do VSYNC at 15 or 30 fps to avoid too + // much memory copying on demanding uxn roms. PROF(flipbuf(), flip_cycles); frame_counter++; if (frame_counter == 60) { seconds++; frame_counter = 0; } - PROF_SHOW(); FRAME_END(); + PROF_SHOW(); } return 0; -- cgit v1.2.1