From d4fe4d95f105d8b9b47d26264c4876cbf4095a5d Mon Sep 17 00:00:00 2001
From: Bad Diode
Date: Sat, 22 Apr 2023 18:45:35 +0200
Subject: Prepare profiling macros

---
 src/gba/gba.h   |  31 ++++++---
 src/main.c      |  44 ++-----------
 src/profiling.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/sequencer.c |   2 -
 4 files changed, 229 insertions(+), 49 deletions(-)
 create mode 100644 src/profiling.c

(limited to 'src')

diff --git a/src/gba/gba.h b/src/gba/gba.h
index b02d745..27a6a9a 100644
--- a/src/gba/gba.h
+++ b/src/gba/gba.h
@@ -98,18 +98,18 @@ typedef u16 Color;
 typedef Color Palette[16];
 
 // Inline function to calculate the 15 bit color value.
-#define RGB15(R,G,B) (u16)(((B) << 10) | ((G) << 5) | (R));
+#define RGB15(R,G,B) (u16)(((B) << 10) | ((G) << 5) | (R))
 
 // Some nice default colors.
-#define COLOR_BLACK RGB15(0, 0, 0)
-#define COLOR_RED RGB15(31, 0,10)
-#define COLOR_GREEN RGB15(31, 0,10)
-#define COLOR_YELLOW RGB15(31, 0,10)
-#define COLOR_BLUE RGB15(2, 17,31)
-#define COLOR_PURPLE RGB15(2, 17,31)
-#define COLOR_CYAN RGB15(0, 27,30)
-#define COLOR_GREY RGB15(16,17,19)
-#define COLOR_WHITE RGB15(28,28,28)
+#define COLOR_BLACK RGB15( 0, 0, 0)
+#define COLOR_RED RGB15(31, 0, 10)
+#define COLOR_GREEN RGB15( 0, 31, 18)
+#define COLOR_YELLOW RGB15(31, 31, 0)
+#define COLOR_BLUE RGB15( 2, 17, 31)
+#define COLOR_PURPLE RGB15(15, 7, 31)
+#define COLOR_CYAN RGB15( 0, 27, 30)
+#define COLOR_GREY RGB15(16, 17, 19)
+#define COLOR_WHITE RGB15(28, 28, 28)
 
 //
 // Tile memory access.
@@ -419,6 +419,8 @@ inline void
 dma_copy(void *dst, const void *src, u32 size, int channel) {
     dma_transfer_copy(dst, src, size / 4, channel,
             DMA_CHUNK_32 | DMA_ENABLE);
+    // Stall for 2 cycles in case we call this function more than once.
+    asm("nop"); asm("nop");
 }
 
 // Fill the dst location with the word set at src.
@@ -426,6 +428,8 @@ inline void
 dma_fill(void *dst, vu32 src, u32 size, int channel) {
     dma_transfer_fill(dst, src, size / 4, channel,
             DMA_CHUNK_32 | DMA_ENABLE);
+    // Stall for 2 cycles in case we call this function more than once.
+    asm("nop"); asm("nop");
 }
 
 //
@@ -661,6 +665,7 @@ wait_vsync(void) {
 #define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0]))
 
 // Fixed-point arithmetic for (i.P) numbers.
+#define FP_NUM(A,P) ((A) << (P))
 #define FP_MUL(A,B,P) (((A) * (B)) >> (P))
 #define FP_DIV(A,B,P) (((A) << (P)) / (B))
 #define FP_LERP(Y0,Y1,X,P) ((Y0) + FP_MUL((X), ((Y1) - (Y0)), P))
@@ -683,4 +688,10 @@ memcpy32(u32 *dst, const u32 *src, u32 size) {
     }
 }
 
+//
+// Compiler hints.
+//
+
+#define UNROLL_LOOPS __attribute__((optimize("unroll-loops")))
+
 #endif // GBA_H
diff --git a/src/main.c b/src/main.c
index a322df2..e694057 100644
--- a/src/main.c
+++ b/src/main.c
@@ -15,45 +15,13 @@ WITH REGARD TO THIS SOFTWARE.
 #include "renderer.c"
 #include "sequencer.c"
 
+#define PROF_ENABLE 0
+#include "profiling.c"
+
 //
 // Config parameters.
 //
 
-#ifdef PROF_ENABLE
-#if PROF_ENABLE == 0
-#define PROF(F,VAR) (profile_start(),(F),(VAR) = profile_stop())
-#elif PROF_ENABLE == 1
-#define PROF(F,VAR) (profile_start(),(F),(VAR) = MAX(profile_stop(), (VAR)))
-#endif
-#ifndef PROF_SHOW_X
-#define PROF_SHOW_X 0
-#endif
-#ifndef PROF_SHOW_Y
-#define PROF_SHOW_Y 0
-#endif
-#define PROF_SHOW() \
-    do {\
-        txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\
-        txt_printf("EVAL: %lu ", eval_cycles);\
-        txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+1);\
-        txt_printf("FLIP: %lu ", flip_cycles);\
-        txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+2);\
-        txt_printf("INPUT: %lu ", input_cycles);\
-        txt_position((PROF_SHOW_X), (PROF_SHOW_Y)+3);\
-        txt_printf("FRAME: %lu ", frame_counter);\
-        frame_counter++;\
-    } while (0)
-#define PROF_INIT() \
-    u32 frame_counter = 0;\
-    u32 input_cycles = 0;\
-    u32 eval_cycles = 0;\
-    u32 flip_cycles = 0;
-#else
-#define PROF(F,VAR) (F)
-#define PROF_SHOW()
-#define PROF_INIT()
-#endif
-
 int main(void) {
     // Adjust system wait times.
     SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE;
@@ -72,12 +40,14 @@ int main(void) {
     sequencer_init();
 
     // Main loop.
-    PROF_INIT();
     while (true) {
+        poll_keys();
         bios_vblank_wait();
-        PROF(flip_buffer(), flip_cycles);
+        FRAME_START();
         PROF(handle_sequencer_input(), input_cycles);
         PROF_SHOW();
+        PROF(flip_buffer(), flip_cycles);
+        FRAME_END();
     }
 
     return 0;
diff --git a/src/profiling.c b/src/profiling.c
new file mode 100644
index 0000000..de969d2
--- /dev/null
+++ b/src/profiling.c
@@ -0,0 +1,201 @@
+//
+// Profiling macros. PROF_ENABLE selects the mode: 1 = average per N frames, 2 = maximum in N frames, any other value = disabled.
+//
+
+#ifndef PROF_ENABLE
+#define PROF_ENABLE 0
+#endif
+
+#if PROF_ENABLE > 0 && PROF_ENABLE < 3
+
+#ifndef PROF_N_FRAMES
+#define PROF_N_FRAMES 30
+#endif
+
+// Profile method 1: Average per N frames. PROF() accumulates into VAR; FRAME_START() divides by the frame count.
+#if PROF_ENABLE == 1
+#define TEXT_ENABLE 1
+#define PROF(F,VAR) /* F must be an expression, as in the other PROF() variants. */ \
+    do { \
+        u32 __tmp_prof = profile_measure();\
+        (F);\
+        (VAR) += profile_measure() - __tmp_prof;\
+    } while (0)
+
+// Profile method 2: Maximum in N frames.
+#elif PROF_ENABLE == 2
+#define TEXT_ENABLE 1
+#define PROF(F,VAR) /* Run F and keep the largest profile_measure() delta seen so far in VAR. */ \
+    do { \
+        u32 __tmp_prof = profile_measure();\
+        (F);\
+        (VAR) = MAX(profile_measure() - __tmp_prof, (VAR)); /* NOTE(review): if MAX evaluates its arguments twice, profile_measure() is sampled twice -- confirm. */\
+    } while (0)
+#endif
+
+#ifndef PROF_SHOW_X
+#define PROF_SHOW_X 0
+#endif
+#ifndef PROF_SHOW_Y
+#define PROF_SHOW_Y 0
+#endif
+
+static bool profile_show = true; // Toggled by KEY_START: the per-section table.
+static bool profile_bg_show = true; // Toggled by KEY_SELECT: the TIME/FPS lines.
+
+#define PROF_SHOW() /* Print the published avg_* values; relies on key state polled by the caller. */ \
+    do { \
+        if (key_tap(KEY_START)) {\
+            profile_show ^= 1;\
+        }\
+        if (key_tap(KEY_SELECT)) {\
+            profile_bg_show ^= 1;\
+        }\
+        if (profile_show) {\
+            txt_position((PROF_SHOW_X), (PROF_SHOW_Y)); /* NOTE(review): confirm txt_position() and draw_filled_rect() use the same coordinate units. */\
+            draw_filled_rect((PROF_SHOW_X), (PROF_SHOW_Y), 8 * 18, 8 * 16, 0); /* Origin is (X, Y); X was previously passed twice. */\
+            txt_printf("VIDEO\n");\
+            txt_printf(">CLEAR %.8lu\n", avg_clear_cycles);\
+            txt_printf(">LINES %.8lu\n", avg_line_cycles);\
+            txt_printf(">RECT %.8lu\n", avg_rect_cycles);\
+            txt_printf(">FRECT %.8lu\n", avg_fill_rect_cycles);\
+            txt_printf(">1BPP %.8lu\n", avg_icn_cycles);\
+            txt_printf(">2BPP %.8lu\n", avg_chr_cycles);\
+            txt_printf(">FLIP %.8lu\n", avg_flip_cycles);\
+            txt_printf("TEXT\n");\
+            txt_printf(">DRAWF %.8lu\n", avg_txt_drawf_cycles);\
+            txt_printf(">PRINTF %.8lu\n", avg_txt_printf_cycles);\
+            txt_printf(">RENDER %.8lu\n", avg_txt_render_cycles);\
+            txt_printf(">CLEAR %.8lu\n", avg_txt_clear_cycles);\
+            txt_printf("TOTAL %.8lu\n", avg_frame_cycles);\
+        }\
+        if (profile_bg_show) { /* Both values are (i.2) fixed point; the +1 keeps the FPS divisor non-zero. */\
+            u32 frame_time =\
+                FP_DIV(\
+                    FP_NUM(avg_frame_cycles + 1, 2),\
+                    FP_NUM(2809, 2),\
+                    2) * 166;\
+            u32 fps =\
+                FP_DIV(\
+                    FP_NUM(280896 * 60, 2),\
+                    FP_NUM(avg_frame_cycles + 1, 2),\
+                    2);\
+            txt_printf("TIME %.8lu\n", frame_time >> 2);\
+            txt_printf("FPS %.8lu\n", (fps >> 2) + 1);\
+        }\
+    } while (0)
+
+static u32 prof_frame_counter = 0; // Incremented by FRAME_END(), reset by FRAME_START().
+
+static u32 frame_cycles = 0; // Raw accumulators for the current measurement window.
+static u32 flip_cycles = 0;
+static u32 clear_cycles = 0;
+static u32 line_cycles = 0;
+static u32 rect_cycles = 0;
+static u32 fill_rect_cycles = 0;
+static u32 chr_cycles = 0;
+static u32 icn_cycles = 0;
+static u32 txt_drawf_cycles = 0;
+static u32 txt_printf_cycles = 0;
+static u32 txt_render_cycles = 0;
+static u32 txt_clear_cycles = 0;
+static u32 input_cycles = 0;
+
+static u32 avg_frame_cycles = 0; // avg_*: values published by FRAME_START() every PROF_N_FRAMES frames.
+static u32 avg_flip_cycles = 0;
+static u32 avg_clear_cycles = 0;
+static u32 avg_line_cycles = 0;
+static u32 avg_rect_cycles = 0;
+static u32 avg_fill_rect_cycles = 0;
+static u32 avg_chr_cycles = 0;
+static u32 avg_icn_cycles = 0;
+static u32 avg_txt_drawf_cycles = 0;
+static u32 avg_txt_printf_cycles = 0;
+static u32 avg_txt_render_cycles = 0;
+static u32 avg_txt_clear_cycles = 0;
+static u32 avg_input_cycles = 0; // NOTE(review): published but not printed by PROF_SHOW().
+
+#if PROF_ENABLE == 1
+#define FRAME_START() /* Mode 1: publish per-frame averages, clear the accumulators, then start the timer. */\
+    do { \
+        if (prof_frame_counter == PROF_N_FRAMES) {\
+            avg_frame_cycles = frame_cycles / prof_frame_counter;\
+            avg_flip_cycles = flip_cycles / prof_frame_counter;\
+            avg_clear_cycles = clear_cycles / prof_frame_counter;\
+            avg_line_cycles = line_cycles / prof_frame_counter;\
+            avg_rect_cycles = rect_cycles / prof_frame_counter;\
+            avg_fill_rect_cycles = fill_rect_cycles / prof_frame_counter;\
+            avg_chr_cycles = chr_cycles / prof_frame_counter;\
+            avg_icn_cycles = icn_cycles / prof_frame_counter;\
+            avg_txt_drawf_cycles = txt_drawf_cycles / prof_frame_counter;\
+            avg_txt_printf_cycles = txt_printf_cycles / prof_frame_counter;\
+            avg_txt_render_cycles = txt_render_cycles / prof_frame_counter;\
+            avg_txt_clear_cycles = txt_clear_cycles / prof_frame_counter;\
+            avg_input_cycles = input_cycles / prof_frame_counter;\
+            frame_cycles = 0;\
+            flip_cycles = 0;\
+            clear_cycles = 0;\
+            line_cycles = 0;\
+            rect_cycles = 0;\
+            fill_rect_cycles = 0;\
+            chr_cycles = 0;\
+            icn_cycles = 0;\
+            txt_drawf_cycles = 0;\
+            txt_printf_cycles = 0;\
+            txt_render_cycles = 0;\
+            txt_clear_cycles = 0;\
+            input_cycles = 0;\
+            prof_frame_counter = 0;\
+        }\
+        profile_start();\
+    } while (0)
+#elif PROF_ENABLE == 2
+#define FRAME_START() /* Mode 2: publish the maxima as-is, clear the accumulators, then start the timer. */\
+    do { \
+        if (prof_frame_counter == PROF_N_FRAMES) {\
+            avg_frame_cycles = frame_cycles;\
+            avg_flip_cycles = flip_cycles;\
+            avg_clear_cycles = clear_cycles;\
+            avg_line_cycles = line_cycles;\
+            avg_rect_cycles = rect_cycles;\
+            avg_fill_rect_cycles = fill_rect_cycles;\
+            avg_chr_cycles = chr_cycles;\
+            avg_icn_cycles = icn_cycles;\
+            avg_txt_drawf_cycles = txt_drawf_cycles;\
+            avg_txt_printf_cycles = txt_printf_cycles;\
+            avg_txt_render_cycles = txt_render_cycles;\
+            avg_txt_clear_cycles = txt_clear_cycles;\
+            avg_input_cycles = input_cycles;\
+            frame_cycles = 0;\
+            flip_cycles = 0;\
+            clear_cycles = 0;\
+            line_cycles = 0;\
+            rect_cycles = 0;\
+            fill_rect_cycles = 0;\
+            chr_cycles = 0;\
+            icn_cycles = 0;\
+            txt_drawf_cycles = 0;\
+            txt_printf_cycles = 0;\
+            txt_render_cycles = 0;\
+            txt_clear_cycles = 0;\
+            input_cycles = 0;\
+            prof_frame_counter = 0;\
+        }\
+        profile_start();\
+    } while (0)
+#endif
+
+#define FRAME_END() /* Mode 1 sums the frame time; mode 2 keeps the per-frame maximum, since its FRAME_START() publishes frame_cycles undivided. */ \
+    do { \
+        prof_frame_counter++;\
+        u32 prof_frame_total = profile_stop(); /* Sampled once so the comparison below cannot call profile_stop() again. */\
+        frame_cycles = (PROF_ENABLE == 2) ? (prof_frame_total > frame_cycles ? prof_frame_total : frame_cycles) : frame_cycles + prof_frame_total;\
+    } while (0)
+
+#else
+// No profiling.
+#define PROF(F,VAR) (F)
+#define PROF_SHOW()
+#define FRAME_START()
+#define FRAME_END()
+#endif
diff --git a/src/sequencer.c b/src/sequencer.c
index 2d2e2d3..b582cf4 100644
--- a/src/sequencer.c
+++ b/src/sequencer.c
@@ -2921,8 +2921,6 @@ clipboard_copy(void) {
 
 void
 handle_sequencer_input(void) {
-    poll_keys();
-
     if (key_tap(KEY_START)) {
         // Stop the sequencer or start playing from the beginning.
         toggle_playing();
-- 
cgit v1.2.1