From a7ce765b1b57ec8a528263420852ed36da6d9d84 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Tue, 23 Jan 2024 10:30:54 +0100 Subject: Update profiling macros --- src/gba/gba.h | 84 +++++++++++-------- src/main.c | 94 +++++++++------------ src/profiling.c | 253 ++++++++++++++++++++------------------------------------ src/sequencer.c | 1 - src/settings.c | 2 +- 5 files changed, 182 insertions(+), 252 deletions(-) diff --git a/src/gba/gba.h b/src/gba/gba.h index b5868f0..2cc167f 100644 --- a/src/gba/gba.h +++ b/src/gba/gba.h @@ -237,25 +237,25 @@ flip_page(vu16 *backbuffer) { static inline void profile_start(void) { - TIMER_DATA_2 = 0; - TIMER_DATA_3 = 0; - TIMER_CTRL_2 = 0; - TIMER_CTRL_3 = 0; - TIMER_CTRL_3 = TIMER_CTRL_ENABLE | TIMER_CTRL_CASCADE; - TIMER_CTRL_2 = TIMER_CTRL_ENABLE; + TIMER_DATA_1 = 0; + TIMER_DATA_0 = 0; + TIMER_CTRL_1 = 0; + TIMER_CTRL_0 = 0; + TIMER_CTRL_1 = TIMER_CTRL_ENABLE | TIMER_CTRL_CASCADE; + TIMER_CTRL_0 = TIMER_CTRL_ENABLE; } static inline u32 profile_stop(void) { - TIMER_CTRL_2 = 0; - return (TIMER_DATA_3 << 16) | TIMER_DATA_2; + TIMER_CTRL_0 = 0; + return (TIMER_DATA_1 << 16) | TIMER_DATA_0; } static inline u32 profile_measure(void) { - return (TIMER_DATA_3 << 16) | TIMER_DATA_2; + return (TIMER_DATA_1 << 16) | TIMER_DATA_0; } // @@ -289,12 +289,24 @@ profile_measure(void) { static u16 key_curr = 0; static u16 key_prev = 0; -static inline -void -poll_keys(void) { - key_prev = key_curr; - key_curr = ~KEY_INPUTS & KEY_MASK; -} +// Stores number of frames since a key was pressed. +typedef struct Controller { + int key_up; + int key_down; + int key_left; + int key_right; + int key_select; + int key_start; + int key_b; + int key_a; + int key_l; + int key_r; +} Controller; + +static Controller ctrl = {0}; + +#define RETRIG_OFFSET 16 +#define RETRIG_FRAMES 3 // Returns true if the given key has been pressed at time of calling and was not // pressed since the previous call. 
For example, if a key is being held, this @@ -327,25 +339,6 @@ key_hold(u32 key) { return key_curr & key_prev & key; } -// Stores number of frames since a keay was pressed. -typedef struct Controller { - int key_up; - int key_down; - int key_left; - int key_right; - int key_select; - int key_start; - int key_b; - int key_a; - int key_l; - int key_r; -} Controller; - -static Controller ctrl = {0}; - -#define RETRIG_OFFSET 16 -#define RETRIG_FRAMES 3 - static inline bool _key_retrig(int key, int offset, int frames) { @@ -438,6 +431,14 @@ update_controller(void) { if (key_pressed(KEY_START)) { ctrl.key_start++; } else if (key_released(KEY_START)) { ctrl.key_start = 0; } } +static inline +void +poll_keys(void) { + key_prev = key_curr; + key_curr = ~KEY_INPUTS & KEY_MASK; + update_controller(); +} + // // Direct Memory Access (DMA) // @@ -785,6 +786,7 @@ wait_vsync(void) { // General utility macros. #define MIN(A, B) ((A) <= (B) ? (A) : (B)) #define MAX(A, B) ((A) >= (B) ? (A) : (B)) +#define ABS(A) (((A) ^ ((A) >> (sizeof(A) * 8 - 1))) - ((A) >> (sizeof(A) * 8 - 1))) #define CLAMP(X, MIN, MAX) ((X) <= (MIN) ? (MIN) : (X) > (MAX) ? (MAX): (X)) #define LEN(ARR) (sizeof(ARR) / sizeof((ARR)[0])) @@ -812,10 +814,24 @@ memcpy32(u32 *dst, const u32 *src, u32 size) { } } +static inline +void +memset32(u32 *dst, const u32 data, u32 size) { + for (size_t i = 0; i < size / 4; i++) { + dst[i] = data; + } +} + +// Optimized ARMASM versions of memcpy32 and memset32. +extern void copy32(u32 *dst, u32 *src, u32 chunks); +extern void set32(u32 *dst, u32 data, u32 chunks); + // // Compiler hints. // #define UNROLL_LOOPS __attribute__((optimize("unroll-loops"))) +#define INLINE __attribute__((always_inline)) inline #endif // GBA_H + diff --git a/src/main.c b/src/main.c index a4d3421..6f1d0e6 100644 --- a/src/main.c +++ b/src/main.c @@ -11,7 +11,8 @@ WITH REGARD TO THIS SOFTWARE. // TODO: A list of features I would like to get to implement in the near future. 
// -// High priority: +// Remaining issues: +// // + Higher resolution clock to allow for microtiming and more accurate tempo. // + Look back again at the emulator issues... (I give up) // + Sync via MIDI with the Analogue cables. @@ -32,30 +33,18 @@ WITH REGARD TO THIS SOFTWARE. // + Fix A+B on ch3 // + Make sure Attack/Decay are grey for A+B // + Add help for attack/decay on ch3 +// + Bad performance when selecting patterns? // - Fix any bugs we currently have // - Add clipboard sharing between banks. // - Make sure transposing a sequence past the keyboard limit doesn't affect // the sequence and can be reversed. // - Study saving overhauls for bootleg cartridges. // - When putting a new trigger, make sure it uses the global parameters -// - Bad performance when selecting patterns? -// -// Low priority: -// -// Quality of life improvements. -// - When not on play mode, adjusting a note or a parameter triggers the sound. -// This could get annoying, so maybe it should be a configuration option to -// enable it? -// - Undo/Redo. -// -// Advanced +// - Improve memcpy32 performance (ASM?). We use it a lot in expensive places. // - Add tap tempo for BPM. -// - Allow "marking" several trigs to be able to copy/paste them and/or adjust -// their parameters. -// - Per trig LFO? How would we go about this? There is at least one empty slot -// in all channels. LFO amount? LFO speed? Would need a dedicated page for -// configuring LFOs - +// - Improve drawing routines even more (ASM?). +// - Improve interrupt handler to allow nesting/prioritization. 
+// #include "gba/gba.h" @@ -76,37 +65,37 @@ static int frames = 0; void render_sequencer(void) { if (redraw_trigs) { - PROF(draw_triggers(), draw_trigs_cycles); + draw_triggers(); redraw_trigs = false; } if (redraw_channels) { - PROF(draw_channels(), draw_btn_cycles); + draw_channels(); redraw_channels = false; } if (redraw_pattern_buttons) { - PROF(draw_pattern_buttons(), draw_btn_cycles); + draw_pattern_buttons(); redraw_pattern_buttons = false; } if (redraw_bank_buttons) { - PROF(draw_bank_buttons(), draw_btn_cycles); + draw_bank_buttons(); redraw_bank_buttons = false; } if (redraw_bpm) { - PROF(draw_bpm(), draw_btn_cycles); + draw_bpm(); redraw_bpm = false; } if (redraw_play_pause) { - PROF(draw_play(), draw_btn_cycles); - PROF(draw_stop(), draw_btn_cycles); - PROF(draw_settings(), draw_btn_cycles); + draw_play(); + draw_stop(); + draw_settings(); redraw_play_pause = false; } if (redraw_scale) { - PROF(draw_scale(), draw_btn_cycles); + draw_scale(); redraw_scale = false; } if (redraw_params) { - PROF(draw_parameters(), draw_param_cycles); + draw_parameters(); redraw_params = false; } @@ -117,11 +106,13 @@ render_sequencer(void) { } } - if (frames++ & 0x1) { + if (frames & 0x1) { draw_notif_bar(); - PROF(draw_piano_notes(), draw_piano_cycles); } - PROF(draw_cursors(), draw_cursor_cycles); + if (frames++ % 0x4 == 0) { + draw_piano_notes(); + } + draw_cursors(); } void @@ -169,28 +160,22 @@ render_settings(void) { txt_drawf("HELP", x0 + 2, y0 + 1, COL_FG); txt_drawf("%s", x1 + 8, y0 + 1, COL_FG, toggle_settings_str[settings.help]); - PROF(draw_settings_cursor(), draw_cursor_cycles); + draw_settings_cursor(); } void render(void) { - // NOTE: Debug key input - // PROF(screen_fill(COL_BG), clear_cycles); - // txt_printf("UP: %d\n", ctrl.key_up); - // txt_printf("DOWN: %d\n", ctrl.key_down); - // txt_printf("LEFT: %d\n", ctrl.key_left); - // txt_printf("RIGHT: %d\n", ctrl.key_right); - // txt_printf("A: %d\n", ctrl.key_a); - // txt_printf("B: %d\n", ctrl.key_b); - // 
txt_printf("L: %d\n", ctrl.key_l); - // txt_printf("R: %d\n", ctrl.key_r); - // txt_printf("SEL: %d\n", ctrl.key_select); - // txt_printf("START: %d\n", ctrl.key_start); - // txt_render(); - // txt_clear(); if (clear_screen) { - PROF(screen_fill(COL_BG), clear_cycles); + PROF(screen_fill(COL_BG), PROF_FILL); clear_screen = false; + redraw_trigs = true; + redraw_channels = true; + redraw_pattern_buttons = true; + redraw_bank_buttons = true; + redraw_bpm = true; + redraw_play_pause = true; + redraw_params = true; + redraw_scale = true; } switch (scene) { case SCENE_SETTINGS: { @@ -206,6 +191,10 @@ render(void) { void handle_input(void) { + if (key_tap(KEY_SELECT) && key_hold(KEY_START)) { + PROF_SHOW(); + clear_screen = true; + } switch (scene) { case SCENE_SETTINGS: { handle_settings_input(); @@ -218,7 +207,6 @@ handle_input(void) { void update(void) { - update_controller(); if (next_scene != scene) { scene = next_scene; clear_screen = true; @@ -269,15 +257,15 @@ main(void) { txt_spacing(6); // Main loop. + PROF_INIT(); while (true) { poll_keys(); bios_vblank_wait(); - PROF_SHOW(); FRAME_START(); - PROF(flip_buffer(), flip_cycles); - PROF(update(), update_cycles); - PROF(handle_input(), input_cycles); - PROF(render(), render_cycles); + PROF(flip_buffer(), PROF_FLIP); + PROF(update(), PROF_UPDATE); + PROF(handle_input(), PROF_INPUT); + PROF(render(), PROF_RENDER); FRAME_END(); } diff --git a/src/profiling.c b/src/profiling.c index 0255552..6b073ed 100644 --- a/src/profiling.c +++ b/src/profiling.c @@ -6,184 +6,111 @@ #define PROF_ENABLE 0 #endif -#if PROF_ENABLE > 0 && PROF_ENABLE < 3 +#if PROF_ENABLE > 0 -#ifndef PROF_N_FRAMES -#define PROF_N_FRAMES 30 +#ifndef PROF_RESET_MINMAX +#define PROF_RESET_MINMAX false #endif -// Profile method 1: Average per N frames. 
-#if PROF_ENABLE == 1 -#define TEXT_ENABLE 1 -#define PROF(F,VAR) \ - do { \ - u32 __tmp_prof = profile_measure();\ - F;\ - (VAR) += profile_measure() - __tmp_prof;\ - } while (0) +// Sections that can be profiled; PROF_NUM is the number of sections. +typedef enum ProfType { + PROF_INPUT, + PROF_UPDATE, + PROF_RENDER, + PROF_FLIP, + PROF_FILL, + PROF_NUM, +} ProfType; -// Profile method 2: Maximum in N frames. -#elif PROF_ENABLE == 2 -#define TEXT_ENABLE 1 -#define PROF(F,VAR) \ - do { \ - u32 __tmp_prof = profile_measure();\ - (F);\ - (VAR) = MAX(profile_measure() - __tmp_prof, (VAR));\ - } while (0) -#endif +char *prof_type_str[PROF_NUM] = { + "INPUT ", + "UPDATE ", + "RENDER ", + "FLIPBUF", + "SCRFILL", +}; -#ifndef PROF_SHOW_X -#define PROF_SHOW_X 0 -#endif -#ifndef PROF_SHOW_Y -#define PROF_SHOW_Y 0 -#endif +u32 prof_frame_time = 0; +u32 prof_frame_count = 0; +u32 prof_frame_avg = -1; +u32 prof_frame_time_max = 0; +u32 prof_times[PROF_NUM] = {0}; +u32 prof_count[PROF_NUM] = {0}; +u32 prof_avg[PROF_NUM] = {0}; +u32 prof_max[PROF_NUM] = {0}; +u32 prof_min[PROF_NUM] = {0}; -static bool profile_show = true; +bool prof_reset_minmax = PROF_RESET_MINMAX; +bool prof_show = true; -#define PROF_SHOW() \ - do { \ - if (key_tap(KEY_START)) {\ - profile_show ^= 1;\ - }\ - if (profile_show) {\ - txt_color(1);\ - txt_position((PROF_SHOW_X), (PROF_SHOW_Y));\ - draw_filled_rect((PROF_SHOW_X), (PROF_SHOW_X), 8 * 14, 8 * 10, 0);\ - txt_printf("VIDEO\n");\ - txt_printf(">CLEAR %.8lu\n", avg_clear_cycles);\ - txt_printf(">FLIP %.8lu\n", avg_flip_cycles);\ - txt_printf("SEQUENCER RENDER\n");\ - txt_printf(">TRIGS %.8lu\n", avg_draw_trigs_cycles);\ - txt_printf(">BTNS %.8lu\n", avg_draw_btns_cycles);\ - txt_printf(">PARAM %.8lu\n", avg_draw_param_cycles);\ - txt_printf(">PIANO %.8lu\n", avg_draw_piano_cycles);\ - txt_printf(">CURSOR %.8lu\n", avg_draw_cursor_cycles);\ - txt_printf("UPDATE %.8lu\n", avg_update_cycles);\ - txt_printf("INPUT %.8lu\n", avg_input_cycles);\ - txt_printf("RENDER %.8lu\n", 
avg_render_cycles);\ - txt_printf("TOTAL %.8lu\n", avg_frame_cycles);\ - txt_render();\ - }\ - u32 frame_time =\ - FP_DIV(\ - FP_NUM(avg_frame_cycles + 1, 2),\ - FP_NUM(2809, 2),\ - 2) * 166;\ - u32 fps =\ - FP_DIV(\ - FP_NUM(280896 * 60, 2),\ - FP_NUM(avg_frame_cycles + 1, 2),\ - 2);\ - draw_filled_rect(8 * 18, 0, 239, 16, 0);\ - txt_drawf("TIME: %.6lu", 8 * 18, 0, 1, frame_time >> 2);\ - txt_drawf("MAX FPS: %.4lu", 8 * 18, 8, 1, (fps >> 2) + 1);\ - } while (0) +#define PROF_INIT() do { /* Seed each minimum with -1 (the largest value if u32 is unsigned) so the first sample replaces it. */ \ + for (size_t i = 0; i < PROF_NUM; i++) { \ + prof_min[i] = -1; \ + } \ +} while(0) -static u32 prof_frame_counter = 0; +#define PROF(func, idx) do { /* Accumulate the profile_measure() delta around (func) into slot idx: total, call count, max, min. */ \ + u32 time_before = profile_measure(); \ + (func); \ + u32 time_after = profile_measure(); \ + u32 time_current = time_after - time_before; \ + prof_times[idx] += time_current; \ + prof_count[idx]++; \ + prof_max[idx] = MAX(time_current, prof_max[idx]);\ + prof_min[idx] = MIN(time_current, prof_min[idx]);\ +} while(0) -static u32 frame_cycles = 0; -static u32 flip_cycles = 0; -static u32 clear_cycles = 0; -static u32 input_cycles = 0; -static u32 draw_trigs_cycles = 0; -static u32 draw_btn_cycles = 0; -static u32 draw_piano_cycles = 0; -static u32 draw_param_cycles = 0; -static u32 draw_cursor_cycles = 0; -static u32 render_cycles = 0; -static u32 update_cycles = 0; +#define FRAME_START() do { \ + profile_start();\ +} while(0) -static u32 avg_frame_cycles = 0; -static u32 avg_flip_cycles = 0; -static u32 avg_clear_cycles = 0; -static u32 avg_input_cycles = 0; -static u32 avg_draw_trigs_cycles = 0; -static u32 avg_draw_btns_cycles = 0; -static u32 avg_draw_piano_cycles = 0; -static u32 avg_draw_param_cycles = 0; -static u32 avg_draw_cursor_cycles = 0; -static u32 avg_render_cycles = 0; -static u32 avg_update_cycles = 0; +#define FRAME_END() do { \ + prof_frame_count++;\ + prof_frame_time_max = MAX(prof_frame_time_max, profile_measure());\ + prof_frame_time += profile_stop();\ + if (prof_show) { \ + draw_filled_rect(0, 0, 
SCREEN_WIDTH - 1, 8 * (PROF_NUM + 1), 0); \ + txt_drawf_small("FRAME TIME/FPS: %.9l/%.2l", 0, 0, COL_FG, \ + prof_frame_avg, \ + (u32)((u64)280896 * 60 / (prof_frame_avg + 1)));\ + txt_drawf_small("MAX: %.9l/%l", 8 * 19, 0, COL_FG, \ + prof_frame_time_max,280896);\ + for (size_t idx = 0; idx < PROF_NUM; idx++) { \ + txt_drawf_small("%s %.9l (%.9l %.9l) %08x:%08x", 0, 8 * (idx + 1), COL_FG, \ + prof_type_str[idx], \ + prof_avg[idx], \ + prof_min[idx], \ + prof_max[idx], \ + prof_avg[idx], \ + prof_max[idx]);\ + }; \ + } \ + if (prof_frame_count >= PROF_ENABLE) { \ + for (size_t idx = 0; idx < PROF_NUM; idx++) { \ + prof_avg[idx] = prof_times[idx] / prof_frame_count; \ + if (prof_reset_minmax) { \ + prof_min[idx] = -1; \ + prof_max[idx] = 0; \ + } \ + prof_times[idx] = 0; \ + prof_count[idx] = 0; \ + }; \ + prof_frame_avg = prof_frame_time / prof_frame_count; \ + prof_frame_count = 0; \ + prof_frame_time = 0; \ + } \ + } while(0) -#if PROF_ENABLE == 1 -#define FRAME_START()\ - do { \ - if (prof_frame_counter == PROF_N_FRAMES) {\ - avg_frame_cycles = frame_cycles / prof_frame_counter;\ - avg_flip_cycles = flip_cycles / prof_frame_counter;\ - avg_clear_cycles = clear_cycles / prof_frame_counter;\ - avg_draw_trigs_cycles = draw_trigs_cycles / prof_frame_counter;\ - avg_draw_btns_cycles = draw_btn_cycles / prof_frame_counter;\ - avg_draw_piano_cycles = draw_piano_cycles / prof_frame_counter;\ - avg_draw_param_cycles = draw_param_cycles / prof_frame_counter;\ - avg_draw_cursor_cycles = draw_cursor_cycles / prof_frame_counter;\ - avg_input_cycles = input_cycles / prof_frame_counter;\ - avg_render_cycles = render_cycles / prof_frame_counter;\ - avg_update_cycles = update_cycles / prof_frame_counter;\ - frame_cycles = 0;\ - flip_cycles = 0;\ - clear_cycles = 0;\ - input_cycles = 0;\ - render_cycles = 0;\ - update_cycles = 0;\ - draw_trigs_cycles = 0;\ - draw_param_cycles = 0;\ - draw_cursor_cycles = 0;\ - draw_btn_cycles = 0;\ - draw_piano_cycles = 0;\ - prof_frame_counter = 
0;\ - }\ - profile_start();\ - } while (0) -#elif PROF_ENABLE == 2 -#define FRAME_START()\ - do { \ - if (prof_frame_counter == PROF_N_FRAMES) {\ - avg_frame_cycles = frame_cycles;\ - avg_flip_cycles = flip_cycles;\ - avg_clear_cycles = clear_cycles;\ - avg_line_cycles = line_cycles;\ - avg_rect_cycles = rect_cycles;\ - avg_fill_rect_cycles = fill_rect_cycles;\ - avg_chr_cycles = chr_cycles;\ - avg_icn_cycles = icn_cycles;\ - avg_txt_drawf_cycles = txt_drawf_cycles;\ - avg_txt_printf_cycles = txt_printf_cycles;\ - avg_txt_render_cycles = txt_render_cycles;\ - avg_txt_clear_cycles = txt_clear_cycles;\ - avg_input_cycles = input_cycles;\ - frame_cycles = 0;\ - flip_cycles = 0;\ - clear_cycles = 0;\ - line_cycles = 0;\ - rect_cycles = 0;\ - fill_rect_cycles = 0;\ - chr_cycles = 0;\ - icn_cycles = 0;\ - txt_drawf_cycles = 0;\ - txt_printf_cycles = 0;\ - txt_render_cycles = 0;\ - txt_clear_cycles = 0;\ - input_cycles = 0;\ - prof_frame_counter = 0;\ - }\ - profile_start();\ - } while (0) -#endif - -#define FRAME_END() \ - do { \ - prof_frame_counter++;\ - frame_cycles += profile_stop();\ - } while (0) +#define PROF_SHOW() do { \ + prof_show ^= 1; \ +} while(0) #else // No profiling. 
-#define PROF(F,VAR) (F) -#define PROF_SHOW() +#define PROF_INIT() +#define PROF(F,VAR) do {F;} while(0) #define FRAME_START() #define FRAME_END() +#define PROF_SHOW() #endif diff --git a/src/sequencer.c b/src/sequencer.c index c01f905..4f46e1f 100644 --- a/src/sequencer.c +++ b/src/sequencer.c @@ -351,7 +351,6 @@ IWRAM_CODE UNROLL_LOOPS void wave_ad_tick(void) { - Wave wave_active = {0}; Wave wave_zero = { 0x77777777, 0x77777777, 0x77777777, 0x77777777, }; diff --git a/src/settings.c b/src/settings.c index 9fc2f3d..3e8138e 100644 --- a/src/settings.c +++ b/src/settings.c @@ -31,7 +31,7 @@ set_audio_settings(void) { case SYNC_IN_LINK_4BPQ: { irs_set(IRQ_SERIAL, sync_in_4); } break; default: break; } - irs_set(IRQ_TIMER_2, NULL); + irs_set(IRQ_TIMER_3, NULL); SIO_MODE = SIO_MODE_GP | SIO_SC_OUT(0) | SIO_SD_OUT(0) -- cgit v1.2.1