diff options
author | Bad Diode <bd@badd10de.dev> | 2021-06-04 13:38:46 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-06-04 13:38:46 +0200 |
commit | f3f221524e6be30217838661b4750820a7bebecf (patch) | |
tree | 51f3dcd5d02d45abbd5ace92fe910d2241d3e4e8 | |
parent | aedaa7ade0ed623d09b18a34023f2e02201e67e6 (diff) | |
download | stepper-f3f221524e6be30217838661b4750820a7bebecf.tar.gz stepper-f3f221524e6be30217838661b4750820a7bebecf.zip |
Add initial performance optimization for rect draw
-rw-r--r-- | src/main.c | 14 | ||||
-rw-r--r-- | src/renderer.c | 72 |
2 files changed, 65 insertions, 21 deletions
@@ -50,6 +50,15 @@ WITH REGARD TO THIS SOFTWARE. | |||
50 | #define PROF_INIT() | 50 | #define PROF_INIT() |
51 | #endif | 51 | #endif |
52 | 52 | ||
53 | void | ||
54 | test_rects() { | ||
55 | for (size_t i = 0; i < 100; i++) { | ||
56 | draw_rect(30, 30, 45, 45, 1); | ||
57 | draw_rect(35, 35, 60, 40, 2); | ||
58 | draw_rect(1, 1, 6, 6, 3); | ||
59 | } | ||
60 | } | ||
61 | |||
53 | int main(void) { | 62 | int main(void) { |
54 | // Adjust system wait times. | 63 | // Adjust system wait times. |
55 | SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE; | 64 | SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE; |
@@ -64,14 +73,11 @@ int main(void) { | |||
64 | irq_init(); | 73 | irq_init(); |
65 | irs_set(IRQ_VBLANK, irs_stub); | 74 | irs_set(IRQ_VBLANK, irs_stub); |
66 | 75 | ||
67 | |||
68 | // Main loop. | 76 | // Main loop. |
69 | PROF_INIT(); | 77 | PROF_INIT(); |
70 | while (true) { | 78 | while (true) { |
71 | bios_vblank_wait(); | 79 | bios_vblank_wait(); |
72 | txt_drawf("Hello world: %d", 4, 4, 6, 10); | 80 | PROF(test_rects(), eval_cycles); |
73 | draw_rect(30, 30, 45, 45, 1); | ||
74 | draw_rect(35, 35, 60, 40, 2); | ||
75 | PROF_SHOW(); | 81 | PROF_SHOW(); |
76 | PROF(flip_buffer(), flip_cycles); | 82 | PROF(flip_buffer(), flip_cycles); |
77 | } | 83 | } |
diff --git a/src/renderer.c b/src/renderer.c index 4aa583d..51647cb 100644 --- a/src/renderer.c +++ b/src/renderer.c | |||
@@ -47,24 +47,62 @@ draw_pixel(u16 x, u16 y, u8 color) { | |||
47 | IWRAM_CODE | 47 | IWRAM_CODE |
48 | void | 48 | void |
49 | draw_rect(int x0, int y0, int x1, int y1, u8 clr) { | 49 | draw_rect(int x0, int y0, int x1, int y1, u8 clr) { |
50 | if (x0 > x1) { | 50 | BOUNDCHECK_SCREEN(x0, y0); |
51 | int tmp = x0; | 51 | BOUNDCHECK_SCREEN(x1, y1); |
52 | x0 = x1; | 52 | |
53 | x1 = tmp; | 53 | // Find row positions for the given x/y coordinates. |
54 | } | 54 | size_t tile_x0 = x0 / 8; |
55 | if (y0 > y1) { | 55 | size_t tile_y0 = y0 / 8; |
56 | int tmp = y0; | 56 | size_t tile_x1 = x1 / 8; |
57 | y0 = y1; | 57 | size_t tile_y1 = y1 / 8; |
58 | y1 = tmp; | 58 | size_t start_col0 = x0 % 8; |
59 | } | 59 | size_t start_col1 = x1 % 8; |
60 | int dx = x1 - x0; | 60 | size_t start_row0 = y0 % 8; |
61 | int dy = y1 - y0; | 61 | size_t start_row1 = y1 % 8; |
62 | // TODO: SLOW should be vectorized. | 62 | |
63 | for (int i = 0; i <= dx; ++i) { | 63 | // Get a pointer to the backbuffer and the tile row. |
64 | draw_pixel(x0 + i, y0, clr); | 64 | u32 *backbuffer0 = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; |
65 | draw_pixel(x0 + i, y1, clr); | 65 | u32 *backbuffer1 = &BACKBUF[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; |
66 | |||
67 | u16 dx = tile_x1 - tile_x0; | ||
68 | u16 dy = y1 - y0; | ||
69 | |||
70 | // There are 3 cases: | ||
71 | // 1. Lines fit on a single tile. | ||
72 | // 2. Lines go through 2 tiles, both require partial row updates. | ||
73 | // 3. Lines go through 3 or more tiles, first and last tiles use partial | ||
74 | // row updates, rows in the middle can write the full row. | |
75 | if (dx < 1) { | ||
76 | u32 row_mask = 0xFFFFFFFF; | ||
77 | row_mask >>= (7 - start_col1 - dx) * 4; | ||
78 | row_mask &= 0xFFFFFFFF << start_col0 * 4; | ||
79 | u32 row = (0x11111111 * clr) & row_mask; | ||
80 | backbuffer0[0] = (backbuffer0[0] & ~row_mask) | row; | ||
81 | backbuffer1[0] = (backbuffer1[0] & ~row_mask) | row; | ||
82 | dirty_tiles[tile_y0] |= 1 << tile_x0; | ||
83 | dirty_tiles[tile_y1] |= 1 << tile_x0; | ||
84 | } else { | ||
85 | size_t shift_left = start_col0 * 4; | ||
86 | size_t shift_right = (7 - start_col1) * 4; | ||
87 | u32 row_mask = 0xFFFFFFFF; | ||
88 | u32 row = 0x11111111 * clr; | ||
89 | backbuffer0[0] = (backbuffer0[0] & ~(row_mask << shift_left)) | (row << shift_left); | ||
90 | backbuffer1[0] = (backbuffer1[0] & ~(row_mask << shift_left)) | (row << shift_left); | ||
91 | dirty_tiles[tile_y0] |= 1 << tile_x0; | ||
92 | dirty_tiles[tile_y1] |= 1 << tile_x0; | ||
93 | for (size_t i = 1; i < dx; i++) { | ||
94 | backbuffer0[i * 8] = row; | ||
95 | backbuffer1[i * 8] = row; | ||
96 | dirty_tiles[tile_y0] |= 1 << tile_x0 + i; | ||
97 | dirty_tiles[tile_y1] |= 1 << tile_x0 + i; | ||
98 | } | ||
99 | backbuffer0[dx * 8] = (backbuffer0[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right); | ||
100 | backbuffer1[dx * 8] = (backbuffer1[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right); | ||
101 | dirty_tiles[tile_y0] |= 1 << tile_x0 + dx; | ||
102 | dirty_tiles[tile_y1] |= 1 << tile_x0 + dx; | ||
66 | } | 103 | } |
67 | for (int i = 0; i <= dy; ++i) { | 104 | // The vertical line cases are analogous to the horizontal cases. |
105 | for (int i = 1; i < dy; ++i) { | ||
68 | draw_pixel(x0, y0 + i, clr); | 106 | draw_pixel(x0, y0 + i, clr); |
69 | draw_pixel(x1, y0 + i, clr); | 107 | draw_pixel(x1, y0 + i, clr); |
70 | } | 108 | } |