about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2021-06-04 13:38:46 +0200
committer Bad Diode <bd@badd10de.dev> 2021-06-04 13:38:46 +0200
commit f3f221524e6be30217838661b4750820a7bebecf (patch)
tree 51f3dcd5d02d45abbd5ace92fe910d2241d3e4e8
parent aedaa7ade0ed623d09b18a34023f2e02201e67e6 (diff)
download stepper-f3f221524e6be30217838661b4750820a7bebecf.tar.gz
stepper-f3f221524e6be30217838661b4750820a7bebecf.zip
Add initial performance optimization for rect draw
-rw-r--r-- src/main.c 14
-rw-r--r-- src/renderer.c 72
2 files changed, 65 insertions, 21 deletions
diff --git a/src/main.c b/src/main.c
index ab41915..8df466f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -50,6 +50,15 @@ WITH REGARD TO THIS SOFTWARE.
50#define PROF_INIT() 50#define PROF_INIT()
51#endif 51#endif
52 52
53void
54test_rects() {
55 for (size_t i = 0; i < 100; i++) {
56 draw_rect(30, 30, 45, 45, 1);
57 draw_rect(35, 35, 60, 40, 2);
58 draw_rect(1, 1, 6, 6, 3);
59 }
60}
61
53int main(void) { 62int main(void) {
54 // Adjust system wait times. 63 // Adjust system wait times.
55 SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE; 64 SYSTEM_WAIT = SYSTEM_WAIT_CARTRIDGE;
@@ -64,14 +73,11 @@ int main(void) {
64 irq_init(); 73 irq_init();
65 irs_set(IRQ_VBLANK, irs_stub); 74 irs_set(IRQ_VBLANK, irs_stub);
66 75
67
68 // Main loop. 76 // Main loop.
69 PROF_INIT(); 77 PROF_INIT();
70 while (true) { 78 while (true) {
71 bios_vblank_wait(); 79 bios_vblank_wait();
72 txt_drawf("Hello world: %d", 4, 4, 6, 10); 80 PROF(test_rects(), eval_cycles);
73 draw_rect(30, 30, 45, 45, 1);
74 draw_rect(35, 35, 60, 40, 2);
75 PROF_SHOW(); 81 PROF_SHOW();
76 PROF(flip_buffer(), flip_cycles); 82 PROF(flip_buffer(), flip_cycles);
77 } 83 }
diff --git a/src/renderer.c b/src/renderer.c
index 4aa583d..51647cb 100644
--- a/src/renderer.c
+++ b/src/renderer.c
@@ -47,24 +47,62 @@ draw_pixel(u16 x, u16 y, u8 color) {
47IWRAM_CODE 47IWRAM_CODE
48void 48void
49draw_rect(int x0, int y0, int x1, int y1, u8 clr) { 49draw_rect(int x0, int y0, int x1, int y1, u8 clr) {
50 if (x0 > x1) { 50 BOUNDCHECK_SCREEN(x0, y0);
51 int tmp = x0; 51 BOUNDCHECK_SCREEN(x1, y1);
52 x0 = x1; 52
53 x1 = tmp; 53 // Find row positions for the given x/y coordinates.
54 } 54 size_t tile_x0 = x0 / 8;
55 if (y0 > y1) { 55 size_t tile_y0 = y0 / 8;
56 int tmp = y0; 56 size_t tile_x1 = x1 / 8;
57 y0 = y1; 57 size_t tile_y1 = y1 / 8;
58 y1 = tmp; 58 size_t start_col0 = x0 % 8;
59 } 59 size_t start_col1 = x1 % 8;
60 int dx = x1 - x0; 60 size_t start_row0 = y0 % 8;
61 int dy = y1 - y0; 61 size_t start_row1 = y1 % 8;
62 // TODO: SLOW should be vectorized. 62
63 for (int i = 0; i <= dx; ++i) { 63 // Get a pointer to the backbuffer and the tile row.
64 draw_pixel(x0 + i, y0, clr); 64 u32 *backbuffer0 = &BACKBUF[start_row0 + (tile_x0 + tile_y0 * 32) * 8];
65 draw_pixel(x0 + i, y1, clr); 65 u32 *backbuffer1 = &BACKBUF[start_row1 + (tile_x0 + tile_y1 * 32) * 8];
66
67 u16 dx = tile_x1 - tile_x0;
68 u16 dy = y1 - y0;
69
70 // There are 3 cases:
71 // 1. Lines fit on a single tile.
72 // 2. Lines go through 2 tiles, both require partial row updates.
73 // 3. Lines go through 3 or more tiles, first and last tiles use partial
74 // row updates, rows in the middle can write the whole row at once.
75 if (dx < 1) {
76 u32 row_mask = 0xFFFFFFFF;
77 row_mask >>= (7 - start_col1 - dx) * 4;
78 row_mask &= 0xFFFFFFFF << start_col0 * 4;
79 u32 row = (0x11111111 * clr) & row_mask;
80 backbuffer0[0] = (backbuffer0[0] & ~row_mask) | row;
81 backbuffer1[0] = (backbuffer1[0] & ~row_mask) | row;
82 dirty_tiles[tile_y0] |= 1 << tile_x0;
83 dirty_tiles[tile_y1] |= 1 << tile_x0;
84 } else {
85 size_t shift_left = start_col0 * 4;
86 size_t shift_right = (7 - start_col1) * 4;
87 u32 row_mask = 0xFFFFFFFF;
88 u32 row = 0x11111111 * clr;
89 backbuffer0[0] = (backbuffer0[0] & ~(row_mask << shift_left)) | (row << shift_left);
90 backbuffer1[0] = (backbuffer1[0] & ~(row_mask << shift_left)) | (row << shift_left);
91 dirty_tiles[tile_y0] |= 1 << tile_x0;
92 dirty_tiles[tile_y1] |= 1 << tile_x0;
93 for (size_t i = 1; i < dx; i++) {
94 backbuffer0[i * 8] = row;
95 backbuffer1[i * 8] = row;
96 dirty_tiles[tile_y0] |= 1 << tile_x0 + i;
97 dirty_tiles[tile_y1] |= 1 << tile_x0 + i;
98 }
99 backbuffer0[dx * 8] = (backbuffer0[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right);
100 backbuffer1[dx * 8] = (backbuffer1[dx * 8] & ~(row_mask >> shift_right)) | (row >> shift_right);
101 dirty_tiles[tile_y0] |= 1 << tile_x0 + dx;
102 dirty_tiles[tile_y1] |= 1 << tile_x0 + dx;
66 } 103 }
67 for (int i = 0; i <= dy; ++i) { 104 // The vertical line cases are analogous to the horizontal cases.
105 for (int i = 1; i < dy; ++i) {
68 draw_pixel(x0, y0 + i, clr); 106 draw_pixel(x0, y0 + i, clr);
69 draw_pixel(x1, y0 + i, clr); 107 draw_pixel(x1, y0 + i, clr);
70 } 108 }