diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-15 17:33:16 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-15 18:29:26 +0200 |
commit | 984f3c399b55bddcc177a2eb2a5e002fdc09a69d (patch) | |
tree | 97c2ea7dbde911cfa3d51269fc7f846ecd07af6e | |
parent | 5ddc66b6477f022802506dc3d16a542d9b303818 (diff) | |
download | gba-link-cable-tester-984f3c399b55bddcc177a2eb2a5e002fdc09a69d.tar.gz gba-link-cable-tester-984f3c399b55bddcc177a2eb2a5e002fdc09a69d.zip |
Explore some potential optimizations for fp line drawing
-rw-r--r-- | src/main.c | 34 | ||||
-rw-r--r-- | src/renderer_m4.c | 128 |
2 files changed, 146 insertions, 16 deletions
@@ -101,12 +101,12 @@ test_icn(void) { | |||
101 | 101 | ||
102 | void | 102 | void |
103 | test_lines(void) { | 103 | test_lines(void) { |
104 | for (size_t i = 0; i < 20; i++) { | 104 | for (size_t i = 0; i < 10; i++) { |
105 | draw_line(0, i * 8, (30 * 8 - 1), ((20 - i) * 8 - 1), 5); | 105 | draw_line(0, i * 8, (30 * 8 - 1), ((20 - i) * 8 - 1), 5); |
106 | } | 106 | } |
107 | for (size_t i = 0; i < 30; i++) { | 107 | // for (size_t i = 0; i < 30; i++) { |
108 | draw_line(i * 8, (20 * 8 - 1), ((30 - i) * 8 - 1), 0, 5); | 108 | // draw_line(i * 8, (20 * 8 - 1), ((30 - i) * 8 - 1), 0, 5); |
109 | } | 109 | // } |
110 | } | 110 | } |
111 | 111 | ||
112 | int main(void) { | 112 | int main(void) { |
@@ -136,11 +136,20 @@ int main(void) { | |||
136 | // // draw_line(100, 0, 0, 100, 2); | 136 | // // draw_line(100, 0, 0, 100, 2); |
137 | // // draw_line(50, 0, 0, 100, 3); | 137 | // // draw_line(50, 0, 0, 100, 3); |
138 | // draw_line(0, 0, 50, 159, 1); | 138 | // draw_line(0, 0, 50, 159, 1); |
139 | |||
139 | // draw_line(0, 0, 50, 159, 1); | 140 | // draw_line(0, 0, 50, 159, 1); |
140 | // draw_line(1, 1, 50, 159, 2); | 141 | // draw_line(0, 0, 60, 159, 1); |
141 | // draw_line(2, 2, 50, 159, 1); | 142 | // draw_line(8, 0, 50, 159, 2); |
142 | // draw_line(3, 3, 50, 159, 2); | 143 | // draw_line(8, 0, 60, 159, 2); |
143 | // draw_line(4, 4, 50, 159, 1); | 144 | // draw_line(50, 159,0, 0, 1); |
145 | // draw_line(60, 159,0, 0, 1); | ||
146 | // draw_line(50, 159,8, 0, 2); | ||
147 | // draw_line(60, 159,8, 0, 2); | ||
148 | // draw_line(0, 0, 100, 50, 1); | ||
149 | // draw_line(10, 0, 100, 50, 1); | ||
150 | // draw_line(100, 50,0, 0, 2); | ||
151 | // draw_line(100, 50,10, 0, 2); | ||
152 | |||
144 | // draw_line(5, 5, 50, 159, 2); | 153 | // draw_line(5, 5, 50, 159, 2); |
145 | // draw_line(50, 159, 0, 0, 1); | 154 | // draw_line(50, 159, 0, 0, 1); |
146 | // txt_render(); | 155 | // txt_render(); |
@@ -150,10 +159,11 @@ int main(void) { | |||
150 | // txt_render(); | 159 | // txt_render(); |
151 | // txt_clear(); | 160 | // txt_clear(); |
152 | PROF(test_lines(), test_lines_cycles); | 161 | PROF(test_lines(), test_lines_cycles); |
153 | PROF(test_rect(), test_rect_cycles); | 162 | draw_filled_rect(0, 0, 150, 60, 0); |
154 | PROF(test_fill_rect(), test_fill_rect_cycles); | 163 | // PROF(test_rect(), test_rect_cycles); |
155 | PROF(test_chr(), test_chr_cycles); | 164 | // PROF(test_fill_rect(), test_fill_rect_cycles); |
156 | PROF(test_icn(), test_icn_cycles); | 165 | // PROF(test_chr(), test_chr_cycles); |
166 | // PROF(test_icn(), test_icn_cycles); | ||
157 | PROF_SHOW(); | 167 | PROF_SHOW(); |
158 | PROF(flip_buffer(), flip_cycles); | 168 | PROF(flip_buffer(), flip_cycles); |
159 | } | 169 | } |
diff --git a/src/renderer_m4.c b/src/renderer_m4.c index a849cfe..e564c1e 100644 --- a/src/renderer_m4.c +++ b/src/renderer_m4.c | |||
@@ -32,8 +32,11 @@ static bool screen_updated = true; | |||
32 | #define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; | 32 | #define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | // Make sure | 35 | // Swap A and B values without a tmp variable. |
36 | #define MAYBE_SWAP(A,B) if ((A) > (B)) { size_t tmp = (A); (A) = (B); (B) = tmp; } | 36 | #define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B))) |
37 | |||
38 | // Swap A and B values to make sure A <= B. | ||
39 | #define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); } | ||
37 | 40 | ||
38 | IWRAM_CODE | 41 | IWRAM_CODE |
39 | void screen_fill(u8 clr) { | 42 | void screen_fill(u8 clr) { |
@@ -131,9 +134,8 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { | |||
131 | } | 134 | } |
132 | } | 135 | } |
133 | 136 | ||
134 | #define SWAP(A,B) do {size_t tmp = (A); (A) = (B); (B) = (tmp);} while(0) | ||
135 | |||
136 | IWRAM_CODE | 137 | IWRAM_CODE |
138 | UNROLL_LOOPS | ||
137 | void | 139 | void |
138 | draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | 140 | draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { |
139 | BOUNDCHECK_SCREEN(x0, y0); | 141 | BOUNDCHECK_SCREEN(x0, y0); |
@@ -146,6 +148,123 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
146 | MAYBE_SWAP(y0, y1); | 148 | MAYBE_SWAP(y0, y1); |
147 | draw_vline(x0, y0, y1, clr); | 149 | draw_vline(x0, y0, y1, clr); |
148 | } else { | 150 | } else { |
151 | // int dx = x0 > x1 ? x0 - x1 : x1 - x0; | ||
152 | // int dy = y0 > y1 ? y0 - y1 : y1 - y0; | ||
153 | // | ||
154 | // NOTE: Simplified bresenham. | ||
155 | // int dx=x1-x0; | ||
156 | // int dy=y1-y0; | ||
157 | // int d=2*dy-dx; | ||
158 | // int e=2*dy; | ||
159 | // int ne=2*(dy-dx); | ||
160 | // int x=x0; | ||
161 | // int y=y0; | ||
162 | // for (x=x0;x<=x1;x++) { | ||
163 | // draw_pixel(x, y, clr); | ||
164 | // if (d<=0) { | ||
165 | // d+=e; | ||
166 | // } else { | ||
167 | // d+=ne; | ||
168 | // y++; // sign? | ||
169 | // } | ||
170 | // } | ||
171 | // NOTE: Simplified fp-line. | ||
172 | // s32 f; | ||
173 | // int x; | ||
174 | // s32 m = ((s32)(y1-y0)<<16)/(x1-x0); | ||
175 | |||
176 | // f=y0<<16; | ||
177 | // for (x=x0;x<=x1;x++,f += m) { | ||
178 | // s32 g = f; | ||
179 | // g += 32767; | ||
180 | // draw_pixel(x, g >> 16, clr); | ||
181 | // } | ||
182 | |||
183 | // u16 *dst = NULL; | ||
184 | // uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0); | ||
185 | // u32 mask = x0 & 1 ? ~0xFF : 0xFF; | ||
186 | // u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8); | ||
187 | #if 0 | ||
188 | int dx = x0 > x1 ? x0 - x1 : x1 - x0; | ||
189 | int dy = y0 > y1 ? y0 - y1 : y1 - y0; | ||
190 | |||
191 | u16 *dst = NULL; | ||
192 | uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0); | ||
193 | u32 mask = x0 & 1 ? ~0xFF : 0xFF; | ||
194 | u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8); | ||
195 | |||
196 | if (dx >= dy) { | ||
197 | int fp_val = y1 - y0; | ||
198 | int fp_div = x1 - x0; | ||
199 | int fp_inc = (fp_val << 8) / fp_div; | ||
200 | int fp_step = fp_div > 0 ? fp_inc : -fp_inc; | ||
201 | int i_step = fp_div > 0 ? 1 : -1; | ||
202 | int y_step = 0; | ||
203 | for (int i = 0; i <= dx; i++) { | ||
204 | dst = (u16 *)(addr - (mask >> 31)); | ||
205 | *dst = (*dst & ~mask) | (color & mask); | ||
206 | addr += i_step; | ||
207 | y_step += fp_step; | ||
208 | if (y_step >> 8) { | ||
209 | addr += SCREEN_WIDTH * (y_step >> 8); | ||
210 | y_step -= (y_step >> 8) << 8; | ||
211 | } | ||
212 | // TODO: +/- inc? | ||
213 | // addr += SCREEN_WIDTH * (y_step >> 8); | ||
214 | // y_step -= (y_step >> 8) << 8; | ||
215 | mask = ~mask; | ||
216 | } | ||
217 | } else { | ||
218 | // int fp_val = x1 - x0; | ||
219 | // int fp_div = y1 - y0; | ||
220 | // int fp_inc = (fp_val << 8) / fp_div; | ||
221 | // int fp_step = fp_div > 0 ? fp_inc : -fp_inc; | ||
222 | // int x_step = 0; | ||
223 | // u32 masks[] = { ~0xFF, 0xFF, }; | ||
224 | // u32 colors[] = { color & ~0xFF, color & 0xFF, }; | ||
225 | // for (int i = 0; i <= dy; i++) { | ||
226 | // // TODO: +/- inc? | ||
227 | // // METHOD 1: Conditional | ||
228 | // // dst = (u16 *)(addr - (mask >> 31)); | ||
229 | // // *dst = (*dst & ~mask) | (color & mask); | ||
230 | // // addr -= SCREEN_WIDTH; | ||
231 | // // x_step += fp_step; | ||
232 | // // if (x_step >> 8) { | ||
233 | // // addr += (x_step >> 8); | ||
234 | // // x_step -= (x_step >> 8) << 8; | ||
235 | // // mask = ~mask; | ||
236 | // // } | ||
237 | |||
238 | // // METHOD 1-2 | ||
239 | // dst = (u16 *)(addr - (masks[(x_step >> 8)] >> 31)); | ||
240 | // *dst = (*dst & ~masks[(x_step >> 8)]) | colors[(x_step>>8)]; | ||
241 | // addr -= SCREEN_WIDTH; | ||
242 | // x_step += fp_step; | ||
243 | // if (x_step >> 8) { | ||
244 | // addr += (x_step >> 8); | ||
245 | // x_step -= (x_step >> 8) << 8; | ||
246 | // } | ||
247 | |||
248 | // // METHOD 2: branchless lut | ||
249 | // // dst = (u16 *)(addr - (masks[(x_step >> 8)] >> 31)); | ||
250 | // // *dst = (*dst & ~masks[(x_step >> 8)]) | colors[(x_step>>8)]; | ||
251 | // // x_step += fp_step; | ||
252 | // // addr += (x_step >> 8); | ||
253 | // // addr -= SCREEN_WIDTH; | ||
254 | // // x_step -= (x_step >> 8) << 8; | ||
255 | |||
256 | // // METHOD 3: branchless bit hacking | ||
257 | // // dst = (u16 *)(addr - (mask >> 31)); | ||
258 | // // *dst = (*dst & ~mask) | (color & mask); | ||
259 | // // x_step += fp_step; | ||
260 | // // mask ^= ((x_step >> 8) | -(x_step >> 8)); | ||
261 | // // addr += (x_step >> 8); | ||
262 | // // addr -= SCREEN_WIDTH; | ||
263 | // // x_step -= (x_step >> 8) << 8; | ||
264 | // } | ||
265 | } | ||
266 | |||
267 | #elif 1 | ||
149 | // Diagonal line. | 268 | // Diagonal line. |
150 | int dx = x0 > x1 ? x0 - x1 : x1 - x0; | 269 | int dx = x0 > x1 ? x0 - x1 : x1 - x0; |
151 | int dy = y0 > y1 ? y0 - y1 : y1 - y0; | 270 | int dy = y0 > y1 ? y0 - y1 : y1 - y0; |
@@ -183,6 +302,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
183 | addr += y_step; | 302 | addr += y_step; |
184 | } | 303 | } |
185 | } | 304 | } |
305 | #endif | ||
186 | } | 306 | } |
187 | screen_updated = true; | 307 | screen_updated = true; |
188 | } | 308 | } |