summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bad Diode <bd@badd10de.dev> 2023-04-15 17:33:16 +0200
committer: Bad Diode <bd@badd10de.dev> 2023-04-15 18:29:26 +0200
commit: 984f3c399b55bddcc177a2eb2a5e002fdc09a69d (patch)
tree: 97c2ea7dbde911cfa3d51269fc7f846ecd07af6e
parent: 5ddc66b6477f022802506dc3d16a542d9b303818 (diff)
download: gba-renderers-984f3c399b55bddcc177a2eb2a5e002fdc09a69d.tar.gz
download: gba-renderers-984f3c399b55bddcc177a2eb2a5e002fdc09a69d.zip
Explore some potential optimizations for fp line drawing
-rw-r--r-- src/main.c 34
-rw-r--r-- src/renderer_m4.c 128
2 files changed, 146 insertions, 16 deletions
diff --git a/src/main.c b/src/main.c
index 1982992..78ee1b2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -101,12 +101,12 @@ test_icn(void) {
101 101
102void 102void
103test_lines(void) { 103test_lines(void) {
104 for (size_t i = 0; i < 20; i++) { 104 for (size_t i = 0; i < 10; i++) {
105 draw_line(0, i * 8, (30 * 8 - 1), ((20 - i) * 8 - 1), 5); 105 draw_line(0, i * 8, (30 * 8 - 1), ((20 - i) * 8 - 1), 5);
106 } 106 }
107 for (size_t i = 0; i < 30; i++) { 107 // for (size_t i = 0; i < 30; i++) {
108 draw_line(i * 8, (20 * 8 - 1), ((30 - i) * 8 - 1), 0, 5); 108 // draw_line(i * 8, (20 * 8 - 1), ((30 - i) * 8 - 1), 0, 5);
109 } 109 // }
110} 110}
111 111
112int main(void) { 112int main(void) {
@@ -136,11 +136,20 @@ int main(void) {
136 // // draw_line(100, 0, 0, 100, 2); 136 // // draw_line(100, 0, 0, 100, 2);
137 // // draw_line(50, 0, 0, 100, 3); 137 // // draw_line(50, 0, 0, 100, 3);
138 // draw_line(0, 0, 50, 159, 1); 138 // draw_line(0, 0, 50, 159, 1);
139
139 // draw_line(0, 0, 50, 159, 1); 140 // draw_line(0, 0, 50, 159, 1);
140 // draw_line(1, 1, 50, 159, 2); 141 // draw_line(0, 0, 60, 159, 1);
141 // draw_line(2, 2, 50, 159, 1); 142 // draw_line(8, 0, 50, 159, 2);
142 // draw_line(3, 3, 50, 159, 2); 143 // draw_line(8, 0, 60, 159, 2);
143 // draw_line(4, 4, 50, 159, 1); 144 // draw_line(50, 159,0, 0, 1);
145 // draw_line(60, 159,0, 0, 1);
146 // draw_line(50, 159,8, 0, 2);
147 // draw_line(60, 159,8, 0, 2);
148 // draw_line(0, 0, 100, 50, 1);
149 // draw_line(10, 0, 100, 50, 1);
150 // draw_line(100, 50,0, 0, 2);
151 // draw_line(100, 50,10, 0, 2);
152
144 // draw_line(5, 5, 50, 159, 2); 153 // draw_line(5, 5, 50, 159, 2);
145 // draw_line(50, 159, 0, 0, 1); 154 // draw_line(50, 159, 0, 0, 1);
146 // txt_render(); 155 // txt_render();
@@ -150,10 +159,11 @@ int main(void) {
150 // txt_render(); 159 // txt_render();
151 // txt_clear(); 160 // txt_clear();
152 PROF(test_lines(), test_lines_cycles); 161 PROF(test_lines(), test_lines_cycles);
153 PROF(test_rect(), test_rect_cycles); 162 draw_filled_rect(0, 0, 150, 60, 0);
154 PROF(test_fill_rect(), test_fill_rect_cycles); 163 // PROF(test_rect(), test_rect_cycles);
155 PROF(test_chr(), test_chr_cycles); 164 // PROF(test_fill_rect(), test_fill_rect_cycles);
156 PROF(test_icn(), test_icn_cycles); 165 // PROF(test_chr(), test_chr_cycles);
166 // PROF(test_icn(), test_icn_cycles);
157 PROF_SHOW(); 167 PROF_SHOW();
158 PROF(flip_buffer(), flip_cycles); 168 PROF(flip_buffer(), flip_cycles);
159 } 169 }
diff --git a/src/renderer_m4.c b/src/renderer_m4.c
index a849cfe..e564c1e 100644
--- a/src/renderer_m4.c
+++ b/src/renderer_m4.c
@@ -32,8 +32,11 @@ static bool screen_updated = true;
32#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return; 32#define BOUNDCHECK_SCREEN(X,Y) if ((X) >= SCREEN_WIDTH || (Y) >= SCREEN_HEIGHT) return;
33#endif 33#endif
34 34
35// Make sure 35// Swap A and B values without a tmp variable.
36#define MAYBE_SWAP(A,B) if ((A) > (B)) { size_t tmp = (A); (A) = (B); (B) = tmp; } 36#define SWAP(A, B) (((A) ^= (B)), ((B) ^= (A)), ((A) ^= (B)))
37
38// Swap A and B values to make sure A <= B.
39#define MAYBE_SWAP(A,B) if ((A) > (B)) { SWAP(A,B); }
37 40
38IWRAM_CODE 41IWRAM_CODE
39void screen_fill(u8 clr) { 42void screen_fill(u8 clr) {
@@ -131,9 +134,8 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
131 } 134 }
132} 135}
133 136
134#define SWAP(A,B) do {size_t tmp = (A); (A) = (B); (B) = (tmp);} while(0)
135
136IWRAM_CODE 137IWRAM_CODE
138UNROLL_LOOPS
137void 139void
138draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { 140draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
139 BOUNDCHECK_SCREEN(x0, y0); 141 BOUNDCHECK_SCREEN(x0, y0);
@@ -146,6 +148,123 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
146 MAYBE_SWAP(y0, y1); 148 MAYBE_SWAP(y0, y1);
147 draw_vline(x0, y0, y1, clr); 149 draw_vline(x0, y0, y1, clr);
148 } else { 150 } else {
151 // int dx = x0 > x1 ? x0 - x1 : x1 - x0;
152 // int dy = y0 > y1 ? y0 - y1 : y1 - y0;
153 //
154 // NOTE: Simplified bresenham.
155 // int dx=x1-x0;
156 // int dy=y1-y0;
157 // int d=2*dy-dx;
158 // int e=2*dy;
159 // int ne=2*(dy-dx);
160 // int x=x0;
161 // int y=y0;
162 // for (x=x0;x<=x1;x++) {
163 // draw_pixel(x, y, clr);
164 // if (d<=0) {
165 // d+=e;
166 // } else {
167 // d+=ne;
168 // y++; // sign?
169 // }
170 // }
171 // NOTE: Simplified fp-line.
172 // s32 f;
173 // int x;
174 // s32 m = ((s32)(y1-y0)<<16)/(x1-x0);
175
176 // f=y0<<16;
177 // for (x=x0;x<=x1;x++,f += m) {
178 // s32 g = f;
179 // g += 32767;
180 // draw_pixel(x, g >> 16, clr);
181 // }
182
183 // u16 *dst = NULL;
184 // uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0);
185 // u32 mask = x0 & 1 ? ~0xFF : 0xFF;
186 // u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8);
187#if 0
188 int dx = x0 > x1 ? x0 - x1 : x1 - x0;
189 int dy = y0 > y1 ? y0 - y1 : y1 - y0;
190
191 u16 *dst = NULL;
192 uintptr_t addr = ((uintptr_t)backbuf + y0 * SCREEN_WIDTH + x0);
193 u32 mask = x0 & 1 ? ~0xFF : 0xFF;
194 u32 color = (clr & 0xFF) | ((clr & 0xFF) << 8);
195
196 if (dx >= dy) {
197 int fp_val = y1 - y0;
198 int fp_div = x1 - x0;
199 int fp_inc = (fp_val << 8) / fp_div;
200 int fp_step = fp_div > 0 ? fp_inc : -fp_inc;
201 int i_step = fp_div > 0 ? 1 : -1;
202 int y_step = 0;
203 for (int i = 0; i <= dx; i++) {
204 dst = (u16 *)(addr - (mask >> 31));
205 *dst = (*dst & ~mask) | (color & mask);
206 addr += i_step;
207 y_step += fp_step;
208 if (y_step >> 8) {
209 addr += SCREEN_WIDTH * (y_step >> 8);
210 y_step -= (y_step >> 8) << 8;
211 }
212 // TODO: +/- inc?
213 // addr += SCREEN_WIDTH * (y_step >> 8);
214 // y_step -= (y_step >> 8) << 8;
215 mask = ~mask;
216 }
217 } else {
218 // int fp_val = x1 - x0;
219 // int fp_div = y1 - y0;
220 // int fp_inc = (fp_val << 8) / fp_div;
221 // int fp_step = fp_div > 0 ? fp_inc : -fp_inc;
222 // int x_step = 0;
223 // u32 masks[] = { ~0xFF, 0xFF, };
224 // u32 colors[] = { color & ~0xFF, color & 0xFF, };
225 // for (int i = 0; i <= dy; i++) {
226 // // TODO: +/- inc?
227 // // METHOD 1: Conditional
228 // // dst = (u16 *)(addr - (mask >> 31));
229 // // *dst = (*dst & ~mask) | (color & mask);
230 // // addr -= SCREEN_WIDTH;
231 // // x_step += fp_step;
232 // // if (x_step >> 8) {
233 // // addr += (x_step >> 8);
234 // // x_step -= (x_step >> 8) << 8;
235 // // mask = ~mask;
236 // // }
237
238 // // METHOD 1-2
239 // dst = (u16 *)(addr - (masks[(x_step >> 8)] >> 31));
240 // *dst = (*dst & ~masks[(x_step >> 8)]) | colors[(x_step>>8)];
241 // addr -= SCREEN_WIDTH;
242 // x_step += fp_step;
243 // if (x_step >> 8) {
244 // addr += (x_step >> 8);
245 // x_step -= (x_step >> 8) << 8;
246 // }
247
248 // // METHOD 2: branchless lut
249 // // dst = (u16 *)(addr - (masks[(x_step >> 8)] >> 31));
250 // // *dst = (*dst & ~masks[(x_step >> 8)]) | colors[(x_step>>8)];
251 // // x_step += fp_step;
252 // // addr += (x_step >> 8);
253 // // addr -= SCREEN_WIDTH;
254 // // x_step -= (x_step >> 8) << 8;
255
256 // // METHOD 3: branchless bit hacking
257 // // dst = (u16 *)(addr - (mask >> 31));
258 // // *dst = (*dst & ~mask) | (color & mask);
259 // // x_step += fp_step;
260 // // mask ^= ((x_step >> 8) | -(x_step >> 8));
261 // // addr += (x_step >> 8);
262 // // addr -= SCREEN_WIDTH;
263 // // x_step -= (x_step >> 8) << 8;
264 // }
265 }
266
267#elif 1
149 // Diagonal line. 268 // Diagonal line.
150 int dx = x0 > x1 ? x0 - x1 : x1 - x0; 269 int dx = x0 > x1 ? x0 - x1 : x1 - x0;
151 int dy = y0 > y1 ? y0 - y1 : y1 - y0; 270 int dy = y0 > y1 ? y0 - y1 : y1 - y0;
@@ -183,6 +302,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
183 addr += y_step; 302 addr += y_step;
184 } 303 }
185 } 304 }
305#endif
186 } 306 }
187 screen_updated = true; 307 screen_updated = true;
188} 308}