diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-17 08:18:47 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-17 08:18:47 +0200 |
commit | 7bd95db63bfd7e16247ce774c124806e0cf7e1fb (patch) | |
tree | 897ddabc60f9f6da5845d85a61189d73b30671c6 | |
parent | fa6f892eab58e172827abeaefa75d981085dc06b (diff) | |
download | gba-link-cable-tester-7bd95db63bfd7e16247ce774c124806e0cf7e1fb.tar.gz gba-link-cable-tester-7bd95db63bfd7e16247ce774c124806e0cf7e1fb.zip |
Update draw_vline with performance improvements
-rw-r--r-- | src/main.c | 7 | ||||
-rw-r--r-- | src/renderer_m0.c | 198 | ||||
-rw-r--r-- | src/renderer_m4.c | 10 |
3 files changed, 136 insertions, 79 deletions
@@ -120,15 +120,8 @@ int main(void) { | |||
120 | irq_init(); | 120 | irq_init(); |
121 | irs_set(IRQ_VBLANK, irs_stub); | 121 | irs_set(IRQ_VBLANK, irs_stub); |
122 | 122 | ||
123 | // draw_filled_rect(0, 0, 7, 7, 2); | ||
124 | // draw_filled_rect(8, 0, 15, 7, 3); | ||
125 | // BG_H_SCROLL_0 = 240; | ||
126 | // BG_H_SCROLL_0 = 240; | ||
127 | // BG_H_SCROLL_1 = 0; | ||
128 | // Main loop. | 123 | // Main loop. |
129 | PROF_INIT(); | 124 | PROF_INIT(); |
130 | int x = 0; | ||
131 | int inc = 1; | ||
132 | while (true) { | 125 | while (true) { |
133 | bios_vblank_wait(); | 126 | bios_vblank_wait(); |
134 | 127 | ||
diff --git a/src/renderer_m0.c b/src/renderer_m0.c index 8f23376..18d1c5d 100644 --- a/src/renderer_m0.c +++ b/src/renderer_m0.c | |||
@@ -115,7 +115,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { | |||
115 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); | 115 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); |
116 | dst += 8; | 116 | dst += 8; |
117 | for (size_t i = 1; i < dx; i++) { | 117 | for (size_t i = 1; i < dx; i++) { |
118 | *dst = (*dst & ~mask) | row; | 118 | *dst = row; |
119 | dst += 8; | 119 | dst += 8; |
120 | } | 120 | } |
121 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); | 121 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); |
@@ -131,20 +131,35 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) { | |||
131 | BOUNDCHECK_SCREEN(x0, y1); | 131 | BOUNDCHECK_SCREEN(x0, y1); |
132 | size_t tile_x = x0 / 8; | 132 | size_t tile_x = x0 / 8; |
133 | size_t tile_y = y0 / 8; | 133 | size_t tile_y = y0 / 8; |
134 | size_t start_row = y0 % 8; | 134 | size_t tile_y0 = y0 / 8; |
135 | size_t tile_y1 = y1 / 8; | ||
135 | size_t start_col = x0 % 8; | 136 | size_t start_col = x0 % 8; |
137 | size_t start_row0 = y0 % 8; | ||
138 | size_t start_row1 = y1 % 8; | ||
139 | |||
136 | size_t shift_left = start_col * 4; | 140 | size_t shift_left = start_col * 4; |
137 | u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32 + start_row]; | 141 | |
138 | u32 cur_row = start_row; | 142 | u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32 + start_row0]; |
139 | u32 mask = 0x0000000F; | 143 | u32 mask = 0x0000000F << shift_left; |
140 | for (size_t i = 0; i <= y1 - y0; i++) { | 144 | u32 row = (0x11111111 * clr) & mask; |
141 | *dst = (*dst & ~(mask << shift_left)) | (clr << shift_left); | 145 | u32 dy = tile_y1 - tile_y0; |
142 | if (cur_row == 7) { | 146 | if (dy < 1) { |
143 | dst += 8 * 31 + 1; | 147 | for (size_t i = 0; i < y1 - y0; i++, dst++) { |
144 | cur_row = 0; | 148 | dst[0] = (dst[0] & ~mask) | row; |
145 | } else { | 149 | } |
146 | cur_row++; | 150 | } else { |
147 | dst++; | 151 | for (size_t i = 0; i < (8 - start_row0); i++, dst++) { |
152 | dst[0] = (dst[0] & ~mask) | row; | ||
153 | } | ||
154 | dst += 8 * 31; | ||
155 | for (size_t j = 1; j < dy; j++) { | ||
156 | for (size_t i = 0; i < 8; i++, dst++) { | ||
157 | dst[0] = (dst[0] & ~mask) | row; | ||
158 | } | ||
159 | dst += 8 * 31; | ||
160 | } | ||
161 | for (size_t i = 0; i < start_row1; i++, dst++) { | ||
162 | dst[0] = (dst[0] & ~mask) | row; | ||
148 | } | 163 | } |
149 | } | 164 | } |
150 | } | 165 | } |
@@ -296,6 +311,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
296 | IWRAM_CODE | 311 | IWRAM_CODE |
297 | void | 312 | void |
298 | draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | 313 | draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { |
314 | #if 1 | ||
299 | BOUNDCHECK_SCREEN(x0, y0); | 315 | BOUNDCHECK_SCREEN(x0, y0); |
300 | BOUNDCHECK_SCREEN(x1, y1); | 316 | BOUNDCHECK_SCREEN(x1, y1); |
301 | MAYBE_SWAP(x0, x1); | 317 | MAYBE_SWAP(x0, x1); |
@@ -305,6 +321,96 @@ draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
305 | draw_hline(x0, x1, y1, clr); | 321 | draw_hline(x0, x1, y1, clr); |
306 | draw_vline(x0, y0, y1, clr); | 322 | draw_vline(x0, y0, y1, clr); |
307 | draw_vline(x1, y0, y1, clr); | 323 | draw_vline(x1, y0, y1, clr); |
324 | #else | ||
325 | BOUNDCHECK_SCREEN(x0, y0); | ||
326 | BOUNDCHECK_SCREEN(x1, y1); | ||
327 | MAYBE_SWAP(x0, x1); | ||
328 | MAYBE_SWAP(y0, y1); | ||
329 | |||
330 | // Find row positions for the given x/y coordinates. | ||
331 | size_t tile_x0 = x0 / 8; | ||
332 | size_t tile_y0 = y0 / 8; | ||
333 | size_t tile_x1 = x1 / 8; | ||
334 | size_t tile_y1 = y1 / 8; | ||
335 | size_t start_col0 = x0 % 8; | ||
336 | size_t start_col1 = x1 % 8; | ||
337 | size_t start_row0 = y0 % 8; | ||
338 | size_t start_row1 = y1 % 8; | ||
339 | |||
340 | // Get a pointer to the backbuffer and the tile row. | ||
341 | u32 *dst0 = &backbuf[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; | ||
342 | u32 *dst1 = &backbuf[start_row1 + (tile_x0 + tile_y1 * 32) * 8]; | ||
343 | |||
344 | size_t dx = tile_x1 - tile_x0; | ||
345 | size_t dy = tile_y1 - tile_y0; | ||
346 | |||
347 | // We can update two lines at a time, which is faster than calling draw_line | ||
348 | // four times. | ||
349 | if (dx < 1) { | ||
350 | u32 mask = 0xFFFFFFFF; | ||
351 | mask >>= (7 - start_col1 - dx) * 4; | ||
352 | mask &= 0xFFFFFFFF << start_col0 * 4; | ||
353 | u32 row = (0x11111111 * clr) & mask; | ||
354 | *dst0 = (*dst0 & ~mask) | row; | ||
355 | *dst1 = (*dst1 & ~mask) | row; | ||
356 | } else { | ||
357 | size_t shift_left = start_col0 * 4; | ||
358 | size_t shift_right = (7 - start_col1) * 4; | ||
359 | u32 mask = 0xFFFFFFFF; | ||
360 | u32 row = 0x11111111 * clr; | ||
361 | *dst0 = (*dst0 & ~(mask << shift_left)) | (row << shift_left); | ||
362 | *dst1 = (*dst1 & ~(mask << shift_left)) | (row << shift_left); | ||
363 | for (size_t i = 1; i < dx; i++) { | ||
364 | dst0[i * 8] = row; | ||
365 | dst1[i * 8] = row; | ||
366 | } | ||
367 | dst0[dx * 8] = (dst0[dx * 8] & ~(mask >> shift_right)) | (row >> shift_right); | ||
368 | dst1[dx * 8] = (dst1[dx * 8] & ~(mask >> shift_right)) | (row >> shift_right); | ||
369 | } | ||
370 | u32 mask_left = 0xF << start_col0 * 4; | ||
371 | u32 mask_right = 0xF << start_col1 * 4; | ||
372 | u32 row_left = (0x11111111 * clr) & mask_left; | ||
373 | u32 row_right = (0x11111111 * clr) & mask_right; | ||
374 | // u32 cur_row = start_row0; | ||
375 | // dst0 = &backbuf[start_row0 + (tile_x0 + tile_y0 * 32) * 8]; | ||
376 | // dst1 = &backbuf[start_row0 + (tile_x1 + tile_y0 * 32) * 8]; | ||
377 | // for (size_t i = 0; i <= y1 - y0; i++) { | ||
378 | // *dst0 = (*dst0 & ~mask_left) | row_left; | ||
379 | // *dst1 = (*dst1 & ~mask_right) | row_right; | ||
380 | // if (cur_row == 7) { | ||
381 | // dst0 += 8 * 31 + 1; | ||
382 | // dst1 += 8 * 31 + 1; | ||
383 | // cur_row = 0; | ||
384 | // } else { | ||
385 | // cur_row++; | ||
386 | // dst0++; | ||
387 | // dst1++; | ||
388 | // } | ||
389 | // } | ||
390 | if (dy < 1) { | ||
391 | for (size_t i = 1; i < y1 - y0; i++, dst0++) { | ||
392 | dst0[1] = (dst0[1] & ~mask_left) | row_left; | ||
393 | dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right; | ||
394 | } | ||
395 | } else { | ||
396 | for (size_t i = 1; i < (8 - start_row0); i++, dst0++) { | ||
397 | dst0[1] = (dst0[1] & ~mask_left) | row_left; | ||
398 | dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right; | ||
399 | } | ||
400 | dst0 += 8 * 31; | ||
401 | for (size_t j = 1; j < dy; j++) { | ||
402 | for (size_t i = 0; i < 8; i++, dst0++) { | ||
403 | dst0[1] = (dst0[1] & ~mask_left) | row_left; | ||
404 | dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right; | ||
405 | } | ||
406 | dst0 += 8 * 31; | ||
407 | } | ||
408 | for (size_t i = 0; i < start_row1; i++, dst0++) { | ||
409 | dst0[1] = (dst0[1] & ~mask_left) | row_left; | ||
410 | dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right; | ||
411 | } | ||
412 | } | ||
413 | #endif | ||
308 | } | 414 | } |
309 | 415 | ||
310 | IWRAM_CODE | 416 | IWRAM_CODE |
@@ -322,61 +428,21 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
322 | return; | 428 | return; |
323 | } | 429 | } |
324 | 430 | ||
431 | #if 1 | ||
325 | // Drawline implementation. | 432 | // Drawline implementation. |
326 | for (size_t y = y0; y <= y1; y++) { | 433 | for (size_t y = y0; y <= y1; y++) { |
327 | draw_hline(x0, x1, y, clr); | 434 | draw_hline(x0, x1, y, clr); |
328 | } | 435 | } |
329 | 436 | #else | |
330 | // TODO: check if this is better. | 437 | // draw_rect implementation. |
331 | // BOUNDCHECK_SCREEN(x0, y0); | 438 | size_t dx = x1 - x0; |
332 | // BOUNDCHECK_SCREEN(x1, y1); | 439 | size_t dy = y1 - y0; |
333 | 440 | size_t n_rect = MIN(dx, dy); | |
334 | // size_t dx = x1 - x0; | 441 | n_rect = n_rect / 2 + 1; |
335 | // size_t dy = y1 - y0; | 442 | for (size_t i = 0; i < n_rect; i++) { |
336 | // u8 *buf = &backbuf[0]; | 443 | draw_rect(x0 + i, y0 + i, x1 - i, y1 - i, clr); |
337 | // memset(buf, 0x11 * clr, 16); | 444 | } |
338 | //for (size_t j = 0; j < 1; j++) { | 445 | #endif |
339 | // // for (size_t i = 0; i < dx; i++) { | ||
340 | // // buf[i + j * 16] = clr; | ||
341 | // // } | ||
342 | // // | ||
343 | // // backbuf[j + 0] = 0x11111111 * clr; | ||
344 | // // backbuf[j + 1] = 0x11111111 * clr; | ||
345 | // // backbuf[j + 2] = 0x11111111 * clr; | ||
346 | // // backbuf[j + 3] = 0x11111111 * clr; | ||
347 | // // backbuf[j + 4] = 0x11111111 * clr; | ||
348 | // // backbuf[j + 5] = 0x11111111 * clr; | ||
349 | // // backbuf[j + 6] = 0x11111111 * clr; | ||
350 | // // backbuf[j + 7] = 0x11111111 * clr; | ||
351 | |||
352 | // buf[j + 0] = 0x1 * clr; | ||
353 | // buf[j + 1] = 0x1 * clr; | ||
354 | // buf[j + 2] = 0x1 * clr; | ||
355 | // buf[j + 3] = 0x1 * clr; | ||
356 | // // buf[j + 4] = 0x1111 * clr; | ||
357 | // // buf[j + 5] = 0x1111 * clr; | ||
358 | // // buf[j + 6] = 0x1111 * clr; | ||
359 | // // buf[j + 7] = 0x1111 * clr; | ||
360 | //} | ||
361 | // u8 *buf = &backbuf[0]; | ||
362 | // buf[8 * 16 + 0] = clr; | ||
363 | // buf[8 * 16 + 1] = clr; | ||
364 | // buf[8 * 16 + 2] = clr; | ||
365 | // buf[8 * 16 + 3] = clr; | ||
366 | // buf[8 * 16 + 4] = clr; | ||
367 | // buf[8 * 16 + 5] = clr; | ||
368 | // buf[8 * 16 + 6] = clr; | ||
369 | // buf[8 * 16 + 7] = clr; | ||
370 | // for (size_t j = 0; j < dy; j++) { | ||
371 | // for (size_t i = 0; i < dx; i++) { | ||
372 | // buf[i + j * 16] = clr; | ||
373 | // } | ||
374 | // } | ||
375 | // size_t n_rect = MIN(dx, dy); | ||
376 | // n_rect = n_rect / 2 + 1; | ||
377 | // for (size_t i = 0; i < n_rect; i++) { | ||
378 | // draw_rect(x0 + i, y0 + i, x1 - i, y1 - i, clr); | ||
379 | // } | ||
380 | } | 446 | } |
381 | 447 | ||
382 | IWRAM_CODE | 448 | IWRAM_CODE |
@@ -557,11 +623,9 @@ void | |||
557 | renderer_init(void) { | 623 | renderer_init(void) { |
558 | // Initialize display mode and bg palette. | 624 | // Initialize display mode and bg palette. |
559 | DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; | 625 | DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; |
560 | // DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ; | ||
561 | // TODO: black/grey background to block font/back buffers? | ||
562 | 626 | ||
563 | // Clear VRAM. | 627 | // Clear VRAM. |
564 | dma_fill(MEM_VRAM, 0, KB(96), 3); | 628 | dma_fill((u32*)MEM_VRAM, 0, KB(96), 3); |
565 | 629 | ||
566 | // Initialize backgrounds. | 630 | // Initialize backgrounds. |
567 | BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); | 631 | BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); |
diff --git a/src/renderer_m4.c b/src/renderer_m4.c index 1539ccb..cc9da58 100644 --- a/src/renderer_m4.c +++ b/src/renderer_m4.c | |||
@@ -1,7 +1,7 @@ | |||
1 | #include "renderer.h" | 1 | #include "renderer.h" |
2 | #include "text.h" | 2 | #include "text.h" |
3 | 3 | ||
4 | static u16 *backbuf = (u16 *)(MEM_VRAM ^ 0x0A000); | 4 | static u16 *backbuf = (u16*)(MEM_VRAM ^ 0x0A000); |
5 | 5 | ||
6 | // Keep track of which tiles need to be copied to the frontbuffer. | 6 | // Keep track of which tiles need to be copied to the frontbuffer. |
7 | static bool screen_updated = true; | 7 | static bool screen_updated = true; |
@@ -81,7 +81,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { | |||
81 | // No DMA. | 81 | // No DMA. |
82 | *dst++ = (*dst & ~(row_mask << shift_left)) | row << shift_left; | 82 | *dst++ = (*dst & ~(row_mask << shift_left)) | row << shift_left; |
83 | for (size_t i = 1; i < dx; i++) { | 83 | for (size_t i = 1; i < dx; i++) { |
84 | *dst++ = (*dst & ~row_mask) | row; | 84 | *dst++ = row; |
85 | } | 85 | } |
86 | *dst = (*dst & ~(row_mask >> shift_right)) | row >> shift_right; | 86 | *dst = (*dst & ~(row_mask >> shift_right)) | row >> shift_right; |
87 | #else | 87 | #else |
@@ -144,7 +144,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
144 | if (dx >= dy) { | 144 | if (dx >= dy) { |
145 | int diff = 2 * dy - dx; | 145 | int diff = 2 * dy - dx; |
146 | for (int i = 0; i < dx + 1; i++) { | 146 | for (int i = 0; i < dx + 1; i++) { |
147 | dst = (u16 *)(addr - (mask >> 31)); | 147 | dst = (u16*)(addr - (mask >> 31)); |
148 | *dst = (*dst & ~mask) | (color & mask); | 148 | *dst = (*dst & ~mask) | (color & mask); |
149 | if (diff >= 0) { | 149 | if (diff >= 0) { |
150 | diff -= 2 * dx; | 150 | diff -= 2 * dx; |
@@ -157,7 +157,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { | |||
157 | } else { | 157 | } else { |
158 | int diff = 2 * dx - dy; | 158 | int diff = 2 * dx - dy; |
159 | for (int i = 0; i < dy + 1; i++) { | 159 | for (int i = 0; i < dy + 1; i++) { |
160 | dst = (u16 *)(addr - (mask >> 31)); | 160 | dst = (u16*)(addr - (mask >> 31)); |
161 | *dst = (*dst & ~mask) | (color & mask); | 161 | *dst = (*dst & ~mask) | (color & mask); |
162 | if (diff >= 0) { | 162 | if (diff >= 0) { |
163 | diff -= 2 * dy; | 163 | diff -= 2 * dy; |
@@ -370,7 +370,7 @@ renderer_init(void) { | |||
370 | DISP_CTRL = DISP_MODE_4 | DISP_BG_2; | 370 | DISP_CTRL = DISP_MODE_4 | DISP_BG_2; |
371 | 371 | ||
372 | // Clear VRAM. | 372 | // Clear VRAM. |
373 | dma_fill((u16 *)MEM_VRAM, 0x01010101 * 0, KB(96), 3); | 373 | dma_fill((u16*)MEM_VRAM, 0x01010101 * 0, KB(96), 3); |
374 | 374 | ||
375 | // Initialize default palette. | 375 | // Initialize default palette. |
376 | PAL_BUFFER_BG[0] = COLOR_BLACK; | 376 | PAL_BUFFER_BG[0] = COLOR_BLACK; |