summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-17 08:18:47 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-17 08:18:47 +0200
commit 7bd95db63bfd7e16247ce774c124806e0cf7e1fb (patch)
tree 897ddabc60f9f6da5845d85a61189d73b30671c6
parent fa6f892eab58e172827abeaefa75d981085dc06b (diff)
download gba-renderers-7bd95db63bfd7e16247ce774c124806e0cf7e1fb.tar.gz
download gba-renderers-7bd95db63bfd7e16247ce774c124806e0cf7e1fb.zip
Update draw_vline with performance improvements
-rw-r--r-- src/main.c 7
-rw-r--r-- src/renderer_m0.c 198
-rw-r--r-- src/renderer_m4.c 10
3 files changed, 136 insertions, 79 deletions
diff --git a/src/main.c b/src/main.c
index d2137c3..7e7dfd4 100644
--- a/src/main.c
+++ b/src/main.c
@@ -120,15 +120,8 @@ int main(void) {
120 irq_init(); 120 irq_init();
121 irs_set(IRQ_VBLANK, irs_stub); 121 irs_set(IRQ_VBLANK, irs_stub);
122 122
123 // draw_filled_rect(0, 0, 7, 7, 2);
124 // draw_filled_rect(8, 0, 15, 7, 3);
125 // BG_H_SCROLL_0 = 240;
126 // BG_H_SCROLL_0 = 240;
127 // BG_H_SCROLL_1 = 0;
128 // Main loop. 123 // Main loop.
129 PROF_INIT(); 124 PROF_INIT();
130 int x = 0;
131 int inc = 1;
132 while (true) { 125 while (true) {
133 bios_vblank_wait(); 126 bios_vblank_wait();
134 127
diff --git a/src/renderer_m0.c b/src/renderer_m0.c
index 8f23376..18d1c5d 100644
--- a/src/renderer_m0.c
+++ b/src/renderer_m0.c
@@ -115,7 +115,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
115 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); 115 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
116 dst += 8; 116 dst += 8;
117 for (size_t i = 1; i < dx; i++) { 117 for (size_t i = 1; i < dx; i++) {
118 *dst = (*dst & ~mask) | row; 118 *dst = row;
119 dst += 8; 119 dst += 8;
120 } 120 }
121 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); 121 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
@@ -131,20 +131,35 @@ draw_vline(size_t x0, size_t y0, size_t y1, u8 clr) {
131 BOUNDCHECK_SCREEN(x0, y1); 131 BOUNDCHECK_SCREEN(x0, y1);
132 size_t tile_x = x0 / 8; 132 size_t tile_x = x0 / 8;
133 size_t tile_y = y0 / 8; 133 size_t tile_y = y0 / 8;
134 size_t start_row = y0 % 8; 134 size_t tile_y0 = y0 / 8;
135 size_t tile_y1 = y1 / 8;
135 size_t start_col = x0 % 8; 136 size_t start_col = x0 % 8;
137 size_t start_row0 = y0 % 8;
138 size_t start_row1 = y1 % 8;
139
136 size_t shift_left = start_col * 4; 140 size_t shift_left = start_col * 4;
137 u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32 + start_row]; 141
138 u32 cur_row = start_row; 142 u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32 + start_row0];
139 u32 mask = 0x0000000F; 143 u32 mask = 0x0000000F << shift_left;
140 for (size_t i = 0; i <= y1 - y0; i++) { 144 u32 row = (0x11111111 * clr) & mask;
141 *dst = (*dst & ~(mask << shift_left)) | (clr << shift_left); 145 u32 dy = tile_y1 - tile_y0;
142 if (cur_row == 7) { 146 if (dy < 1) {
143 dst += 8 * 31 + 1; 147 for (size_t i = 0; i < y1 - y0; i++, dst++) {
144 cur_row = 0; 148 dst[0] = (dst[0] & ~mask) | row;
145 } else { 149 }
146 cur_row++; 150 } else {
147 dst++; 151 for (size_t i = 0; i < (8 - start_row0); i++, dst++) {
152 dst[0] = (dst[0] & ~mask) | row;
153 }
154 dst += 8 * 31;
155 for (size_t j = 1; j < dy; j++) {
156 for (size_t i = 0; i < 8; i++, dst++) {
157 dst[0] = (dst[0] & ~mask) | row;
158 }
159 dst += 8 * 31;
160 }
161 for (size_t i = 0; i < start_row1; i++, dst++) {
162 dst[0] = (dst[0] & ~mask) | row;
148 } 163 }
149 } 164 }
150} 165}
@@ -296,6 +311,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
296IWRAM_CODE 311IWRAM_CODE
297void 312void
298draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) { 313draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
314#if 1
299 BOUNDCHECK_SCREEN(x0, y0); 315 BOUNDCHECK_SCREEN(x0, y0);
300 BOUNDCHECK_SCREEN(x1, y1); 316 BOUNDCHECK_SCREEN(x1, y1);
301 MAYBE_SWAP(x0, x1); 317 MAYBE_SWAP(x0, x1);
@@ -305,6 +321,96 @@ draw_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
305 draw_hline(x0, x1, y1, clr); 321 draw_hline(x0, x1, y1, clr);
306 draw_vline(x0, y0, y1, clr); 322 draw_vline(x0, y0, y1, clr);
307 draw_vline(x1, y0, y1, clr); 323 draw_vline(x1, y0, y1, clr);
324#else
325 BOUNDCHECK_SCREEN(x0, y0);
326 BOUNDCHECK_SCREEN(x1, y1);
327 MAYBE_SWAP(x0, x1);
328 MAYBE_SWAP(y0, y1);
329
330 // Find row positions for the given x/y coordinates.
331 size_t tile_x0 = x0 / 8;
332 size_t tile_y0 = y0 / 8;
333 size_t tile_x1 = x1 / 8;
334 size_t tile_y1 = y1 / 8;
335 size_t start_col0 = x0 % 8;
336 size_t start_col1 = x1 % 8;
337 size_t start_row0 = y0 % 8;
338 size_t start_row1 = y1 % 8;
339
340 // Get a pointer to the backbuffer and the tile row.
341 u32 *dst0 = &backbuf[start_row0 + (tile_x0 + tile_y0 * 32) * 8];
342 u32 *dst1 = &backbuf[start_row1 + (tile_x0 + tile_y1 * 32) * 8];
343
344 size_t dx = tile_x1 - tile_x0;
345 size_t dy = tile_y1 - tile_y0;
346
347 // We can update two lines at a time, which is faster than calling draw_line
348 // four times.
349 if (dx < 1) {
350 u32 mask = 0xFFFFFFFF;
351 mask >>= (7 - start_col1 - dx) * 4;
352 mask &= 0xFFFFFFFF << start_col0 * 4;
353 u32 row = (0x11111111 * clr) & mask;
354 *dst0 = (*dst0 & ~mask) | row;
355 *dst1 = (*dst1 & ~mask) | row;
356 } else {
357 size_t shift_left = start_col0 * 4;
358 size_t shift_right = (7 - start_col1) * 4;
359 u32 mask = 0xFFFFFFFF;
360 u32 row = 0x11111111 * clr;
361 *dst0 = (*dst0 & ~(mask << shift_left)) | (row << shift_left);
362 *dst1 = (*dst1 & ~(mask << shift_left)) | (row << shift_left);
363 for (size_t i = 1; i < dx; i++) {
364 dst0[i * 8] = row;
365 dst1[i * 8] = row;
366 }
367 dst0[dx * 8] = (dst0[dx * 8] & ~(mask >> shift_right)) | (row >> shift_right);
368 dst1[dx * 8] = (dst1[dx * 8] & ~(mask >> shift_right)) | (row >> shift_right);
369 }
370 u32 mask_left = 0xF << start_col0 * 4;
371 u32 mask_right = 0xF << start_col1 * 4;
372 u32 row_left = (0x11111111 * clr) & mask_left;
373 u32 row_right = (0x11111111 * clr) & mask_right;
374 // u32 cur_row = start_row0;
375 // dst0 = &backbuf[start_row0 + (tile_x0 + tile_y0 * 32) * 8];
376 // dst1 = &backbuf[start_row0 + (tile_x1 + tile_y0 * 32) * 8];
377 // for (size_t i = 0; i <= y1 - y0; i++) {
378 // *dst0 = (*dst0 & ~mask_left) | row_left;
379 // *dst1 = (*dst1 & ~mask_right) | row_right;
380 // if (cur_row == 7) {
381 // dst0 += 8 * 31 + 1;
382 // dst1 += 8 * 31 + 1;
383 // cur_row = 0;
384 // } else {
385 // cur_row++;
386 // dst0++;
387 // dst1++;
388 // }
389 // }
390 if (dy < 1) {
391 for (size_t i = 1; i < y1 - y0; i++, dst0++) {
392 dst0[1] = (dst0[1] & ~mask_left) | row_left;
393 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
394 }
395 } else {
396 for (size_t i = 1; i < (8 - start_row0); i++, dst0++) {
397 dst0[1] = (dst0[1] & ~mask_left) | row_left;
398 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
399 }
400 dst0 += 8 * 31;
401 for (size_t j = 1; j < dy; j++) {
402 for (size_t i = 0; i < 8; i++, dst0++) {
403 dst0[1] = (dst0[1] & ~mask_left) | row_left;
404 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
405 }
406 dst0 += 8 * 31;
407 }
408 for (size_t i = 0; i < start_row1; i++, dst0++) {
409 dst0[1] = (dst0[1] & ~mask_left) | row_left;
410 dst0[1 + 8 * dx] = (dst0[1 + 8 * dx] & ~mask_right) | row_right;
411 }
412 }
413#endif
308} 414}
309 415
310IWRAM_CODE 416IWRAM_CODE
@@ -322,61 +428,21 @@ draw_filled_rect(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
322 return; 428 return;
323 } 429 }
324 430
431#if 1
325 // Drawline implementation. 432 // Drawline implementation.
326 for (size_t y = y0; y <= y1; y++) { 433 for (size_t y = y0; y <= y1; y++) {
327 draw_hline(x0, x1, y, clr); 434 draw_hline(x0, x1, y, clr);
328 } 435 }
329 436#else
330 // TODO: check if this is better. 437 // draw_rect implementation.
331 // BOUNDCHECK_SCREEN(x0, y0); 438 size_t dx = x1 - x0;
332 // BOUNDCHECK_SCREEN(x1, y1); 439 size_t dy = y1 - y0;
333 440 size_t n_rect = MIN(dx, dy);
334 // size_t dx = x1 - x0; 441 n_rect = n_rect / 2 + 1;
335 // size_t dy = y1 - y0; 442 for (size_t i = 0; i < n_rect; i++) {
336 // u8 *buf = &backbuf[0]; 443 draw_rect(x0 + i, y0 + i, x1 - i, y1 - i, clr);
337 // memset(buf, 0x11 * clr, 16); 444 }
338 //for (size_t j = 0; j < 1; j++) { 445#endif
339 // // for (size_t i = 0; i < dx; i++) {
340 // // buf[i + j * 16] = clr;
341 // // }
342 // //
343 // // backbuf[j + 0] = 0x11111111 * clr;
344 // // backbuf[j + 1] = 0x11111111 * clr;
345 // // backbuf[j + 2] = 0x11111111 * clr;
346 // // backbuf[j + 3] = 0x11111111 * clr;
347 // // backbuf[j + 4] = 0x11111111 * clr;
348 // // backbuf[j + 5] = 0x11111111 * clr;
349 // // backbuf[j + 6] = 0x11111111 * clr;
350 // // backbuf[j + 7] = 0x11111111 * clr;
351
352 // buf[j + 0] = 0x1 * clr;
353 // buf[j + 1] = 0x1 * clr;
354 // buf[j + 2] = 0x1 * clr;
355 // buf[j + 3] = 0x1 * clr;
356 // // buf[j + 4] = 0x1111 * clr;
357 // // buf[j + 5] = 0x1111 * clr;
358 // // buf[j + 6] = 0x1111 * clr;
359 // // buf[j + 7] = 0x1111 * clr;
360 //}
361 // u8 *buf = &backbuf[0];
362 // buf[8 * 16 + 0] = clr;
363 // buf[8 * 16 + 1] = clr;
364 // buf[8 * 16 + 2] = clr;
365 // buf[8 * 16 + 3] = clr;
366 // buf[8 * 16 + 4] = clr;
367 // buf[8 * 16 + 5] = clr;
368 // buf[8 * 16 + 6] = clr;
369 // buf[8 * 16 + 7] = clr;
370 // for (size_t j = 0; j < dy; j++) {
371 // for (size_t i = 0; i < dx; i++) {
372 // buf[i + j * 16] = clr;
373 // }
374 // }
375 // size_t n_rect = MIN(dx, dy);
376 // n_rect = n_rect / 2 + 1;
377 // for (size_t i = 0; i < n_rect; i++) {
378 // draw_rect(x0 + i, y0 + i, x1 - i, y1 - i, clr);
379 // }
380} 446}
381 447
382IWRAM_CODE 448IWRAM_CODE
@@ -557,11 +623,9 @@ void
557renderer_init(void) { 623renderer_init(void) {
558 // Initialize display mode and bg palette. 624 // Initialize display mode and bg palette.
559 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1; 625 DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1;
560 // DISP_CTRL = DISP_MODE_0 | DISP_BG_0 | DISP_BG_1 | DISP_OBJ;
561 // TODO: black/grey background to block font/back buffers?
562 626
563 // Clear VRAM. 627 // Clear VRAM.
564 dma_fill(MEM_VRAM, 0, KB(96), 3); 628 dma_fill((u32*)MEM_VRAM, 0, KB(96), 3);
565 629
566 // Initialize backgrounds. 630 // Initialize backgrounds.
567 BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1); 631 BG_CTRL(0) = BG_CHARBLOCK(CB_0) | BG_SCREENBLOCK(SB_0) | BG_PRIORITY(0) | BG_SIZE(1);
diff --git a/src/renderer_m4.c b/src/renderer_m4.c
index 1539ccb..cc9da58 100644
--- a/src/renderer_m4.c
+++ b/src/renderer_m4.c
@@ -1,7 +1,7 @@
1#include "renderer.h" 1#include "renderer.h"
2#include "text.h" 2#include "text.h"
3 3
4static u16 *backbuf = (u16 *)(MEM_VRAM ^ 0x0A000); 4static u16 *backbuf = (u16*)(MEM_VRAM ^ 0x0A000);
5 5
6// Keep track of which tiles need to be copied to the frontbuffer. 6// Keep track of which tiles need to be copied to the frontbuffer.
7static bool screen_updated = true; 7static bool screen_updated = true;
@@ -81,7 +81,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
81 // No DMA. 81 // No DMA.
82 *dst++ = (*dst & ~(row_mask << shift_left)) | row << shift_left; 82 *dst++ = (*dst & ~(row_mask << shift_left)) | row << shift_left;
83 for (size_t i = 1; i < dx; i++) { 83 for (size_t i = 1; i < dx; i++) {
84 *dst++ = (*dst & ~row_mask) | row; 84 *dst++ = row;
85 } 85 }
86 *dst = (*dst & ~(row_mask >> shift_right)) | row >> shift_right; 86 *dst = (*dst & ~(row_mask >> shift_right)) | row >> shift_right;
87#else 87#else
@@ -144,7 +144,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
144 if (dx >= dy) { 144 if (dx >= dy) {
145 int diff = 2 * dy - dx; 145 int diff = 2 * dy - dx;
146 for (int i = 0; i < dx + 1; i++) { 146 for (int i = 0; i < dx + 1; i++) {
147 dst = (u16 *)(addr - (mask >> 31)); 147 dst = (u16*)(addr - (mask >> 31));
148 *dst = (*dst & ~mask) | (color & mask); 148 *dst = (*dst & ~mask) | (color & mask);
149 if (diff >= 0) { 149 if (diff >= 0) {
150 diff -= 2 * dx; 150 diff -= 2 * dx;
@@ -157,7 +157,7 @@ draw_line(size_t x0, size_t y0, size_t x1, size_t y1, u8 clr) {
157 } else { 157 } else {
158 int diff = 2 * dx - dy; 158 int diff = 2 * dx - dy;
159 for (int i = 0; i < dy + 1; i++) { 159 for (int i = 0; i < dy + 1; i++) {
160 dst = (u16 *)(addr - (mask >> 31)); 160 dst = (u16*)(addr - (mask >> 31));
161 *dst = (*dst & ~mask) | (color & mask); 161 *dst = (*dst & ~mask) | (color & mask);
162 if (diff >= 0) { 162 if (diff >= 0) {
163 diff -= 2 * dy; 163 diff -= 2 * dy;
@@ -370,7 +370,7 @@ renderer_init(void) {
370 DISP_CTRL = DISP_MODE_4 | DISP_BG_2; 370 DISP_CTRL = DISP_MODE_4 | DISP_BG_2;
371 371
372 // Clear VRAM. 372 // Clear VRAM.
373 dma_fill((u16 *)MEM_VRAM, 0x01010101 * 0, KB(96), 3); 373 dma_fill((u16*)MEM_VRAM, 0x01010101 * 0, KB(96), 3);
374 374
375 // Initialize default palette. 375 // Initialize default palette.
376 PAL_BUFFER_BG[0] = COLOR_BLACK; 376 PAL_BUFFER_BG[0] = COLOR_BLACK;