summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2023-04-16 16:54:55 +0200
committer Bad Diode <bd@badd10de.dev> 2023-04-16 16:54:55 +0200
commit af479588c285d6690d7bed6e0eb266751beb9f6c (patch)
tree 70ec721b2b6bf37b259191a3d1ac6eccc6dbabf3
parent 4b73363bbb3507641165db4c0283a5acf5ff44e3 (diff)
download gba-renderers-af479588c285d6690d7bed6e0eb266751beb9f6c.tar.gz
gba-renderers-af479588c285d6690d7bed6e0eb266751beb9f6c.zip
Update hline with high performance method
-rw-r--r-- src/renderer_m0.c 41
-rw-r--r-- src/renderer_m4.c 2
2 files changed, 36 insertions, 7 deletions
diff --git a/src/renderer_m0.c b/src/renderer_m0.c
index 5c57278..c0416ea 100644
--- a/src/renderer_m0.c
+++ b/src/renderer_m0.c
@@ -85,10 +85,40 @@ void
85draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { 85draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
86 BOUNDCHECK_SCREEN(x0, y0); 86 BOUNDCHECK_SCREEN(x0, y0);
87 BOUNDCHECK_SCREEN(x1, y0); 87 BOUNDCHECK_SCREEN(x1, y0);
88 88 // Find row positions for the given x/y coordinates.
89 // TODO: perf 89 size_t tile_x0 = x0 / 8;
90 for (size_t x = x0; x <= x1; x++) { 90 size_t tile_x1 = x1 / 8;
91 draw_pixel(x, y0, clr); 91 size_t tile_y = y0 / 8;
92 size_t start_col = x0 % 8;
93 size_t end_col = x1 % 8;
94 size_t start_row = y0 % 8;
95
96 // Horizontal line. There are 3 cases:
97 // 1. Lines fit on a single tile.
98 // 2. Lines go through 2 tiles, both require partial row updates.
99 // 3. Lines go through 3 or more tiles, first and last tiles use
100 // partial row updates, rows in the middle can write the entire
101 // row.
102 size_t dx = tile_x1 - tile_x0;
103 u32 *dst = &backbuf[tile_x0 * 8 + tile_y * 8 * 32 + start_row];
104 if (dx < 1) {
105 u32 mask = 0xFFFFFFFF;
106 mask >>= (7 - end_col - dx) * 4;
107 mask &= 0xFFFFFFFF << start_col * 4;
108 u32 row = (0x11111111 * clr) & mask;
109 *dst = (*dst & ~mask) | row;
110 } else {
111 size_t shift_left = start_col * 4;
112 size_t shift_right = (7 - end_col) * 4;
113 u32 mask = 0xFFFFFFFF;
114 u32 row = 0x11111111 * clr;
115 *dst = (*dst & ~(mask << shift_left)) | (row << shift_left);
116 dst += 8;
117 for (size_t i = 1; i < dx; i++) {
118 *dst = (*dst & ~mask) | row;
119 dst += 8;
120 }
121 *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right);
92 } 122 }
93} 123}
94 124
@@ -407,8 +437,7 @@ draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 flip_x) {
407 size_t shift_left = start_col * 4; 437 size_t shift_left = start_col * 4;
408 size_t shift_right = (8 - start_col) * 4; 438 size_t shift_right = (8 - start_col) * 4;
409 439
410 u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32]; 440 u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32 + start_row];
411 dst += start_row;
412 if (start_col == 0) { 441 if (start_col == 0) {
413 u32 clr_a = decode_1bpp(a, flip_x); 442 u32 clr_a = decode_1bpp(a, flip_x);
414 u32 clr_b = decode_1bpp(b, flip_x); 443 u32 clr_b = decode_1bpp(b, flip_x);
diff --git a/src/renderer_m4.c b/src/renderer_m4.c
index 4cb526b..1539ccb 100644
--- a/src/renderer_m4.c
+++ b/src/renderer_m4.c
@@ -70,7 +70,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) {
70 u64 row_mask = 0xFFFFFFFFFFFFFFFF; 70 u64 row_mask = 0xFFFFFFFFFFFFFFFF;
71 row_mask >>= (7 - end_col - dx) * 8; 71 row_mask >>= (7 - end_col - dx) * 8;
72 row_mask &= 0xFFFFFFFFFFFFFFFF << start_col * 8; 72 row_mask &= 0xFFFFFFFFFFFFFFFF << start_col * 8;
73 u64 row = (0x0101010101010101 * (u8)clr) & row_mask; 73 u64 row = (0x0101010101010101 * clr) & row_mask;
74 *dst = (*dst & ~row_mask) | row; 74 *dst = (*dst & ~row_mask) | row;
75 } else { 75 } else {
76 size_t shift_left = start_col * 8; 76 size_t shift_left = start_col * 8;