diff options
author | Bad Diode <bd@badd10de.dev> | 2023-04-16 16:54:55 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2023-04-16 16:54:55 +0200 |
commit | af479588c285d6690d7bed6e0eb266751beb9f6c (patch) | |
tree | 70ec721b2b6bf37b259191a3d1ac6eccc6dbabf3 | |
parent | 4b73363bbb3507641165db4c0283a5acf5ff44e3 (diff) | |
download | gba-renderers-af479588c285d6690d7bed6e0eb266751beb9f6c.tar.gz gba-renderers-af479588c285d6690d7bed6e0eb266751beb9f6c.zip |
Update hline with high performance method
-rw-r--r-- | src/renderer_m0.c | 41 | ||||
-rw-r--r-- | src/renderer_m4.c | 2 |
2 files changed, 36 insertions, 7 deletions
diff --git a/src/renderer_m0.c b/src/renderer_m0.c index 5c57278..c0416ea 100644 --- a/src/renderer_m0.c +++ b/src/renderer_m0.c | |||
@@ -85,10 +85,40 @@ void | |||
85 | draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { | 85 | draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { |
86 | BOUNDCHECK_SCREEN(x0, y0); | 86 | BOUNDCHECK_SCREEN(x0, y0); |
87 | BOUNDCHECK_SCREEN(x1, y0); | 87 | BOUNDCHECK_SCREEN(x1, y0); |
88 | 88 | // Find row positions for the given x/y coordinates. | |
89 | // TODO: perf | 89 | size_t tile_x0 = x0 / 8; |
90 | for (size_t x = x0; x <= x1; x++) { | 90 | size_t tile_x1 = x1 / 8; |
91 | draw_pixel(x, y0, clr); | 91 | size_t tile_y = y0 / 8; |
92 | size_t start_col = x0 % 8; | ||
93 | size_t end_col = x1 % 8; | ||
94 | size_t start_row = y0 % 8; | ||
95 | |||
96 | // Horizontal line. There are 3 cases: | ||
97 | // 1. Lines fit on a single tile. | ||
98 | // 2. Lines go through 2 tiles, both require partial row updates. | ||
99 | // 3. Lines go through 3 or more tiles, first and last tiles use | ||
100 | // partial row updates, rows in the middle can write the entire | ||
101 | // row. | ||
102 | size_t dx = tile_x1 - tile_x0; | ||
103 | u32 *dst = &backbuf[tile_x0 * 8 + tile_y * 8 * 32 + start_row]; | ||
104 | if (dx < 1) { | ||
105 | u32 mask = 0xFFFFFFFF; | ||
106 | mask >>= (7 - end_col - dx) * 4; | ||
107 | mask &= 0xFFFFFFFF << start_col * 4; | ||
108 | u32 row = (0x11111111 * clr) & mask; | ||
109 | *dst = (*dst & ~mask) | row; | ||
110 | } else { | ||
111 | size_t shift_left = start_col * 4; | ||
112 | size_t shift_right = (7 - end_col) * 4; | ||
113 | u32 mask = 0xFFFFFFFF; | ||
114 | u32 row = 0x11111111 * clr; | ||
115 | *dst = (*dst & ~(mask << shift_left)) | (row << shift_left); | ||
116 | dst += 8; | ||
117 | for (size_t i = 1; i < dx; i++) { | ||
118 | *dst = (*dst & ~mask) | row; | ||
119 | dst += 8; | ||
120 | } | ||
121 | *dst = (*dst & ~(mask >> shift_right)) | (row >> shift_right); | ||
92 | } | 122 | } |
93 | } | 123 | } |
94 | 124 | ||
@@ -407,8 +437,7 @@ draw_2bpp_row(size_t x, size_t y, u8 a, u8 b, u8 flip_x) { | |||
407 | size_t shift_left = start_col * 4; | 437 | size_t shift_left = start_col * 4; |
408 | size_t shift_right = (8 - start_col) * 4; | 438 | size_t shift_right = (8 - start_col) * 4; |
409 | 439 | ||
410 | u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32]; | 440 | u32 *dst = &backbuf[tile_x * 8 + tile_y * 8 * 32 + start_row]; |
411 | dst += start_row; | ||
412 | if (start_col == 0) { | 441 | if (start_col == 0) { |
413 | u32 clr_a = decode_1bpp(a, flip_x); | 442 | u32 clr_a = decode_1bpp(a, flip_x); |
414 | u32 clr_b = decode_1bpp(b, flip_x); | 443 | u32 clr_b = decode_1bpp(b, flip_x); |
diff --git a/src/renderer_m4.c b/src/renderer_m4.c index 4cb526b..1539ccb 100644 --- a/src/renderer_m4.c +++ b/src/renderer_m4.c | |||
@@ -70,7 +70,7 @@ draw_hline(size_t x0, size_t x1, size_t y0, u8 clr) { | |||
70 | u64 row_mask = 0xFFFFFFFFFFFFFFFF; | 70 | u64 row_mask = 0xFFFFFFFFFFFFFFFF; |
71 | row_mask >>= (7 - end_col - dx) * 8; | 71 | row_mask >>= (7 - end_col - dx) * 8; |
72 | row_mask &= 0xFFFFFFFFFFFFFFFF << start_col * 8; | 72 | row_mask &= 0xFFFFFFFFFFFFFFFF << start_col * 8; |
73 | u64 row = (0x0101010101010101 * (u8)clr) & row_mask; | 73 | u64 row = (0x0101010101010101 * clr) & row_mask; |
74 | *dst = (*dst & ~row_mask) | row; | 74 | *dst = (*dst & ~row_mask) | row; |
75 | } else { | 75 | } else { |
76 | size_t shift_left = start_col * 8; | 76 | size_t shift_left = start_col * 8; |