diff options
author | Bad Diode <bd@badd10de.dev> | 2021-04-16 16:53:47 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-04-16 16:53:47 +0200 |
commit | 21aaf2e5f1720f9b6d227d29859b035c3367e9d7 (patch) | |
tree | 6f2391997908d51a69aee69761cfd2cb85a7e9c9 | |
parent | c0cb8634cbe2ce4bcf29873cd67d5735095b64a7 (diff) | |
download | gba-experiments-21aaf2e5f1720f9b6d227d29859b035c3367e9d7.tar.gz gba-experiments-21aaf2e5f1720f9b6d227d29859b035c3367e9d7.zip |
Update draw_line to improve performance
The new implementation is based on the one found in TONC. Instead of
updating two variables to address the framebuffer as FRAMEBUFFER[y][x],
we update the pointer that points to the target memory destination.
Changing the declaration of the function from `static inline void` to
`static void` (that is, dropping the `inline` specifier; the return type
is still `void`) improves the performance significantly. Additionally,
for some reason, adding dedicated branches for horizontal and vertical
lines improves the performance once again. It may be due to the
compiler knowing that there is no pointer aliasing, but I'm not sure
about that.
-rw-r--r-- | src/main.c | 106 |
1 files changed, 55 insertions, 51 deletions
@@ -78,7 +78,7 @@ rgb15(u32 red, u32 green, u32 blue ) { | |||
78 | #define COLOR_WHITE rgb15(28, 28, 28) | 78 | #define COLOR_WHITE rgb15(28, 28, 28) |
79 | 79 | ||
80 | // Using bd-font, an 8x8 bitmap font. | 80 | // Using bd-font, an 8x8 bitmap font. |
81 | static inline void | 81 | static void |
82 | put_char(int x, int y, Color clr, u8 chr) { | 82 | put_char(int x, int y, Color clr, u8 chr) { |
83 | for (size_t i = 0; i < 8; ++i) { | 83 | for (size_t i = 0; i < 8; ++i) { |
84 | for (size_t j = 0; j < 8; ++j) { | 84 | for (size_t j = 0; j < 8; ++j) { |
@@ -89,7 +89,7 @@ put_char(int x, int y, Color clr, u8 chr) { | |||
89 | } | 89 | } |
90 | } | 90 | } |
91 | 91 | ||
92 | static inline void | 92 | static void |
93 | put_text(int x, int y, Color clr, char *msg) { | 93 | put_text(int x, int y, Color clr, char *msg) { |
94 | int count = 0; | 94 | int count = 0; |
95 | while (*msg) { | 95 | while (*msg) { |
@@ -100,69 +100,73 @@ put_text(int x, int y, Color clr, char *msg) { | |||
100 | 100 | ||
101 | // Draws a line with the given color between (x0,y0) and (x1,y1) using | 101 | // Draws a line with the given color between (x0,y0) and (x1,y1) using |
102 | // Bresenham's line drawing algorithm with exclusively integer arithmetic. | 102 | // Bresenham's line drawing algorithm with exclusively integer arithmetic. |
103 | static inline void | 103 | static void |
104 | draw_line(int x0, int y0, int x1, int y1, Color clr) { | 104 | draw_line(int x0, int y0, int x1, int y1, Color clr) { |
105 | // Keep track of the coordinate for writing to the memory buffer. | 105 | // The line length in color units. |
106 | int x = x0; | 106 | int pitch = SCREEN_WIDTH; |
107 | int y = y0; | 107 | |
108 | // Pointer to the initial position of the screen buffer where we will start | ||
109 | // writing our data. We need to multiply by 2 because in mode 3 we have | ||
110 | // 2 bytes per pixel. | ||
111 | vu16 *destination = (u16*)(SCREEN_BUFFER + y0 * pitch * 2 + x0 * 2); | ||
108 | 112 | ||
109 | // Adjust the step direction and calculate deltas. | 113 | // Adjust the step direction and calculate deltas. |
110 | int x_step = 1; | 114 | int x_step; |
111 | int y_step = 1; | 115 | int dx; |
112 | int dx = x1 - x0; | ||
113 | int dy = y1 - y0; | ||
114 | if (x0 > x1) { | 116 | if (x0 > x1) { |
115 | x_step = -1; | 117 | x_step = -1; |
116 | dx = x0 - x1; | 118 | dx = x0 - x1; |
119 | } else { | ||
120 | x_step = 1; | ||
121 | dx = x1 - x0; | ||
117 | } | 122 | } |
123 | int y_step; | ||
124 | int dy; | ||
118 | if (y0 > y1) { | 125 | if (y0 > y1) { |
119 | y_step = -1; | 126 | y_step = -pitch; |
120 | dy = y0 - y1; | 127 | dy = y0 - y1; |
128 | } else { | ||
129 | y_step = +pitch; | ||
130 | dy = y1 - y0; | ||
121 | } | 131 | } |
122 | 132 | ||
123 | // Precalculate 2 * deltas for x and y. | 133 | // Precalculate 2 * deltas for x and y. |
124 | int ddx = dx + dx; | 134 | int ddx = dx + dx; |
125 | int ddy = dy + dy; | 135 | int ddy = dy + dy; |
126 | 136 | ||
127 | // These variables are dependant on the slope. We can avoid considering | 137 | if(dy == 0) { |
128 | // separate cases for positive and negative slopes by using pointers to | 138 | // Horizontal line. |
129 | // update the step in x or y. | 139 | for(int i = 0; i <= dx; i++) { |
130 | int diff; | 140 | destination[i * x_step] = clr; |
131 | int diff_inc_a; | 141 | } |
132 | int diff_inc_b; | 142 | } else if(dx == 0) { |
133 | int n_steps; | 143 | // Vertical line. |
134 | int *a; | 144 | for(int i = 0; i <= dy; i++) { |
135 | int *b; | 145 | destination[i * y_step] = clr; |
136 | int a_step; | 146 | } |
137 | int b_step; | 147 | } else if (dx >= dy){ |
138 | if (dx >= dy) { | 148 | // Positive slope. |
139 | diff = ddy - dx; | 149 | int diff = ddy - dx; |
140 | diff_inc_a = ddy; | 150 | for (int i = 0; i <= dx; ++i) { |
141 | diff_inc_b = ddx; | 151 | *destination = clr; |
142 | n_steps = dx; | 152 | if (diff >= 0) { |
143 | a = &x; | 153 | destination += y_step; |
144 | b = &y; | 154 | diff -= ddx; |
145 | a_step = x_step; | 155 | } |
146 | b_step = y_step; | 156 | destination += x_step; |
157 | diff += ddy; | ||
158 | } | ||
147 | } else { | 159 | } else { |
148 | diff = ddx - dy; | 160 | // Negative slope. |
149 | diff_inc_a = ddx; | 161 | int diff = ddx - dy; |
150 | diff_inc_b = ddy; | 162 | for (int i = 0; i <= dy; ++i) { |
151 | n_steps = dy; | 163 | *destination = clr; |
152 | a = &y; | 164 | if (diff >= 0) { |
153 | b = &x; | 165 | destination += x_step; |
154 | a_step = y_step; | 166 | diff -= ddy; |
155 | b_step = x_step; | 167 | } |
156 | } | 168 | destination += y_step; |
157 | 169 | diff += ddx; | |
158 | // Draw the line with Bresenham's algorithm. | ||
159 | for (int i = 0; i <= n_steps; ++i) { | ||
160 | FRAMEBUFFER[y][x] = clr; | ||
161 | *a += a_step; | ||
162 | diff += diff_inc_a; | ||
163 | if (diff > 0) { | ||
164 | *b += b_step; | ||
165 | diff -= diff_inc_b; | ||
166 | } | 170 | } |
167 | } | 171 | } |
168 | } | 172 | } |
@@ -233,7 +237,7 @@ wait_vsync() { | |||
233 | // GBA needs to meet memory alignment requirements, we can't write a u8 into | 237 | // GBA needs to meet memory alignment requirements, we can't write a u8 into |
234 | // memory, instead we need to read a u16 word, mask and or the corresponding | 238 | // memory, instead we need to read a u16 word, mask and or the corresponding |
235 | // bits and save the updated u16. | 239 | // bits and save the updated u16. |
236 | static inline void | 240 | static void |
237 | put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) { | 241 | put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) { |
238 | int buffer_index = (y * SCREEN_WIDTH + x) / 2; | 242 | int buffer_index = (y * SCREEN_WIDTH + x) / 2; |
239 | vu16 *destination = &buffer[buffer_index]; | 243 | vu16 *destination = &buffer[buffer_index]; |
@@ -247,7 +251,7 @@ put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) { | |||
247 | } | 251 | } |
248 | } | 252 | } |
249 | 253 | ||
250 | static inline void | 254 | static void |
251 | draw_fill_rect_m4(int x0, int y0, int x1, int y1, u8 col_index, vu16 *buffer) { | 255 | draw_fill_rect_m4(int x0, int y0, int x1, int y1, u8 col_index, vu16 *buffer) { |
252 | int ix, iy; | 256 | int ix, iy; |
253 | for(iy = y0; iy < y1; iy++) { | 257 | for(iy = y0; iy < y1; iy++) { |