summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2021-04-16 16:53:47 +0200
committerBad Diode <bd@badd10de.dev>2021-04-16 16:53:47 +0200
commit21aaf2e5f1720f9b6d227d29859b035c3367e9d7 (patch)
tree6f2391997908d51a69aee69761cfd2cb85a7e9c9
parentc0cb8634cbe2ce4bcf29873cd67d5735095b64a7 (diff)
downloadgba-experiments-21aaf2e5f1720f9b6d227d29859b035c3367e9d7.tar.gz
gba-experiments-21aaf2e5f1720f9b6d227d29859b035c3367e9d7.zip
Update draw_line to improve performance
The new implementation is based on the one found in TONC. Instead of updating two variables to address the framebuffer as FRAMEBUFFER[y][x], we update a pointer that points directly to the target memory destination. Changing the declaration of the function from `static inline void` to `static void` (that is, dropping the `inline` specifier; the return type is still `void`) improves the performance significantly. Additionally, for some reason, if there is a dedicated if statement for horizontal and vertical lines, the performance once again improves. It may be due to the compiler knowing that there is no pointer aliasing, but I'm not sure about that.
-rw-r--r--src/main.c106
1 files changed, 55 insertions, 51 deletions
diff --git a/src/main.c b/src/main.c
index 0261e8a..b86a830 100644
--- a/src/main.c
+++ b/src/main.c
@@ -78,7 +78,7 @@ rgb15(u32 red, u32 green, u32 blue ) {
78#define COLOR_WHITE rgb15(28, 28, 28) 78#define COLOR_WHITE rgb15(28, 28, 28)
79 79
80// Using bd-font, an 8x8 bitmap font. 80// Using bd-font, an 8x8 bitmap font.
81static inline void 81static void
82put_char(int x, int y, Color clr, u8 chr) { 82put_char(int x, int y, Color clr, u8 chr) {
83 for (size_t i = 0; i < 8; ++i) { 83 for (size_t i = 0; i < 8; ++i) {
84 for (size_t j = 0; j < 8; ++j) { 84 for (size_t j = 0; j < 8; ++j) {
@@ -89,7 +89,7 @@ put_char(int x, int y, Color clr, u8 chr) {
89 } 89 }
90} 90}
91 91
92static inline void 92static void
93put_text(int x, int y, Color clr, char *msg) { 93put_text(int x, int y, Color clr, char *msg) {
94 int count = 0; 94 int count = 0;
95 while (*msg) { 95 while (*msg) {
@@ -100,69 +100,73 @@ put_text(int x, int y, Color clr, char *msg) {
100 100
101// Draws a line with the given color between (x0,y0) and (x1,y1) using the 101// Draws a line with the given color between (x0,y0) and (x1,y1) using the
102// Bresenham's line drawing algorithm using exclusively integer arithmetic. 102// Bresenham's line drawing algorithm using exclusively integer arithmetic.
103static inline void 103static void
104draw_line(int x0, int y0, int x1, int y1, Color clr) { 104draw_line(int x0, int y0, int x1, int y1, Color clr) {
105 // Keep track of the coordinate for writing to the memory buffer. 105 // The line length in color units.
106 int x = x0; 106 int pitch = SCREEN_WIDTH;
107 int y = y0; 107
108 // Pointer to the initial position of the screen buffer where we will start
109 // writing our data. We need to multiply by 2 because in mode 3 we have
110 // 2 bytes per pixel.
111 vu16 *destination = (u16*)(SCREEN_BUFFER + y0 * pitch * 2 + x0 * 2);
108 112
109 // Adjust the step direction and calculate deltas. 113 // Adjust the step direction and calculate deltas.
110 int x_step = 1; 114 int x_step;
111 int y_step = 1; 115 int dx;
112 int dx = x1 - x0;
113 int dy = y1 - y0;
114 if (x0 > x1) { 116 if (x0 > x1) {
115 x_step = -1; 117 x_step = -1;
116 dx = x0 - x1; 118 dx = x0 - x1;
119 } else {
120 x_step = 1;
121 dx = x1 - x0;
117 } 122 }
123 int y_step;
124 int dy;
118 if (y0 > y1) { 125 if (y0 > y1) {
119 y_step = -1; 126 y_step = -pitch;
120 dy = y0 - y1; 127 dy = y0 - y1;
128 } else {
129 y_step = +pitch;
130 dy = y1 - y0;
121 } 131 }
122 132
123 // Precalculate 2 * deltas for x and y. 133 // Precalculate 2 * deltas for x and y.
124 int ddx = dx + dx; 134 int ddx = dx + dx;
125 int ddy = dy + dy; 135 int ddy = dy + dy;
126 136
127 // These variables are dependant on the slope. We can avoid considering 137 if(dy == 0) {
128 // separate cases for positive and negative slopes by using pointers to 138 // Horizontal line.
129 // update the step in x or y. 139 for(int i = 0; i <= dx; i++) {
130 int diff; 140 destination[i * x_step] = clr;
131 int diff_inc_a; 141 }
132 int diff_inc_b; 142 } else if(dx == 0) {
133 int n_steps; 143 // Vertical line.
134 int *a; 144 for(int i = 0; i <= dy; i++) {
135 int *b; 145 destination[i * y_step] = clr;
136 int a_step; 146 }
137 int b_step; 147 } else if (dx >= dy){
138 if (dx >= dy) { 148 // X-major line (shallow, |slope| <= 1).
139 diff = ddy - dx; 149 int diff = ddy - dx;
140 diff_inc_a = ddy; 150 for (int i = 0; i <= dx; ++i) {
141 diff_inc_b = ddx; 151 *destination = clr;
142 n_steps = dx; 152 if (diff >= 0) {
143 a = &x; 153 destination += y_step;
144 b = &y; 154 diff -= ddx;
145 a_step = x_step; 155 }
146 b_step = y_step; 156 destination += x_step;
157 diff += ddy;
158 }
147 } else { 159 } else {
148 diff = ddx - dy; 160 // Y-major line (steep, |slope| > 1).
149 diff_inc_a = ddx; 161 int diff = ddx - dy;
150 diff_inc_b = ddy; 162 for (int i = 0; i <= dy; ++i) {
151 n_steps = dy; 163 *destination = clr;
152 a = &y; 164 if (diff >= 0) {
153 b = &x; 165 destination += x_step;
154 a_step = y_step; 166 diff -= ddy;
155 b_step = x_step; 167 }
156 } 168 destination += y_step;
157 169 diff += ddx;
158 // Draw the line with Bresenham's algorithm.
159 for (int i = 0; i <= n_steps; ++i) {
160 FRAMEBUFFER[y][x] = clr;
161 *a += a_step;
162 diff += diff_inc_a;
163 if (diff > 0) {
164 *b += b_step;
165 diff -= diff_inc_b;
166 } 170 }
167 } 171 }
168} 172}
@@ -233,7 +237,7 @@ wait_vsync() {
233// GBA needs to meet memory alignment requirements, we can't write a u8 into 237// GBA needs to meet memory alignment requirements, we can't write a u8 into
234// memory, instead we need to read a u16 word, mask and or the corresponding 238// memory, instead we need to read a u16 word, mask and or the corresponding
235// bits and save the updated u16. 239// bits and save the updated u16.
236static inline void 240static void
237put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) { 241put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) {
238 int buffer_index = (y * SCREEN_WIDTH + x) / 2; 242 int buffer_index = (y * SCREEN_WIDTH + x) / 2;
239 vu16 *destination = &buffer[buffer_index]; 243 vu16 *destination = &buffer[buffer_index];
@@ -247,7 +251,7 @@ put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) {
247 } 251 }
248} 252}
249 253
250static inline void 254static void
251draw_fill_rect_m4(int x0, int y0, int x1, int y1, u8 col_index, vu16 *buffer) { 255draw_fill_rect_m4(int x0, int y0, int x1, int y1, u8 col_index, vu16 *buffer) {
252 int ix, iy; 256 int ix, iy;
253 for(iy = y0; iy < y1; iy++) { 257 for(iy = y0; iy < y1; iy++) {