From 21aaf2e5f1720f9b6d227d29859b035c3367e9d7 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 16 Apr 2021 16:53:47 +0200 Subject: Update draw_line to improve performance The new implementation is based on the one found in TONC. Instead of updating two variables to address the framebuffer as FRAMEBUFFER[y][x], we update the pointer that points to the target memory destination. Dropping the `inline` specifier from the function, i.e. changing its declaration from `static inline void` to `static void` (the return type itself stays `void`), improves the performance significantly. Additionally, for some reason, if horizontal and vertical lines are special-cased with their own if branches, the performance once again improves. It may be due to the compiler knowing that there is no pointer aliasing, but I'm not sure about that. --- src/main.c | 106 ++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/src/main.c b/src/main.c index 0261e8a..b86a830 100644 --- a/src/main.c +++ b/src/main.c @@ -78,7 +78,7 @@ rgb15(u32 red, u32 green, u32 blue ) { #define COLOR_WHITE rgb15(28, 28, 28) // Using bd-font, an 8x8 bitmap font. -static inline void +static void put_char(int x, int y, Color clr, u8 chr) { for (size_t i = 0; i < 8; ++i) { for (size_t j = 0; j < 8; ++j) { @@ -89,7 +89,7 @@ put_char(int x, int y, Color clr, u8 chr) { } } -static inline void +static void put_text(int x, int y, Color clr, char *msg) { int count = 0; while (*msg) { @@ -100,69 +100,73 @@ put_text(int x, int y, Color clr, char *msg) { // Draws a line with the given color between (x0,y0) and (x1,y1) using the // Bresenham's line drawing algorithm using exclusively integer arithmetic. -static inline void +static void draw_line(int x0, int y0, int x1, int y1, Color clr) { - // Keep track of the coordinate for writing to the memory buffer. - int x = x0; - int y = y0; + // The line length in color units. + int pitch = SCREEN_WIDTH; + + // Pointer to the initial position of the screen buffer where we will start + // writing our data.
We need to multiply by 2 because in mode 3 we have + // 2 bytes per pixel. + vu16 *destination = (u16*)(SCREEN_BUFFER + y0 * pitch * 2 + x0 * 2); // Adjust the step direction and calculate deltas. - int x_step = 1; - int y_step = 1; - int dx = x1 - x0; - int dy = y1 - y0; + int x_step; + int dx; if (x0 > x1) { x_step = -1; dx = x0 - x1; + } else { + x_step = 1; + dx = x1 - x0; } + int y_step; + int dy; if (y0 > y1) { - y_step = -1; + y_step = -pitch; dy = y0 - y1; + } else { + y_step = +pitch; + dy = y1 - y0; } // Precalculate 2 * deltas for x and y. int ddx = dx + dx; int ddy = dy + dy; - // These variables are dependant on the slope. We can avoid considering - // separate cases for positive and negative slopes by using pointers to - // update the step in x or y. - int diff; - int diff_inc_a; - int diff_inc_b; - int n_steps; - int *a; - int *b; - int a_step; - int b_step; - if (dx >= dy) { - diff = ddy - dx; - diff_inc_a = ddy; - diff_inc_b = ddx; - n_steps = dx; - a = &x; - b = &y; - a_step = x_step; - b_step = y_step; + if(dy == 0) { + // Horizontal line. + for(int i = 0; i <= dx; i++) { + destination[i * x_step] = clr; + } + } else if(dx == 0) { + // Vertical line. + for(int i = 0; i <= dy; i++) { + destination[i * y_step] = clr; + } + } else if (dx >= dy){ + // Positive slope. + int diff = ddy - dx; + for (int i = 0; i <= dx; ++i) { + *destination = clr; + if (diff >= 0) { + destination += y_step; + diff -= ddx; + } + destination += x_step; + diff += ddy; + } } else { - diff = ddx - dy; - diff_inc_a = ddx; - diff_inc_b = ddy; - n_steps = dy; - a = &y; - b = &x; - a_step = y_step; - b_step = x_step; - } - - // Draw the line with Bresenham's algorithm. - for (int i = 0; i <= n_steps; ++i) { - FRAMEBUFFER[y][x] = clr; - *a += a_step; - diff += diff_inc_a; - if (diff > 0) { - *b += b_step; - diff -= diff_inc_b; + // Negative slope. 
+ int diff = ddx - dy; + for (int i = 0; i <= dy; ++i) { + *destination = clr; + if (diff >= 0) { + destination += x_step; + diff -= ddy; + } + destination += y_step; + diff += ddx; } } } @@ -233,7 +237,7 @@ wait_vsync() { // GBA needs to meet memory alignment requirements, we can't write a u8 into // memory, instead we need to read a u16 word, mask and or the corresponding // bits and wave the updated u16. -static inline void +static void put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) { int buffer_index = (y * SCREEN_WIDTH + x) / 2; vu16 *destination = &buffer[buffer_index]; @@ -247,7 +251,7 @@ put_pixel_m4(int x, int y, u8 col_index, vu16 *buffer) { } } -static inline void +static void draw_fill_rect_m4(int x0, int y0, int x1, int y1, u8 col_index, vu16 *buffer) { int ix, iy; for(iy = y0; iy < y1; iy++) { -- cgit v1.2.1