diff options
author | Bad Diode <bd@badd10de.dev> | 2021-09-10 15:26:59 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-09-10 15:26:59 +0200 |
commit | ea02a49f9aa46a77aaef74f0d1c743332593b217 (patch) | |
tree | 0262c687ce4aea53863d7d0c7068b93e53faede7 | |
parent | a1bc2913625b45c6ac28b856b8ee2051489479b7 (diff) | |
download | uxnrpi-ea02a49f9aa46a77aaef74f0d1c743332593b217.tar.gz uxnrpi-ea02a49f9aa46a77aaef74f0d1c743332593b217.zip |
Force non-inlining of 16-bit memory poking functions
On the Raspberry Pi 4, the compiler was inlining mempoke16 and doing
some other optimizations on it, which I presume break the memory
alignment requirements on aarch64. By adding __attribute__ ((noinline))
to the uxn functions that deal with 16-bit operations, we can turn
-O2 compiler optimizations back on and the code works properly again on
hardware.
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | src/common.h | 16 | ||||
-rw-r--r-- | src/main.c | 2 | ||||
-rw-r--r-- | src/uxn-fast.c | 6 |
4 files changed, 13 insertions, 13 deletions
@@ -23,7 +23,7 @@ OBJ_START = $(BUILD_DIR)/start.o | |||
23 | OBJ_MAIN = $(BUILD_DIR)/main.o | 23 | OBJ_MAIN = $(BUILD_DIR)/main.o |
24 | SRC_LINK = $(SRC_DIR)/linker.ld | 24 | SRC_LINK = $(SRC_DIR)/linker.ld |
25 | 25 | ||
26 | CFLAGS := -Wall -ffreestanding -O1 -nostdlib -lgcc -mgeneral-regs-only -fstack-protector | 26 | CFLAGS := -Wall -ffreestanding -O2 -nostdlib -lgcc -mgeneral-regs-only -fstack-protector |
27 | CFLAGS += -DRPI_VERSION=$(RPI_VERSION) | 27 | CFLAGS += -DRPI_VERSION=$(RPI_VERSION) |
28 | AFLAGS := | 28 | AFLAGS := |
29 | LDFLAGS := | 29 | LDFLAGS := |
diff --git a/src/common.h b/src/common.h index 94e499e..4c2c6b2 100644 --- a/src/common.h +++ b/src/common.h | |||
@@ -134,22 +134,22 @@ uart_init() { | |||
134 | 134 | ||
135 | static inline void | 135 | static inline void |
136 | uart_putc(char c) { | 136 | uart_putc(char c) { |
137 | while(!(AUX->mu_lsr & (1 << 5))); | 137 | while (!(AUX->mu_lsr & (1 << 5))); |
138 | AUX->mu_io = c; | 138 | AUX->mu_io = c; |
139 | return; | 139 | return; |
140 | } | 140 | } |
141 | 141 | ||
142 | static inline char | 142 | static inline char |
143 | uart_getc() { | 143 | uart_getc() { |
144 | while(!(AUX->mu_lsr & 1)); | 144 | while (!(AUX->mu_lsr & 1)); |
145 | u8 c = AUX->mu_io & 0xFF; | 145 | u8 c = AUX->mu_io & 0xFF; |
146 | return c == '\r' ? '\n' : c; | 146 | return c == '\r' ? '\n' : c; |
147 | } | 147 | } |
148 | 148 | ||
149 | static inline void | 149 | static inline void |
150 | uart_puts(char *s) { | 150 | uart_puts(char *s) { |
151 | while(*s) { | 151 | while (*s) { |
152 | if(*s == '\n') { | 152 | if (*s == '\n') { |
153 | uart_putc('\r'); | 153 | uart_putc('\r'); |
154 | } | 154 | } |
155 | uart_putc(*s++); | 155 | uart_putc(*s++); |
@@ -160,7 +160,7 @@ static inline void | |||
160 | uart_hex(unsigned int d) { | 160 | uart_hex(unsigned int d) { |
161 | unsigned int n; | 161 | unsigned int n; |
162 | uart_puts("0x"); | 162 | uart_puts("0x"); |
163 | for(int c = 28; c >= 0; c -= 4) { | 163 | for (int c = 28; c >= 0; c -= 4) { |
164 | n = (d>>c) & 0xF; | 164 | n = (d>>c) & 0xF; |
165 | n += n> 9 ? 0x37 : 0x30; | 165 | n += n> 9 ? 0x37 : 0x30; |
166 | uart_putc(n); | 166 | uart_putc(n); |
@@ -238,14 +238,14 @@ typedef enum MboxChannels { | |||
238 | 238 | ||
239 | static inline void | 239 | static inline void |
240 | mb_write(u8 channel, void *data) { | 240 | mb_write(u8 channel, void *data) { |
241 | while(MBOX->status & MBOX_FULL); | 241 | while (MBOX->status & MBOX_FULL); |
242 | MBOX->write = ((uintptr_t)data & ~0xF) | (channel & 0xF); | 242 | MBOX->write = ((uintptr_t)data & ~0xF) | (channel & 0xF); |
243 | } | 243 | } |
244 | 244 | ||
245 | static inline u32 | 245 | static inline u32 |
246 | mb_read(u8 channel) { | 246 | mb_read(u8 channel) { |
247 | while(true) { | 247 | while (true) { |
248 | while(MBOX->status & MBOX_EMPTY); | 248 | while (MBOX->status & MBOX_EMPTY); |
249 | u32 data = MBOX->read; | 249 | u32 data = MBOX->read; |
250 | if ((u8)(data & 0xF) == channel) { | 250 | if ((u8)(data & 0xF) == channel) { |
251 | return data & 0xFFFFFFF0; | 251 | return data & 0xFFFFFFF0; |
@@ -172,7 +172,7 @@ void main(void) { | |||
172 | 172 | ||
173 | uxn_eval(&u, 0x0100); | 173 | uxn_eval(&u, 0x0100); |
174 | u64 current_ticks = timer_get_ticks(); | 174 | u64 current_ticks = timer_get_ticks(); |
175 | while(1) { | 175 | while (true) { |
176 | // Echo input to standard output. | 176 | // Echo input to standard output. |
177 | uxn_eval(&u, mempeek16(devscreen->dat, 0)); | 177 | uxn_eval(&u, mempeek16(devscreen->dat, 0)); |
178 | 178 | ||
diff --git a/src/uxn-fast.c b/src/uxn-fast.c index 5066376..7ee1d04 100644 --- a/src/uxn-fast.c +++ b/src/uxn-fast.c | |||
@@ -30,9 +30,9 @@ static void mempoke8(Uint8 *m, Uint16 a, Uint8 b) { m[a] = b; } | |||
30 | static Uint8 mempeek8(Uint8 *m, Uint16 a) { return m[a]; } | 30 | static Uint8 mempeek8(Uint8 *m, Uint16 a) { return m[a]; } |
31 | static void devpoke8(Device *d, Uint8 a, Uint8 b) { d->dat[a & 0xf] = b; d->talk(d, a & 0x0f, 1); } | 31 | static void devpoke8(Device *d, Uint8 a, Uint8 b) { d->dat[a & 0xf] = b; d->talk(d, a & 0x0f, 1); } |
32 | static Uint8 devpeek8(Device *d, Uint8 a) { d->talk(d, a & 0x0f, 0); return d->dat[a & 0xf]; } | 32 | static Uint8 devpeek8(Device *d, Uint8 a) { d->talk(d, a & 0x0f, 0); return d->dat[a & 0xf]; } |
33 | void mempoke16(Uint8 *m, Uint16 a, Uint16 b) { mempoke8(m, a, b >> 8); mempoke8(m, a + 1, b); } | 33 | void __attribute__ ((noinline)) mempoke16(Uint8 *m, Uint16 a, Uint16 b) { mempoke8(m, a, b >> 8); mempoke8(m, a + 1, b); } |
34 | Uint16 mempeek16(Uint8 *m, Uint16 a) { return (mempeek8(m, a) << 8) + mempeek8(m, a + 1); } | 34 | Uint16 __attribute__ ((noinline)) mempeek16(Uint8 *m, Uint16 a) { return (mempeek8(m, a) << 8) + mempeek8(m, a + 1); } |
35 | static void devpoke16(Device *d, Uint8 a, Uint16 b) { devpoke8(d, a, b >> 8); devpoke8(d, a + 1, b); } | 35 | static void __attribute__ ((noinline)) devpoke16(Device *d, Uint8 a, Uint16 b) { devpoke8(d, a, b >> 8); devpoke8(d, a + 1, b); } |
36 | 36 | ||
37 | /* clang-format on */ | 37 | /* clang-format on */ |
38 | 38 | ||