aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2021-09-10 15:26:59 +0200
committer Bad Diode <bd@badd10de.dev> 2021-09-10 15:26:59 +0200
commit ea02a49f9aa46a77aaef74f0d1c743332593b217 (patch)
tree 0262c687ce4aea53863d7d0c7068b93e53faede7
parent a1bc2913625b45c6ac28b856b8ee2051489479b7 (diff)
download uxnrpi-ea02a49f9aa46a77aaef74f0d1c743332593b217.tar.gz
uxnrpi-ea02a49f9aa46a77aaef74f0d1c743332593b217.zip
Force non-inlining of 16-bit memory poking functions
On the Raspberry Pi 4, the compiler was inlining mempoke16 and applying some other optimizations to it, which I presume breaks the memory alignment requirements on aarch64. By adding __attribute__ ((noinline)) to the uxn functions that deal with 16-bit operations, we can turn -O2 compiler optimizations back on and the code works properly again on hardware.
-rw-r--r-- Makefile 2
-rw-r--r-- src/common.h 16
-rw-r--r-- src/main.c 2
-rw-r--r-- src/uxn-fast.c 6
4 files changed, 13 insertions, 13 deletions
diff --git a/Makefile b/Makefile
index 1fced4f..348d974 100644
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,7 @@ OBJ_START = $(BUILD_DIR)/start.o
23OBJ_MAIN = $(BUILD_DIR)/main.o 23OBJ_MAIN = $(BUILD_DIR)/main.o
24SRC_LINK = $(SRC_DIR)/linker.ld 24SRC_LINK = $(SRC_DIR)/linker.ld
25 25
26CFLAGS := -Wall -ffreestanding -O1 -nostdlib -lgcc -mgeneral-regs-only -fstack-protector 26CFLAGS := -Wall -ffreestanding -O2 -nostdlib -lgcc -mgeneral-regs-only -fstack-protector
27CFLAGS += -DRPI_VERSION=$(RPI_VERSION) 27CFLAGS += -DRPI_VERSION=$(RPI_VERSION)
28AFLAGS := 28AFLAGS :=
29LDFLAGS := 29LDFLAGS :=
diff --git a/src/common.h b/src/common.h
index 94e499e..4c2c6b2 100644
--- a/src/common.h
+++ b/src/common.h
@@ -134,22 +134,22 @@ uart_init() {
134 134
135static inline void 135static inline void
136uart_putc(char c) { 136uart_putc(char c) {
137 while(!(AUX->mu_lsr & (1 << 5))); 137 while (!(AUX->mu_lsr & (1 << 5)));
138 AUX->mu_io = c; 138 AUX->mu_io = c;
139 return; 139 return;
140} 140}
141 141
142static inline char 142static inline char
143uart_getc() { 143uart_getc() {
144 while(!(AUX->mu_lsr & 1)); 144 while (!(AUX->mu_lsr & 1));
145 u8 c = AUX->mu_io & 0xFF; 145 u8 c = AUX->mu_io & 0xFF;
146 return c == '\r' ? '\n' : c; 146 return c == '\r' ? '\n' : c;
147} 147}
148 148
149static inline void 149static inline void
150uart_puts(char *s) { 150uart_puts(char *s) {
151 while(*s) { 151 while (*s) {
152 if(*s == '\n') { 152 if (*s == '\n') {
153 uart_putc('\r'); 153 uart_putc('\r');
154 } 154 }
155 uart_putc(*s++); 155 uart_putc(*s++);
@@ -160,7 +160,7 @@ static inline void
160uart_hex(unsigned int d) { 160uart_hex(unsigned int d) {
161 unsigned int n; 161 unsigned int n;
162 uart_puts("0x"); 162 uart_puts("0x");
163 for(int c = 28; c >= 0; c -= 4) { 163 for (int c = 28; c >= 0; c -= 4) {
164 n = (d>>c) & 0xF; 164 n = (d>>c) & 0xF;
165 n += n> 9 ? 0x37 : 0x30; 165 n += n> 9 ? 0x37 : 0x30;
166 uart_putc(n); 166 uart_putc(n);
@@ -238,14 +238,14 @@ typedef enum MboxChannels {
238 238
239static inline void 239static inline void
240mb_write(u8 channel, void *data) { 240mb_write(u8 channel, void *data) {
241 while(MBOX->status & MBOX_FULL); 241 while (MBOX->status & MBOX_FULL);
242 MBOX->write = ((uintptr_t)data & ~0xF) | (channel & 0xF); 242 MBOX->write = ((uintptr_t)data & ~0xF) | (channel & 0xF);
243} 243}
244 244
245static inline u32 245static inline u32
246mb_read(u8 channel) { 246mb_read(u8 channel) {
247 while(true) { 247 while (true) {
248 while(MBOX->status & MBOX_EMPTY); 248 while (MBOX->status & MBOX_EMPTY);
249 u32 data = MBOX->read; 249 u32 data = MBOX->read;
250 if ((u8)(data & 0xF) == channel) { 250 if ((u8)(data & 0xF) == channel) {
251 return data & 0xFFFFFFF0; 251 return data & 0xFFFFFFF0;
diff --git a/src/main.c b/src/main.c
index 7ac7037..c379ace 100644
--- a/src/main.c
+++ b/src/main.c
@@ -172,7 +172,7 @@ void main(void) {
172 172
173 uxn_eval(&u, 0x0100); 173 uxn_eval(&u, 0x0100);
174 u64 current_ticks = timer_get_ticks(); 174 u64 current_ticks = timer_get_ticks();
175 while(1) { 175 while (true) {
176 // Echo input to standard output. 176 // Echo input to standard output.
177 uxn_eval(&u, mempeek16(devscreen->dat, 0)); 177 uxn_eval(&u, mempeek16(devscreen->dat, 0));
178 178
diff --git a/src/uxn-fast.c b/src/uxn-fast.c
index 5066376..7ee1d04 100644
--- a/src/uxn-fast.c
+++ b/src/uxn-fast.c
@@ -30,9 +30,9 @@ static void mempoke8(Uint8 *m, Uint16 a, Uint8 b) { m[a] = b; }
30static Uint8 mempeek8(Uint8 *m, Uint16 a) { return m[a]; } 30static Uint8 mempeek8(Uint8 *m, Uint16 a) { return m[a]; }
31static void devpoke8(Device *d, Uint8 a, Uint8 b) { d->dat[a & 0xf] = b; d->talk(d, a & 0x0f, 1); } 31static void devpoke8(Device *d, Uint8 a, Uint8 b) { d->dat[a & 0xf] = b; d->talk(d, a & 0x0f, 1); }
32static Uint8 devpeek8(Device *d, Uint8 a) { d->talk(d, a & 0x0f, 0); return d->dat[a & 0xf]; } 32static Uint8 devpeek8(Device *d, Uint8 a) { d->talk(d, a & 0x0f, 0); return d->dat[a & 0xf]; }
33void mempoke16(Uint8 *m, Uint16 a, Uint16 b) { mempoke8(m, a, b >> 8); mempoke8(m, a + 1, b); } 33void __attribute__ ((noinline)) mempoke16(Uint8 *m, Uint16 a, Uint16 b) { mempoke8(m, a, b >> 8); mempoke8(m, a + 1, b); }
34Uint16 mempeek16(Uint8 *m, Uint16 a) { return (mempeek8(m, a) << 8) + mempeek8(m, a + 1); } 34Uint16 __attribute__ ((noinline)) mempeek16(Uint8 *m, Uint16 a) { return (mempeek8(m, a) << 8) + mempeek8(m, a + 1); }
35static void devpoke16(Device *d, Uint8 a, Uint16 b) { devpoke8(d, a, b >> 8); devpoke8(d, a + 1, b); } 35static void __attribute__ ((noinline)) devpoke16(Device *d, Uint8 a, Uint16 b) { devpoke8(d, a, b >> 8); devpoke8(d, a + 1, b); }
36 36
37/* clang-format on */ 37/* clang-format on */
38 38