diff --git a/build.sh b/build.sh
index f0e845a..1984d69 100755
--- a/build.sh
+++ b/build.sh
@@ -30,7 +30,7 @@ then
 fi
 
 mkdir -p bin
-CC="${CC:-cc}"
+CC="${CC:-clang}"
 CFLAGS="${CFLAGS:--std=c89 -Wall -Wno-unknown-pragmas}"
 case "$(uname -s 2>/dev/null)" in
 MSYS_NT*|MINGW*) # MSYS2 on Windows
diff --git a/src/devices/ppu_aarch64.c b/src/devices/ppu_aarch64.c
new file mode 100644
index 0000000..d0deb35
--- /dev/null
+++ b/src/devices/ppu_aarch64.c
@@ -0,0 +1,49 @@
+#include <arm_neon.h>
+#include "ppu.h"
+
+/*
+ * AArch64 NEON version of ppu_redraw: for every pixel, picks the fg layer
+ * value when it is non-zero and the bg layer value otherwise, looks that
+ * value up in p->palette and writes the color to screen. Also clears the
+ * changed flag of both layers.
+ */
+void
+ppu_redraw(Ppu *p, Uint32 *screen)
+{
+	/* Load 64 bytes of palette, de-interleaved into four byte planes, so
+	 * each plane works as a 16-entry table for vqtbl1q_u8 below.
+	 * NOTE(review): assumes p->palette holds at least 64 bytes - confirm
+	 * against ppu.h. */
+	uint8x16x4_t pal = vld4q_u8((Uint8*)p->palette);
+	Uint8 *fg = p->fg.pixels;
+	Uint8 *bg = p->bg.pixels;
+	int i, n;
+
+	p->fg.changed = p->bg.changed = 0;
+
+	__builtin_assume(p->width > 0 && p->height > 0);
+	n = p->width * p->height;
+
+	/* Vector body: 16 pixels per iteration; all three pointers advance. */
+	for(i = 0; i < (n & ~15); i += 16, fg += 16, bg += 16, screen += 16) {
+		uint8x16_t fg8 = vld1q_u8(fg);
+		uint8x16_t bg8 = vld1q_u8(bg);
+		/* fg == 0 selects the bg pixel, otherwise the fg pixel. */
+		uint8x16_t px8 = vbslq_u8(vceqzq_u8(fg8), bg8, fg8);
+		/* NOTE(review): byte 3 is forced to 0xff here, while the scalar
+		 * tail copies the palette entry unchanged - confirm this matches. */
+		uint8x16x4_t px = {
+			vqtbl1q_u8(pal.val[0], px8),
+			vqtbl1q_u8(pal.val[1], px8),
+			vqtbl1q_u8(pal.val[2], px8),
+			vdupq_n_u8(0xff),
+		};
+		vst4q_u8((uint8_t*)screen, px);
+	}
+
+	/* Scalar tail for the last (n % 16) pixels. fg, bg and screen already
+	 * point at pixel i after the vector loop, so step them along with i
+	 * instead of indexing screen by i again. */
+	for(; i < n; i++, fg++, bg++, screen++)
+		*screen = p->palette[*fg ? *fg : *bg];
+}