From 6338f8d9a2043672ad58bcf9eb0ea83cf13c8d82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigrid=20Solveig=20Hafl=C3=ADnud=C3=B3ttir?=
Date: Sun, 26 Dec 2021 07:32:35 +0100
Subject: [PATCH] ppu_aarch64: initial version of ppu_redraw using Neon

---
 src/devices/ppu_aarch64.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 src/devices/ppu_aarch64.c

diff --git a/src/devices/ppu_aarch64.c b/src/devices/ppu_aarch64.c
new file mode 100644
index 0000000..b6cf37a
--- /dev/null
+++ b/src/devices/ppu_aarch64.c
@@ -0,0 +1,46 @@
+#include <arm_neon.h>
+#include "ppu.h"
+
+/*
+ * Compose the foreground and background layers into `screen`, 16 pixels
+ * per loop iteration.
+ *
+ * For each pixel the foreground index is used unless it is zero, in which
+ * case the background index is used instead. The chosen index selects a
+ * 4-byte palette entry, which is written to the output. Indices of 16 and
+ * above produce zero bytes (out-of-range table lookup).
+ *
+ * All pointers are declared 16-byte aligned to the compiler, and the loop
+ * only ever handles whole groups of 16 pixels (there is no scalar tail).
+ * NOTE(review): assumes width * height is a multiple of 16 and that
+ * p->palette spans at least 64 bytes -- confirm against ppu.h.
+ */
+void
+ppu_redraw(Ppu *p, Uint32 *screen)
+{
+	/* FIXME(sigrid): do this better */
+	Uint32 *rgba = __builtin_assume_aligned(screen, 16);
+	Uint8 *fg = __builtin_assume_aligned(p->fg.pixels, 16);
+	Uint8 *bg = __builtin_assume_aligned(p->bg.pixels, 16);
+	Uint8 *palette = __builtin_assume_aligned((Uint8*)p->palette, 16);
+	/* de-interleave 16 four-byte palette entries into one 16-byte table per byte lane */
+	uint8x16x4_t pal = vld4q_u8(palette); enum { R, G, B, A };
+	int i;
+
+	for(i = 0; i < p->width * p->height; i += 16, fg += 16, bg += 16, rgba += 16) {
+		uint8x16_t fg8 = vld1q_u8(fg);
+		uint8x16_t bg8 = vld1q_u8(bg);
+		/* 0xff in every lane whose foreground index is zero, 0x00 elsewhere */
+		uint8x16_t bgmask = vceqzq_u8(fg8);
+		/* bitwise select: bg where the mask is set, fg everywhere else */
+		uint8x16_t px8 = vbslq_u8(bgmask, bg8, fg8);
+		uint8x16x4_t px = {
+			vqtbl1q_u8(pal.val[R], px8),
+			vqtbl1q_u8(pal.val[G], px8),
+			vqtbl1q_u8(pal.val[B], px8),
+			vqtbl1q_u8(pal.val[A], px8),
+		};
+		/* re-interleave the four byte lanes into 16 packed 32-bit pixels */
+		vst4q_u8((uint8_t*)rgba, px);
+	}
+}