ppu: change the API to signal when redraw is required

To lower CPU load in idle mode (no changes on screen),
make ppu_pixel, ppu_1bpp and ppu_2bpp return non-zero when
a change has been made, and request a redraw only in that case.

CPU load drops 25% → 4% (piano.rom) after this change on
Linux amd64, ThinkPad X220.
This commit is contained in:
Sigrid Solveig Haflínudóttir 2021-09-19 23:30:53 +02:00
parent 781344268f
commit 6337680774
3 changed files with 22 additions and 15 deletions

View File

@ -31,54 +31,63 @@ ppu_clear(Ppu *p)
} }
} }
void int
ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color) ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color)
{ {
int row = (y % 8) + ((x / 8 + y / 8 * p->width / 8) * 16), col = x % 8; int row = (y % 8) + ((x / 8 + y / 8 * p->width / 8) * 16), col = x % 8, ret;
Uint8 w;
if(x >= p->width || y >= p->height) if(x >= p->width || y >= p->height)
return; return 0;
w = layer[row];
if(color == 0 || color == 2) if(color == 0 || color == 2)
layer[row] &= ~(1UL << (7 - col)); layer[row] &= ~(1UL << (7 - col));
else else
layer[row] |= 1UL << (7 - col); layer[row] |= 1UL << (7 - col);
ret = w ^ layer[row];
w = layer[row + 8];
if(color == 0 || color == 1) if(color == 0 || color == 1)
layer[row + 8] &= ~(1UL << (7 - col)); layer[row + 8] &= ~(1UL << (7 - col));
else else
layer[row + 8] |= 1UL << (7 - col); layer[row + 8] |= 1UL << (7 - col);
return ret | (w ^ layer[row + 8]);
} }
void int
ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy) ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy)
{ {
Uint16 v, h; Uint16 v, h;
int ret = 0;
for(v = 0; v < 8; v++) for(v = 0; v < 8; v++)
for(h = 0; h < 8; h++) { for(h = 0; h < 8; h++) {
Uint8 ch1 = (sprite[v] >> (7 - h)) & 0x1; Uint8 ch1 = (sprite[v] >> (7 - h)) & 0x1;
if(ch1 || blending[4][color]) if(ch1 || blending[4][color])
ppu_pixel(p, ret |= ppu_pixel(p,
layer, layer,
x + (flipx ? 7 - h : h), x + (flipx ? 7 - h : h),
y + (flipy ? 7 - v : v), y + (flipy ? 7 - v : v),
blending[ch1][color]); blending[ch1][color]);
} }
return ret;
} }
void int
ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy) ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy)
{ {
Uint16 v, h; Uint16 v, h;
int ret = 0;
for(v = 0; v < 8; v++) for(v = 0; v < 8; v++)
for(h = 0; h < 8; h++) { for(h = 0; h < 8; h++) {
Uint8 ch1 = ((sprite[v] >> (7 - h)) & 0x1); Uint8 ch1 = ((sprite[v] >> (7 - h)) & 0x1);
Uint8 ch2 = ((sprite[v + 8] >> (7 - h)) & 0x1); Uint8 ch2 = ((sprite[v + 8] >> (7 - h)) & 0x1);
Uint8 ch = ch1 + ch2 * 2; Uint8 ch = ch1 + ch2 * 2;
if(ch || blending[4][color]) if(ch || blending[4][color])
ppu_pixel(p, ret |= ppu_pixel(p,
layer, layer,
x + (flipx ? 7 - h : h), x + (flipx ? 7 - h : h),
y + (flipy ? 7 - v : v), y + (flipy ? 7 - v : v),
blending[ch][color]); blending[ch][color]);
} }
return ret;
} }
/* output */ /* output */

View File

@ -24,6 +24,6 @@ typedef struct Ppu {
int ppu_init(Ppu *p, Uint8 hor, Uint8 ver); int ppu_init(Ppu *p, Uint8 hor, Uint8 ver);
int ppu_resize(Ppu *p, Uint8 hor, Uint8 ver); int ppu_resize(Ppu *p, Uint8 hor, Uint8 ver);
void ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color); int ppu_pixel(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 color);
void ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy); int ppu_1bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy);
void ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy); int ppu_2bpp(Ppu *p, Uint8 *layer, Uint16 x, Uint16 y, Uint8 *sprite, Uint8 color, Uint8 flipx, Uint8 flipy);

View File

@ -353,25 +353,23 @@ screen_talk(Device *d, Uint8 b0, Uint8 w)
Uint16 x = peek16(d->dat, 0x8); Uint16 x = peek16(d->dat, 0x8);
Uint16 y = peek16(d->dat, 0xa); Uint16 y = peek16(d->dat, 0xa);
Uint8 layer = d->dat[0xe] & 0x40; Uint8 layer = d->dat[0xe] & 0x40;
ppu_pixel(&ppu, layer ? ppu.fg : ppu.bg, x, y, d->dat[0xe] & 0x3); reqdraw |= ppu_pixel(&ppu, layer ? ppu.fg : ppu.bg, x, y, d->dat[0xe] & 0x3);
if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 1); /* auto x+1 */ if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 1); /* auto x+1 */
if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 1); /* auto y+1 */ if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 1); /* auto y+1 */
reqdraw = 1;
} else if(b0 == 0xf) { } else if(b0 == 0xf) {
Uint16 x = peek16(d->dat, 0x8); Uint16 x = peek16(d->dat, 0x8);
Uint16 y = peek16(d->dat, 0xa); Uint16 y = peek16(d->dat, 0xa);
Uint8 layer = d->dat[0xf] & 0x40; Uint8 layer = d->dat[0xf] & 0x40;
Uint8 *addr = &d->mem[peek16(d->dat, 0xc)]; Uint8 *addr = &d->mem[peek16(d->dat, 0xc)];
if(d->dat[0xf] & 0x80) { if(d->dat[0xf] & 0x80) {
ppu_2bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20); reqdraw |= ppu_2bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20);
if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 16); /* auto addr+16 */ if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 16); /* auto addr+16 */
} else { } else {
ppu_1bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20); reqdraw |= ppu_1bpp(&ppu, layer ? ppu.fg : ppu.bg, x, y, addr, d->dat[0xf] & 0xf, d->dat[0xf] & 0x10, d->dat[0xf] & 0x20);
if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 8); /* auto addr+8 */ if(d->dat[0x6] & 0x04) poke16(d->dat, 0xc, peek16(d->dat, 0xc) + 8); /* auto addr+8 */
} }
if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 8); /* auto x+8 */ if(d->dat[0x6] & 0x01) poke16(d->dat, 0x8, x + 8); /* auto x+8 */
if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 8); /* auto y+8 */ if(d->dat[0x6] & 0x02) poke16(d->dat, 0xa, y + 8); /* auto y+8 */
reqdraw = 1;
} }
return 1; return 1;
} }