Merge branch '2cycle' into dev

This commit is contained in:
MysterD 2023-05-04 12:09:04 -07:00
commit 1e8a7c709b
20 changed files with 997 additions and 562 deletions

View file

@ -313,6 +313,9 @@ s64 DynOS_Gfx_ParseGfxConstants(const String& _Arg, bool* found) {
gfx_constant(CAP);
gfx_constant(METAL);
// Extended
gfx_constant(G_LIGHT_MAP_EXT);
// Common values
gfx_constant(CALC_DXT(4,G_IM_SIZ_4b_BYTES));
gfx_constant(CALC_DXT(8,G_IM_SIZ_4b_BYTES));

View file

@ -1 +1 @@
make RENDER_API=D3D11 WINDOW_API=DXGI DEBUG=1 DEVELOPMENT=1 && ./build/us_pc/sm64.us.f3dex2e.exe
make RENDER_API=D3D12 WINDOW_API=DXGI DEBUG=1 DEVELOPMENT=1 -j && ./build/us_pc/sm64.us.f3dex2e.exe

View file

@ -21,7 +21,7 @@
#define _GBI_H_
#include <PR/ultratypes.h>
#include "src/pc/djui/djui_gbi.h"
#include "gbi_extension.h"
/*
* To use the F3DEX ucodes, define F3DEX_GBI before including this file.

View file

@ -1,5 +1,15 @@
#pragma once
///////////////////////
// G_SETGEOMETRYMODE //
///////////////////////
#define G_LIGHT_MAP_EXT 0x00000800
//////////
// DJUI //
//////////
#define G_TEXCLIP_DJUI 0xe1
#define G_TEXOVERRIDE_DJUI 0xe0
#define G_DJUI_SIMPLE_VERT 0x11

View file

@ -181,9 +181,10 @@ void bhv_lll_bowser_puzzle_piece_action_1(void) {
*/
void bhv_lll_bowser_puzzle_piece_update(void) {
s8* nextAction = o->oBowserPuzzlePieceNextAction;
if (!nextAction) { return; }
// If Mario is standing on this puzzle piece, set a flag in the parent.
if (cur_obj_is_any_player_on_platform())
if (cur_obj_is_any_player_on_platform() && o->parentObj)
o->parentObj->oBowserPuzzleCompletionFlags = 1;
// If we should advance to the next action...
@ -199,7 +200,9 @@ void bhv_lll_bowser_puzzle_piece_update(void) {
// If we're at the end of the list...
if (*nextAction == -1) {
// Set the other completion flag in the parent.
o->parentObj->oBowserPuzzleCompletionFlags |= 2;
if (o->parentObj) {
o->parentObj->oBowserPuzzleCompletionFlags |= 2;
}
// The next action is the first action in the list again.
o->oBowserPuzzlePieceNextAction = o->oBowserPuzzlePieceActionList;

View file

@ -561,7 +561,9 @@ static void geo_process_camera(struct GraphNodeCamera *node) {
if (!increment_mat_stack()) { return; }
// save the camera matrix
mtxf_copy(gCamera->mtx, gMatStack[gMatStackIndex]);
if (gCamera) {
mtxf_copy(gCamera->mtx, gMatStack[gMatStackIndex]);
}
if (node->fnNode.node.children != 0) {
gCurGraphNodeCamera = node;

View file

@ -688,17 +688,17 @@ struct PcDebug gPcDebug = {
.tags = {
0x0000000000000000,
0x000000000000FFFF,
0x440C28A5CC404F11,
0x2783114DDB90E597,
0x0EF4AF18EEC1303A,
0x5E6A9446709E7CFF,
0x914FA1C52D410003,
0xE9A402C28144FD8B,
0x83B8B87B1E6A0B78,
0xEE7B0ED661ABA0ED,
0x2D1D50FB02617949,
0x8AEB7180FAE739EB,
0x0CDB1A233CC71057,
0x53D5D9880C8B278E,
0xE8E307BE5802542E,
0x8A3ACC4FDB4FFE45,
0x09046C2BA3C5000D,
0xF027964ADE989C29,
0x076CF19655C70007,
0x9325E55A037D6511,
0x77ACD7B422D978A6,
0x440C28A5CC404F11,
0xE9A402C28144FD8B,
0x9A2269E87B26BE68,
},
.id = DEFAULT_ID,

View file

@ -1,41 +1,294 @@
#include <stdio.h>
#include "PR/gbi.h"
#include "gfx_cc.h"
#include "gfx_pc.h"
void gfx_cc_get_features(uint32_t shader_id, struct CCFeatures *cc_features) {
for (int32_t i = 0; i < 4; i++) {
cc_features->c[0][i] = (shader_id >> (i * 3)) & 7;
cc_features->c[1][i] = (shader_id >> (12 + i * 3)) & 7;
static u8 sAllowCCPrint = 1;
void gfx_cc_get_features(struct ColorCombiner* cc, struct CCFeatures* ccf) {
// reset ccf
memset(ccf, 0, sizeof(struct CCFeatures));
int cmd_length = cc->cm.use_2cycle ? 16 : 8;
for (int i = 0; i < cmd_length; i++) {
u8 c = cc->shader_commands[i];
if (c >= SHADER_INPUT_1 && c <= SHADER_INPUT_8) {
if (c > ccf->num_inputs) { ccf->num_inputs = c; }
}
ccf->used_textures[0] = ccf->used_textures[0] || c == SHADER_TEXEL0 || c == SHADER_TEXEL0A;
ccf->used_textures[1] = ccf->used_textures[1] || c == SHADER_TEXEL1 || c == SHADER_TEXEL1A;
}
cc_features->opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0;
cc_features->opt_fog = (shader_id & SHADER_OPT_FOG) != 0;
cc_features->opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0;
cc_features->opt_noise = (shader_id & SHADER_OPT_NOISE) != 0;
// figure out optimizations
for (int i = 0; i < 16 / 4; i++) {
u8* c = &cc->shader_commands[i * 4];
ccf->do_single[i] = (c[2] == 0);
ccf->do_multiply[i] = (c[1] == 0 && c[3] == 0);
ccf->do_mix[i] = (c[1] == c[3]);
}
cc_features->used_textures[0] = false;
cc_features->used_textures[1] = false;
cc_features->num_inputs = 0;
ccf->color_alpha_same[0] = 1;
ccf->color_alpha_same[1] = 1;
for (int32_t i = 0; i < 2; i++) {
for (int32_t j = 0; j < 4; j++) {
if (cc_features->c[i][j] >= SHADER_INPUT_1 && cc_features->c[i][j] <= SHADER_INPUT_4) {
if (cc_features->c[i][j] > cc_features->num_inputs) {
cc_features->num_inputs = cc_features->c[i][j];
}
}
if (cc_features->c[i][j] == SHADER_TEXEL0 || cc_features->c[i][j] == SHADER_TEXEL0A) {
cc_features->used_textures[0] = true;
}
if (cc_features->c[i][j] == SHADER_TEXEL1) {
cc_features->used_textures[1] = true;
for (int i = 0; i < 2; i++) {
u8* cmd = &cc->shader_commands[i * 8];
for (int j = 0; j < 4; j++) {
if (cmd[j] != cmd[j + 4]) {
ccf->color_alpha_same[i] = 0;
break;
}
}
}
cc_features->do_single[0] = cc_features->c[0][2] == 0;
cc_features->do_single[1] = cc_features->c[1][2] == 0;
cc_features->do_multiply[0] = cc_features->c[0][1] == 0 && cc_features->c[0][3] == 0;
cc_features->do_multiply[1] = cc_features->c[1][1] == 0 && cc_features->c[1][3] == 0;
cc_features->do_mix[0] = cc_features->c[0][1] == cc_features->c[0][3];
cc_features->do_mix[1] = cc_features->c[1][1] == cc_features->c[1][3];
cc_features->color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff);
}
/**
 * Prints a combiner's combine mode to stdout as a gfx_pc_precomp_shader(...)
 * call, followed by the mode's hash as a trailing comment.
 *
 * Does nothing while sAllowCCPrint is zero, and prints nothing unless the
 * build defines DEVELOPMENT.
 *
 * @param cc combiner whose `cm` member is printed; must not be NULL.
 */
void gfx_cc_print(struct ColorCombiner *cc) {
    if (!sAllowCCPrint) { return; }
#ifdef DEVELOPMENT
    struct CombineMode* cm = &cc->cm;
    // The hash is a uint64_t. "%lx" expects unsigned long, which is only
    // 32 bits wide on LLP64 targets (e.g. Windows), so print it through
    // unsigned long long instead. The 32-bit words are cast to unsigned so
    // that "%x" matches its argument type exactly.
    printf("\n>> gfx_pc_precomp_shader(0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x); // %016llx\n",
           (unsigned)cm->rgb1,
           (unsigned)cm->alpha1,
           (unsigned)cm->rgb2,
           (unsigned)cm->alpha2,
           (unsigned)cm->flags,
           (unsigned long long)cm->hash);
#endif
}
/**
 * Feeds a fixed list of combine modes to gfx_pc_precomp_shader(), in order.
 *
 * sAllowCCPrint is cleared for the duration of the loop and set back to 1
 * afterwards, so gfx_cc_print() is a no-op while the list is processed.
 */
void gfx_cc_precomp(void) {
    // One row per call. Columns follow the argument order that gfx_cc_print()
    // emits: rgb1, alpha1, rgb2, alpha2, flags. The trailing comment on each
    // row is the hash that was printed alongside it.
    static const uint32_t sPrecompModes[][5] = {
        { 0x00030001, 0x02000000, 0x000a0004, 0x0a000b0b, 0x00000011 }, // 741f2ad014006ca1
        { 0x00040001, 0x00010005, 0x00040002, 0x0b020b05, 0x00000001 }, // 110404410ba7b38b
        { 0x00040001, 0x00030001, 0x00040002, 0x0b030b02, 0x00000001 }, // 110404410be9b307
        { 0x00040001, 0x00040001, 0x00040002, 0x0b040b02, 0x00000000 }, // 0084002100596a45
        { 0x00040001, 0x00040001, 0x00040002, 0x0b040b02, 0x00000001 }, // 110404410c0ab307
        { 0x00040001, 0x00040001, 0x00040002, 0x0b040b02, 0x00000005 }, // 110404410c0ab30b
        { 0x00040001, 0x00050001, 0x00040002, 0x0b050b02, 0x00000001 }, // 110404410c2bb307
        { 0x00040001, 0x00050001, 0x00040002, 0x0b050b02, 0x00000009 }, // 110404410c2bb30f
        { 0x00040001, 0x01000000, 0x00040002, 0x02000b0b, 0x00000001 }, // 110404412c86b2e6
        { 0x00040001, 0x04000000, 0x00040002, 0x04000b0b, 0x00000001 }, // 110404418f86b2e6
        { 0x00040001, 0x04000000, 0x0a000000, 0x0a000b0b, 0x00000013 }, // ec161ae4d6006ca3
        { 0x00040001, 0x05000000, 0x00040002, 0x05000b0b, 0x00000001 }, // 11040441b086b2e6
        { 0x00040001, 0x05000000, 0x00040002, 0x05000b0b, 0x00000009 }, // 11040441b086b2ee
        { 0x00040001, 0x05000000, 0x0a000000, 0x0a000b0b, 0x00000013 }, // ec161b7137006ca3
        { 0x00050001, 0x00050001, 0x00050002, 0x0b050b02, 0x00000001 }, // 154504410c2bb307
        { 0x00050004, 0x00050004, 0x00050004, 0x0b050b04, 0x00000001 }, // 154511040c2bb36a
        { 0x01000000, 0x00050001, 0x02000000, 0x0b050b02, 0x00000001 }, // 410000000c2bb307
        { 0x01000000, 0x01000000, 0x02000000, 0x02000000, 0x00000001 }, // 410000002c86b2e6
        { 0x01000000, 0x01000000, 0x02000000, 0x02000b0b, 0x00000005 }, // 410000002c86b2ea
        { 0x01000000, 0x01000000, 0x0a000000, 0x0a000b0b, 0x00000017 }, // 0b0000beb3006ca7
        { 0x01000000, 0x04000000, 0x02000000, 0x04000b0b, 0x00000000 }, // 2100000000596a45
        { 0x01000000, 0x04000000, 0x0a000000, 0x0a000b0b, 0x00000013 }, // 0b000263d6006ca3
        { 0x01000000, 0x05000000, 0x02000000, 0x05000b0b, 0x00000001 }, // 41000000b086b2e6
        { 0x01000000, 0x05000000, 0x0a000000, 0x0a000b0b, 0x00000013 }, // 0b0002f037006ca3
        { 0x01070102, 0x01000102, 0x0a000000, 0x04000b0b, 0x00000011 }, // c8c3b2bfed8de663
        { 0x04000000, 0x04000000, 0x04000000, 0x04000000, 0x00000001 }, // 040000008f86b2e6
        { 0x04000000, 0x04000000, 0x04000000, 0x04000b0b, 0x00000000 }, // 8400000000596a45
        { 0x04000000, 0x04000000, 0x04000000, 0x04000b0b, 0x00000005 }, // 040000008f86b2ea
        { 0x04000000, 0x04000000, 0x0a000000, 0x0a000b0b, 0x00000013 }, // 8e000263d6006ca3
        { 0x04000000, 0x05000000, 0x04000000, 0x05000b0b, 0x00000001 }, // 04000000b086b2e6
        { 0x04000000, 0x05000000, 0x04000000, 0x05000b0b, 0x00000005 }, // 04000000b086b2ea
        { 0x04000000, 0x05000000, 0x04000000, 0x05000b0b, 0x00000009 }, // 04000000b086b2ee
        { 0x04060401, 0x04000000, 0x04060402, 0x04000b0b, 0x00000000 }, // 84c6842100596a45
        { 0x04060401, 0x05000000, 0x04060402, 0x05000b0b, 0x00000001 }, // 1d970841b086b2e6
        { 0x01000000, 0x04000000, 0x02000000, 0x04000b0b, 0x00000001 }, // 410000008f86b2e6
    };
    const unsigned modeCount = (unsigned)(sizeof(sPrecompModes) / sizeof(sPrecompModes[0]));

    sAllowCCPrint = 0;
    for (unsigned row = 0; row < modeCount; row++) {
        const uint32_t *mode = sPrecompModes[row];
        gfx_pc_precomp_shader(mode[0], mode[1], mode[2], mode[3], mode[4]);
    }
    sAllowCCPrint = 1;
}
/**
 * Maps a G_CCMUX_* selector for the "a" slot of the color formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 (and their alpha
 *              forms) map to the opposite CC_TEXEL* id and COMBINED /
 *              COMBINED_ALPHA map to CC_COMBINED / CC_COMBINEDA; when zero,
 *              the COMBINED selectors map to CC_0.
 * @return the CC_* id, or CC_0 for any selector without a case.
 */
static uint8_t color_comb_component_a(uint32_t v, uint8_t cycle) {
    // Resolve every cycle-dependent source once, then dispatch.
    const uint8_t combined   = cycle ? CC_COMBINED  : CC_0;
    const uint8_t combined_a = cycle ? CC_COMBINEDA : CC_0;
    const uint8_t texel0     = cycle ? CC_TEXEL1    : CC_TEXEL0;
    const uint8_t texel1     = cycle ? CC_TEXEL0    : CC_TEXEL1;
    const uint8_t texel0_a   = cycle ? CC_TEXEL1A   : CC_TEXEL0A;
    const uint8_t texel1_a   = cycle ? CC_TEXEL0A   : CC_TEXEL1A;

    uint8_t result = CC_0;
    switch (v) {
        case G_CCMUX_COMBINED:        result = combined;   break;
        case G_CCMUX_TEXEL0:          result = texel0;     break;
        case G_CCMUX_TEXEL1:          result = texel1;     break;
        case G_CCMUX_PRIMITIVE:       result = CC_PRIM;    break;
        case G_CCMUX_SHADE:           result = CC_SHADE;   break;
        case G_CCMUX_ENVIRONMENT:     result = CC_ENV;     break;
        case G_CCMUX_1:               result = CC_1;       break;
        // G_CCMUX_NOISE has no case of its own here (CC_NOISE mapping is disabled).
        case G_CCMUX_COMBINED_ALPHA:  result = combined_a; break;
        case G_CCMUX_TEXEL0_ALPHA:    result = texel0_a;   break;
        case G_CCMUX_TEXEL1_ALPHA:    result = texel1_a;   break;
        case G_CCMUX_PRIMITIVE_ALPHA: result = CC_PRIMA;   break;
        case G_CCMUX_SHADE_ALPHA:     result = CC_SHADEA;  break;
        case G_CCMUX_ENV_ALPHA:       result = CC_ENVA;    break;
        case G_CCMUX_0:
        default:                      result = CC_0;       break;
    }
    return result;
}
/**
 * Maps a G_CCMUX_* selector for the "b" slot of the color formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 (and their alpha
 *              forms) map to the opposite CC_TEXEL* id and COMBINED /
 *              COMBINED_ALPHA map to CC_COMBINED / CC_COMBINEDA; when zero,
 *              the COMBINED selectors map to CC_0.
 * @return the CC_* id, or CC_0 for any selector without a case.
 */
static uint8_t color_comb_component_b(uint32_t v, uint8_t cycle) {
    // Resolve every cycle-dependent source once, then dispatch.
    const uint8_t combined   = cycle ? CC_COMBINED  : CC_0;
    const uint8_t combined_a = cycle ? CC_COMBINEDA : CC_0;
    const uint8_t texel0     = cycle ? CC_TEXEL1    : CC_TEXEL0;
    const uint8_t texel1     = cycle ? CC_TEXEL0    : CC_TEXEL1;
    const uint8_t texel0_a   = cycle ? CC_TEXEL1A   : CC_TEXEL0A;
    const uint8_t texel1_a   = cycle ? CC_TEXEL0A   : CC_TEXEL1A;

    uint8_t result = CC_0;
    switch (v) {
        case G_CCMUX_COMBINED:        result = combined;   break;
        case G_CCMUX_TEXEL0:          result = texel0;     break;
        case G_CCMUX_TEXEL1:          result = texel1;     break;
        case G_CCMUX_PRIMITIVE:       result = CC_PRIM;    break;
        case G_CCMUX_SHADE:           result = CC_SHADE;   break;
        case G_CCMUX_ENVIRONMENT:     result = CC_ENV;     break;
        // G_CCMUX_CENTER and G_CCMUX_K4 have no case of their own here
        // (their CC_CENTER / CC_K4 mappings are disabled).
        case G_CCMUX_COMBINED_ALPHA:  result = combined_a; break;
        case G_CCMUX_TEXEL0_ALPHA:    result = texel0_a;   break;
        case G_CCMUX_TEXEL1_ALPHA:    result = texel1_a;   break;
        case G_CCMUX_PRIMITIVE_ALPHA: result = CC_PRIMA;   break;
        case G_CCMUX_SHADE_ALPHA:     result = CC_SHADEA;  break;
        case G_CCMUX_ENV_ALPHA:       result = CC_ENVA;    break;
        case G_CCMUX_0:
        default:                      result = CC_0;       break;
    }
    return result;
}
/**
 * Maps a G_CCMUX_* selector for the "c" slot of the color formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 map to the
 *              opposite CC_TEXEL* id and COMBINED / COMBINED_ALPHA map to
 *              CC_COMBINED / CC_COMBINEDA; when zero, the COMBINED selectors
 *              map to CC_0. The TEXEL0_ALPHA / TEXEL1_ALPHA selectors do not
 *              depend on cycle in this slot.
 * @return the CC_* id, or CC_0 for any selector without a case.
 */
static uint8_t color_comb_component_c(uint32_t v, uint8_t cycle) {
    // Resolve every cycle-dependent source once, then dispatch.
    const uint8_t combined   = cycle ? CC_COMBINED  : CC_0;
    const uint8_t combined_a = cycle ? CC_COMBINEDA : CC_0;
    const uint8_t texel0     = cycle ? CC_TEXEL1    : CC_TEXEL0;
    const uint8_t texel1     = cycle ? CC_TEXEL0    : CC_TEXEL1;

    uint8_t result = CC_0;
    switch (v) {
        case G_CCMUX_COMBINED:        result = combined;   break;
        case G_CCMUX_TEXEL0:          result = texel0;     break;
        case G_CCMUX_TEXEL1:          result = texel1;     break;
        case G_CCMUX_PRIMITIVE:       result = CC_PRIM;    break;
        case G_CCMUX_SHADE:           result = CC_SHADE;   break;
        case G_CCMUX_ENVIRONMENT:     result = CC_ENV;     break;
        case G_CCMUX_COMBINED_ALPHA:  result = combined_a; break;
        // Unlike the other slots, texel alpha is not swapped by cycle here.
        case G_CCMUX_TEXEL0_ALPHA:    result = CC_TEXEL0A; break;
        case G_CCMUX_TEXEL1_ALPHA:    result = CC_TEXEL1A; break;
        case G_CCMUX_PRIMITIVE_ALPHA: result = CC_PRIMA;   break;
        case G_CCMUX_SHADE_ALPHA:     result = CC_SHADEA;  break;
        case G_CCMUX_ENV_ALPHA:       result = CC_ENVA;    break;
        case G_CCMUX_LOD_FRACTION:    result = CC_LOD;     break;
        // G_CCMUX_CENTER, G_CCMUX_PRIM_LOD_FRAC and G_CCMUX_K5 have no case
        // of their own here (their mappings are disabled).
        case G_CCMUX_0:
        default:                      result = CC_0;       break;
    }
    return result;
}
/**
 * Maps a G_CCMUX_* selector for the "d" slot of the color formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 (and their alpha
 *              forms) map to the opposite CC_TEXEL* id and COMBINED maps to
 *              CC_COMBINED; when zero, COMBINED maps to CC_0.
 * @return the CC_* id, or CC_0 for any selector without a case
 *         (G_CCMUX_COMBINED_ALPHA has no case in this slot).
 */
static uint8_t color_comb_component_d(uint32_t v, uint8_t cycle) {
    // Resolve every cycle-dependent source once, then dispatch.
    const uint8_t combined = cycle ? CC_COMBINED : CC_0;
    const uint8_t texel0   = cycle ? CC_TEXEL1   : CC_TEXEL0;
    const uint8_t texel1   = cycle ? CC_TEXEL0   : CC_TEXEL1;
    const uint8_t texel0_a = cycle ? CC_TEXEL1A  : CC_TEXEL0A;
    const uint8_t texel1_a = cycle ? CC_TEXEL0A  : CC_TEXEL1A;

    uint8_t result = CC_0;
    switch (v) {
        case G_CCMUX_COMBINED:        result = combined;  break;
        case G_CCMUX_TEXEL0:          result = texel0;    break;
        case G_CCMUX_TEXEL1:          result = texel1;    break;
        case G_CCMUX_PRIMITIVE:       result = CC_PRIM;   break;
        case G_CCMUX_SHADE:           result = CC_SHADE;  break;
        case G_CCMUX_ENVIRONMENT:     result = CC_ENV;    break;
        case G_CCMUX_1:               result = CC_1;      break;
        case G_CCMUX_TEXEL0_ALPHA:    result = texel0_a;  break;
        case G_CCMUX_TEXEL1_ALPHA:    result = texel1_a;  break;
        case G_CCMUX_PRIMITIVE_ALPHA: result = CC_PRIMA;  break;
        case G_CCMUX_SHADE_ALPHA:     result = CC_SHADEA; break;
        case G_CCMUX_ENV_ALPHA:       result = CC_ENVA;   break;
        case G_CCMUX_0:
        default:                      result = CC_0;      break;
    }
    return result;
}
/**
 * Packs the four translated color-formula slots into one 32-bit word.
 *
 * Layout of the result: slot a in bits 0-7, b in bits 8-15, c in bits 16-23,
 * d in bits 24-31. Each slot holds the CC_* id returned by the matching
 * color_comb_component_{a,b,c,d}() helper.
 *
 * @param a,b,c,d G_CCMUX_* selectors for the four slots.
 * @param cycle   passed through unchanged to every helper.
 * @return the packed word.
 */
uint32_t color_comb_rgb(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle) {
    // Widen to uint32_t before shifting: a uint8_t operand would otherwise be
    // promoted to signed int, and "<< 24" on a value >= 0x80 overflows it.
    const uint32_t slot_a = color_comb_component_a(a, cycle);
    const uint32_t slot_b = color_comb_component_b(b, cycle);
    const uint32_t slot_c = color_comb_component_c(c, cycle);
    const uint32_t slot_d = color_comb_component_d(d, cycle);

    return slot_a | (slot_b << 8) | (slot_c << 16) | (slot_d << 24);
}
/**
 * Maps a G_CCMUX_* selector for the "a" slot of the alpha formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 (and their alpha
 *              forms) map to the opposite CC_TEXEL* id and COMBINED /
 *              COMBINED_ALPHA map to CC_COMBINED / CC_COMBINEDA; when zero,
 *              the COMBINED selectors map to CC_0.
 * @return the CC_* id, or CC_0 for any selector without a case.
 */
static uint8_t color_comb_component_a_alpha(uint32_t v, uint8_t cycle) {
    // Resolve every cycle-dependent source once, then dispatch.
    const uint8_t combined_a = cycle ? CC_COMBINEDA : CC_0;
    const uint8_t combined   = cycle ? CC_COMBINED  : CC_0;
    const uint8_t texel0_a   = cycle ? CC_TEXEL1A   : CC_TEXEL0A;
    const uint8_t texel1_a   = cycle ? CC_TEXEL0A   : CC_TEXEL1A;
    const uint8_t texel0     = cycle ? CC_TEXEL1    : CC_TEXEL0;
    const uint8_t texel1     = cycle ? CC_TEXEL0    : CC_TEXEL1;

    uint8_t result = CC_0;
    switch (v) {
        case G_CCMUX_COMBINED_ALPHA:  result = combined_a; break;
        case G_CCMUX_TEXEL0_ALPHA:    result = texel0_a;   break;
        case G_CCMUX_TEXEL1_ALPHA:    result = texel1_a;   break;
        case G_CCMUX_PRIMITIVE_ALPHA: result = CC_PRIMA;   break;
        case G_CCMUX_SHADE_ALPHA:     result = CC_SHADEA;  break;
        case G_CCMUX_ENV_ALPHA:       result = CC_ENVA;    break;
        case G_CCMUX_1:               result = CC_1;       break;
        case G_CCMUX_COMBINED:        result = combined;   break;
        case G_CCMUX_TEXEL0:          result = texel0;     break;
        case G_CCMUX_TEXEL1:          result = texel1;     break;
        case G_CCMUX_PRIMITIVE:       result = CC_PRIM;    break;
        case G_CCMUX_SHADE:           result = CC_SHADE;   break;
        case G_CCMUX_ENVIRONMENT:     result = CC_ENV;     break;
        case G_CCMUX_0:
        default:                      result = CC_0;       break;
    }
    return result;
}
/**
 * Maps a G_CCMUX_* selector for the "b" slot of the alpha formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 (and their alpha
 *              forms) map to the opposite CC_TEXEL* id and COMBINED /
 *              COMBINED_ALPHA map to CC_COMBINED / CC_COMBINEDA; when zero,
 *              the COMBINED selectors map to CC_0.
 * @return the CC_* id, or CC_0 for any selector without a case.
 */
static uint8_t color_comb_component_b_alpha(uint32_t v, uint8_t cycle) {
    // Resolve every cycle-dependent source once, then dispatch.
    const uint8_t combined_a = cycle ? CC_COMBINEDA : CC_0;
    const uint8_t combined   = cycle ? CC_COMBINED  : CC_0;
    const uint8_t texel0_a   = cycle ? CC_TEXEL1A   : CC_TEXEL0A;
    const uint8_t texel1_a   = cycle ? CC_TEXEL0A   : CC_TEXEL1A;
    const uint8_t texel0     = cycle ? CC_TEXEL1    : CC_TEXEL0;
    const uint8_t texel1     = cycle ? CC_TEXEL0    : CC_TEXEL1;

    uint8_t result = CC_0;
    switch (v) {
        case G_CCMUX_COMBINED_ALPHA:  result = combined_a; break;
        case G_CCMUX_TEXEL0_ALPHA:    result = texel0_a;   break;
        case G_CCMUX_TEXEL1_ALPHA:    result = texel1_a;   break;
        case G_CCMUX_PRIMITIVE_ALPHA: result = CC_PRIMA;   break;
        case G_CCMUX_SHADE_ALPHA:     result = CC_SHADEA;  break;
        case G_CCMUX_ENV_ALPHA:       result = CC_ENVA;    break;
        case G_CCMUX_1:               result = CC_1;       break;
        case G_CCMUX_COMBINED:        result = combined;   break;
        case G_CCMUX_TEXEL0:          result = texel0;     break;
        case G_CCMUX_TEXEL1:          result = texel1;     break;
        case G_CCMUX_PRIMITIVE:       result = CC_PRIM;    break;
        case G_CCMUX_SHADE:           result = CC_SHADE;   break;
        case G_CCMUX_ENVIRONMENT:     result = CC_ENV;     break;
        case G_CCMUX_0:
        default:                      result = CC_0;       break;
    }
    return result;
}
/**
 * Maps a G_CCMUX_* selector for the "c" slot of the alpha formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 and
 *              TEXEL0_ALPHA/TEXEL1_ALPHA map to the opposite CC_TEXEL* id.
 * @return the CC_* id, or CC_0 for any selector without a case
 *         (the COMBINED selectors have no case in this slot).
 */
static uint8_t color_comb_component_c_alpha(uint32_t v, uint8_t cycle) {
    switch (v) {
        case G_CCMUX_LOD_FRACTION:    return CC_LOD;
        case G_CCMUX_TEXEL0_ALPHA:    return cycle ? CC_TEXEL1A : CC_TEXEL0A;
        // Previously both arms returned CC_TEXEL1A, so with a nonzero cycle
        // TEXEL0_ALPHA and TEXEL1_ALPHA were indistinguishable. Swap to
        // CC_TEXEL0A like every other cycle-aware texel mapping in this file.
        case G_CCMUX_TEXEL1_ALPHA:    return cycle ? CC_TEXEL0A : CC_TEXEL1A;
        case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA;
        case G_CCMUX_SHADE_ALPHA:     return CC_SHADEA;
        case G_CCMUX_ENV_ALPHA:       return CC_ENVA;
        // G_CCMUX_PRIM_LOD_FRAC has no case of its own here
        // (its CC_PRIM_LOD_FRACTION mapping is disabled).
        case G_CCMUX_0:               return CC_0;
        case G_CCMUX_TEXEL0:          return cycle ? CC_TEXEL1 : CC_TEXEL0;
        case G_CCMUX_TEXEL1:          return cycle ? CC_TEXEL0 : CC_TEXEL1;
        case G_CCMUX_PRIMITIVE:       return CC_PRIM;
        case G_CCMUX_SHADE:           return CC_SHADE;
        case G_CCMUX_ENVIRONMENT:     return CC_ENV;
        default:                      return CC_0;
    }
}
/**
 * Maps a G_CCMUX_* selector for the "d" slot of the alpha formula to a CC_* id.
 *
 * @param v     G_CCMUX_* selector value.
 * @param cycle zero or nonzero. When nonzero, TEXEL0/TEXEL1 (and their alpha
 *              forms) map to the opposite CC_TEXEL* id and COMBINED /
 *              COMBINED_ALPHA map to CC_COMBINED / CC_COMBINEDA; when zero,
 *              the COMBINED selectors map to CC_0.
 * @return the CC_* id, or CC_0 for any selector without a case.
 */
static uint8_t color_comb_component_d_alpha(uint32_t v, uint8_t cycle) {
    // Resolve every cycle-dependent source once, then dispatch.
    const uint8_t combined_a = cycle ? CC_COMBINEDA : CC_0;
    const uint8_t combined   = cycle ? CC_COMBINED  : CC_0;
    const uint8_t texel0_a   = cycle ? CC_TEXEL1A   : CC_TEXEL0A;
    const uint8_t texel1_a   = cycle ? CC_TEXEL0A   : CC_TEXEL1A;
    const uint8_t texel0     = cycle ? CC_TEXEL1    : CC_TEXEL0;
    const uint8_t texel1     = cycle ? CC_TEXEL0    : CC_TEXEL1;

    uint8_t result = CC_0;
    switch (v) {
        case G_CCMUX_COMBINED_ALPHA:  result = combined_a; break;
        case G_CCMUX_TEXEL0_ALPHA:    result = texel0_a;   break;
        case G_CCMUX_TEXEL1_ALPHA:    result = texel1_a;   break;
        case G_CCMUX_PRIMITIVE_ALPHA: result = CC_PRIMA;   break;
        case G_CCMUX_SHADE_ALPHA:     result = CC_SHADEA;  break;
        case G_CCMUX_ENV_ALPHA:       result = CC_ENVA;    break;
        case G_CCMUX_1:               result = CC_1;       break;
        case G_CCMUX_COMBINED:        result = combined;   break;
        case G_CCMUX_TEXEL0:          result = texel0;     break;
        case G_CCMUX_TEXEL1:          result = texel1;     break;
        case G_CCMUX_PRIMITIVE:       result = CC_PRIM;    break;
        case G_CCMUX_SHADE:           result = CC_SHADE;   break;
        case G_CCMUX_ENVIRONMENT:     result = CC_ENV;     break;
        case G_CCMUX_0:
        default:                      result = CC_0;       break;
    }
    return result;
}
/**
 * Packs the four translated alpha-formula slots into one 32-bit word.
 *
 * Layout of the result: slot a in bits 0-7, b in bits 8-15, c in bits 16-23,
 * d in bits 24-31. Each slot holds the CC_* id returned by the matching
 * color_comb_component_{a,b,c,d}_alpha() helper.
 *
 * @param a,b,c,d G_CCMUX_* selectors for the four slots.
 * @param cycle   passed through unchanged to every helper.
 * @return the packed word.
 */
uint32_t color_comb_alpha(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle) {
    // Widen to uint32_t before shifting: a uint8_t operand would otherwise be
    // promoted to signed int, and "<< 24" on a value >= 0x80 overflows it.
    const uint32_t slot_a = color_comb_component_a_alpha(a, cycle);
    const uint32_t slot_b = color_comb_component_b_alpha(b, cycle);
    const uint32_t slot_c = color_comb_component_c_alpha(c, cycle);
    const uint32_t slot_d = color_comb_component_d_alpha(d, cycle);

    return slot_a | (slot_b << 8) | (slot_c << 16) | (slot_d << 24);
}

View file

@ -12,7 +12,15 @@ enum {
CC_SHADE,
CC_ENV,
CC_TEXEL0A,
CC_LOD
CC_LOD,
CC_1,
CC_TEXEL1A,
CC_COMBINED,
CC_COMBINEDA,
CC_PRIMA,
CC_SHADEA,
CC_ENVA,
CC_ENUM_MAX,
};
enum {
@ -21,9 +29,17 @@ enum {
SHADER_INPUT_2,
SHADER_INPUT_3,
SHADER_INPUT_4,
SHADER_INPUT_5,
SHADER_INPUT_6,
SHADER_INPUT_7,
SHADER_INPUT_8,
SHADER_TEXEL0,
SHADER_TEXEL0A,
SHADER_TEXEL1
SHADER_TEXEL1,
SHADER_TEXEL1A,
SHADER_1,
SHADER_COMBINED,
SHADER_COMBINEDA,
};
#define SHADER_OPT_ALPHA (1 << 24)
@ -32,24 +48,66 @@ enum {
#define SHADER_OPT_NOISE (1 << 27)
struct CCFeatures {
uint8_t c[2][4];
bool opt_alpha;
bool opt_fog;
bool opt_texture_edge;
bool opt_noise;
bool used_textures[2];
int num_inputs;
bool do_single[2];
bool do_multiply[2];
bool do_mix[2];
bool color_alpha_same;
bool do_single[4];
bool do_multiply[4];
bool do_mix[4];
bool color_alpha_same[2];
};
#pragma pack(1)
// One combine-mode description: four 32-bit combiner words, a set of option
// bits, and a 64-bit hash. Declared between #pragma pack(1) and #pragma pack(),
// so the byte-packed layout applies to this struct.
struct CombineMode {
// The four combiner words, also addressable as 16 raw bytes via all_values.
union {
struct {
// Presumably the first-cycle color and alpha words as packed by
// color_comb_rgb() / color_comb_alpha() - TODO confirm against the caller.
uint32_t rgb1;
uint32_t alpha1;
// Presumably the second-cycle counterparts - TODO confirm.
uint32_t rgb2;
uint32_t alpha2;
};
uint8_t all_values[16];
};
// Option bits, readable individually or all at once through `flags`.
// NOTE(review): bit-field allocation order is implementation-defined, so the
// correspondence between these names and literal `flags` values depends on
// the compiler - confirm on every supported toolchain.
union {
struct {
uint8_t use_alpha : 1;
uint8_t use_fog : 1;
uint8_t texture_edge : 1;
uint8_t use_noise : 1;
// Selects whether gfx_cc_get_features() scans 16 or only 8 shader commands.
uint8_t use_2cycle : 1;
uint8_t light_map : 1;
};
uint32_t flags;
};
// Printed by gfx_cc_print() after the five values above; how it is computed
// is not visible in this header.
uint64_t hash;
};
#pragma pack()
#define SHADER_CMD_LENGTH 16
#define CC_MAX_SHADERS 64
// A combine mode together with the shader program associated with it and the
// per-slot command / input-mapping bytes derived for that mode.
struct ColorCombiner {
// The combine mode this combiner was built for.
struct CombineMode cm;
// Backend shader program handle (opaque here).
struct ShaderProgram *prg;
// NOTE(review): 16 bytes span only 2 uint64_t; the [8] alias makes this
// union 64 bytes wide. Confirm whether [2] was intended before relying on
// the extra space.
union {
uint8_t shader_input_mapping[16];
uint64_t shader_input_mapping_as_u64[8];
};
// SHADER_* command bytes. gfx_cc_get_features() reads the first 8 entries,
// or all 16 when cm.use_2cycle is set.
// NOTE(review): same [8]-vs-[2] size mismatch as above.
union {
uint8_t shader_commands[16];
uint64_t shader_commands_as_u64[8];
};
// Key the D3D11/D3D12 lookup_shader functions compare against when searching
// their program pools.
uint64_t hash;
};
#ifdef __cplusplus
extern "C" {
#endif
void gfx_cc_get_features(uint32_t shader_id, struct CCFeatures *cc_features);
void gfx_cc_get_features(struct ColorCombiner* cc, struct CCFeatures *cc_features);
void gfx_cc_print(struct ColorCombiner *cc);
void gfx_cc_precomp(void);
uint32_t color_comb_rgb(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle);
uint32_t color_comb_alpha(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle);
#ifdef __cplusplus
}

View file

@ -68,7 +68,7 @@ struct ShaderProgramD3D11 {
ComPtr<ID3D11InputLayout> input_layout;
ComPtr<ID3D11BlendState> blend_state;
uint32_t shader_id;
uint64_t hash;
uint8_t num_inputs;
uint8_t num_floats;
bool used_textures[2];
@ -103,8 +103,9 @@ static struct {
PerFrameCB per_frame_cb_data;
PerDrawCB per_draw_cb_data;
struct ShaderProgramD3D11 shader_program_pool[64];
struct ShaderProgramD3D11 shader_program_pool[CC_MAX_SHADERS];
uint8_t shader_program_pool_size;
uint8_t shader_program_pool_index;
std::vector<struct TextureData> textures;
int current_tile;
@ -323,14 +324,14 @@ static void gfx_d3d11_load_shader(struct ShaderProgram *new_prg) {
d3d.shader_program = (struct ShaderProgramD3D11 *)new_prg;
}
static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shader_id) {
CCFeatures cc_features;
gfx_cc_get_features(shader_id, &cc_features);
static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(struct ColorCombiner* cc) {
CCFeatures cc_features = { 0 };
gfx_cc_get_features(cc, &cc_features);
char buf[4096];
size_t len, num_floats;
gfx_direct3d_common_build_shader(buf, len, num_floats, cc_features, false, THREE_POINT_FILTERING);
gfx_direct3d_common_build_shader(buf, len, num_floats, *cc, cc_features, false, THREE_POINT_FILTERING);
ComPtr<ID3DBlob> vs, ps;
ComPtr<ID3DBlob> error_blob;
@ -355,7 +356,9 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade
throw hr;
}
struct ShaderProgramD3D11 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++];
struct ShaderProgramD3D11 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_index];
d3d.shader_program_pool_index = (d3d.shader_program_pool_index + 1) % CC_MAX_SHADERS;
if (d3d.shader_program_pool_size < CC_MAX_SHADERS) { d3d.shader_program_pool_size++; }
ThrowIfFailed(d3d.device->CreateVertexShader(vs->GetBufferPointer(), vs->GetBufferSize(), nullptr, prg->vertex_shader.GetAddressOf()));
ThrowIfFailed(d3d.device->CreatePixelShader(ps->GetBufferPointer(), ps->GetBufferSize(), nullptr, prg->pixel_shader.GetAddressOf()));
@ -368,11 +371,14 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
ied[ied_index++] = { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
}
if (cc_features.opt_fog) {
if (cc->cm.use_fog) {
ied[ied_index++] = { "FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
}
if (cc->cm.light_map) {
ied[ied_index++] = { "LIGHTMAP", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
}
for (uint32_t i = 0; i < cc_features.num_inputs; i++) {
DXGI_FORMAT format = cc_features.opt_alpha ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
DXGI_FORMAT format = cc->cm.use_alpha ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
ied[ied_index++] = { "INPUT", i, format, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 };
}
@ -383,7 +389,7 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade
D3D11_BLEND_DESC blend_desc;
ZeroMemory(&blend_desc, sizeof(D3D11_BLEND_DESC));
if (cc_features.opt_alpha) {
if (cc->cm.use_alpha) {
blend_desc.RenderTarget[0].BlendEnable = true;
blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA;
blend_desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA;
@ -401,7 +407,7 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade
// Save some values
prg->shader_id = shader_id;
prg->hash = cc->hash;
prg->num_inputs = cc_features.num_inputs;
prg->num_floats = num_floats;
prg->used_textures[0] = cc_features.used_textures[0];
@ -410,9 +416,9 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade
return (struct ShaderProgram *)(d3d.shader_program = prg);
}
static struct ShaderProgram *gfx_d3d11_lookup_shader(uint32_t shader_id) {
static struct ShaderProgram *gfx_d3d11_lookup_shader(struct ColorCombiner* cc) {
for (size_t i = 0; i < d3d.shader_program_pool_size; i++) {
if (d3d.shader_program_pool[i].shader_id == shader_id) {
if (d3d.shader_program_pool[i].hash == cc->hash) {
return (struct ShaderProgram *)&d3d.shader_program_pool[i];
}
}

View file

@ -50,28 +50,29 @@ using namespace Microsoft::WRL; // For ComPtr
namespace {
struct ShaderProgramD3D12 {
uint32_t shader_id;
struct ColorCombiner cc;
uint64_t hash;
uint8_t num_inputs;
bool used_textures[2];
uint8_t num_floats;
uint8_t num_attribs;
ComPtr<ID3DBlob> vertex_shader;
ComPtr<ID3DBlob> pixel_shader;
ComPtr<ID3D12RootSignature> root_signature;
};
struct PipelineDesc {
uint32_t shader_id;
uint64_t hash;
bool depth_test;
bool depth_mask;
bool zmode_decal;
bool _padding;
bool operator==(const PipelineDesc& o) const {
return memcmp(this, &o, sizeof(*this)) == 0;
}
bool operator<(const PipelineDesc& o) const {
return memcmp(this, &o, sizeof(*this)) < 0;
}
@ -86,7 +87,7 @@ struct TextureData {
ComPtr<ID3D12Resource> resource;
struct TextureHeap *heap;
uint8_t heap_offset;
uint64_t last_frame_counter;
uint32_t descriptor_index;
int sampler_parameters;
@ -103,15 +104,16 @@ static struct {
HMODULE d3d12_module;
PFN_D3D12_CREATE_DEVICE D3D12CreateDevice;
PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface;
HMODULE d3dcompiler_module;
pD3DCompile D3DCompile;
struct ShaderProgramD3D12 shader_program_pool[64];
struct ShaderProgramD3D12 shader_program_pool[CC_MAX_SHADERS];
uint8_t shader_program_pool_size;
uint8_t shader_program_pool_index;
uint32_t current_width, current_height;
ComPtr<ID3D12Device> device;
ComPtr<ID3D12CommandQueue> command_queue;
ComPtr<ID3D12CommandQueue> copy_command_queue;
@ -129,14 +131,14 @@ static struct {
UINT srv_descriptor_size;
ComPtr<ID3D12DescriptorHeap> sampler_heap;
UINT sampler_descriptor_size;
std::map<std::pair<uint32_t, uint32_t>, std::list<struct TextureHeap>> texture_heaps;
std::map<size_t, std::vector<ComPtr<ID3D12Resource>>> upload_heaps;
std::vector<std::pair<size_t, ComPtr<ID3D12Resource>>> upload_heaps_in_flight;
ComPtr<ID3D12Fence> copy_fence;
uint64_t copy_fence_value;
std::vector<struct TextureData> textures;
int current_tile;
uint32_t current_texture_ids[2];
@ -145,30 +147,30 @@ static struct {
int frame_index;
ComPtr<ID3D12Fence> fence;
HANDLE fence_event;
uint64_t frame_counter;
ComPtr<ID3D12Resource> noise_cb;
void *mapped_noise_cb_address;
struct NoiseCB noise_cb_data;
ComPtr<ID3D12Resource> vertex_buffer;
void *mapped_vbuf_address;
int vbuf_pos;
std::vector<ComPtr<ID3D12Resource>> resources_to_clean_at_end_of_frame;
std::vector<std::pair<struct TextureHeap *, uint8_t>> texture_heap_allocations_to_reclaim_at_end_of_frame;
std::map<PipelineDesc, ComPtr<ID3D12PipelineState>> pipeline_states;
bool must_reload_pipeline;
// Current state:
ID3D12PipelineState *pipeline_state;
struct ShaderProgramD3D12 *shader_program;
bool depth_test;
bool depth_mask;
bool zmode_decal;
CD3DX12_VIEWPORT viewport;
CD3DX12_RECT scissor;
} d3d;
@ -234,45 +236,38 @@ static void gfx_direct3d12_load_shader(struct ShaderProgram *new_prg) {
d3d.must_reload_pipeline = true;
}
static struct ShaderProgram *gfx_direct3d12_create_and_load_new_shader(uint32_t shader_id) {
/*static FILE *fp;
if (!fp) {
fp = fopen("shaders.txt", "w");
}
fprintf(fp, "0x%08x\n", shader_id);
fflush(fp);*/
struct ShaderProgramD3D12 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++];
CCFeatures cc_features;
gfx_cc_get_features(shader_id, &cc_features);
static struct ShaderProgram *gfx_direct3d12_create_and_load_new_shader(struct ColorCombiner* cc) {
struct ShaderProgramD3D12 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_index];
d3d.shader_program_pool_index = (d3d.shader_program_pool_index + 1) % CC_MAX_SHADERS;
if (d3d.shader_program_pool_size < CC_MAX_SHADERS) { d3d.shader_program_pool_size++; }
CCFeatures cc_features = { 0 };
gfx_cc_get_features(cc, &cc_features);
char buf[2048];
size_t len, num_floats;
gfx_direct3d_common_build_shader(buf, len, num_floats, cc_features, true, false);
//fwrite(buf, 1, len, stdout);
gfx_direct3d_common_build_shader(buf, len, num_floats, *cc, cc_features, true, false);
ThrowIfFailed(d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "VSMain", "vs_5_1", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &prg->vertex_shader, nullptr));
ThrowIfFailed(d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "PSMain", "ps_5_1", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &prg->pixel_shader, nullptr));
ThrowIfFailed(d3d.device->CreateRootSignature(0, prg->pixel_shader->GetBufferPointer(), prg->pixel_shader->GetBufferSize(), IID_PPV_ARGS(&prg->root_signature)));
prg->shader_id = shader_id;
prg->hash = cc->hash;
prg->cc = *cc;
prg->num_inputs = cc_features.num_inputs;
prg->used_textures[0] = cc_features.used_textures[0];
prg->used_textures[1] = cc_features.used_textures[1];
prg->num_floats = num_floats;
//prg->num_attribs = cnt;
d3d.must_reload_pipeline = true;
return (struct ShaderProgram *)(d3d.shader_program = prg);
}
static struct ShaderProgram *gfx_direct3d12_lookup_shader(uint32_t shader_id) {
static struct ShaderProgram *gfx_direct3d12_lookup_shader(struct ColorCombiner* cc) {
for (size_t i = 0; i < d3d.shader_program_pool_size; i++) {
if (d3d.shader_program_pool[i].shader_id == shader_id) {
if (d3d.shader_program_pool[i].hash == cc->hash) {
return (struct ShaderProgram *)&d3d.shader_program_pool[i];
}
}
@ -281,7 +276,7 @@ static struct ShaderProgram *gfx_direct3d12_lookup_shader(uint32_t shader_id) {
static void gfx_direct3d12_shader_get_info(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]) {
struct ShaderProgramD3D12 *p = (struct ShaderProgramD3D12 *)prg;
*num_inputs = p->num_inputs;
used_textures[0] = p->used_textures[0];
used_textures[1] = p->used_textures[1];
@ -299,9 +294,9 @@ static void gfx_direct3d12_select_texture(int tile, uint32_t texture_id) {
static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, int height) {
texture_uploads++;
ComPtr<ID3D12Resource> texture_resource;
// Describe and create a Texture2D.
D3D12_RESOURCE_DESC texture_desc = {};
texture_desc.MipLevels = 1;
@ -314,11 +309,11 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width,
texture_desc.SampleDesc.Quality = 0;
texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
texture_desc.Alignment = ((width + 31) / 32) * ((height + 31) / 32) > 16 ? 0 : D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
D3D12_RESOURCE_ALLOCATION_INFO alloc_info = get_resource_allocation_info(&texture_desc);
std::list<struct TextureHeap>& heaps = d3d.texture_heaps[std::pair<uint32_t, uint32_t>(alloc_info.SizeInBytes, alloc_info.Alignment)];
struct TextureHeap *found_heap = nullptr;
for (struct TextureHeap& heap : heaps) {
if (!heap.free_list.empty()) {
@ -328,7 +323,7 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width,
if (found_heap == nullptr) {
heaps.resize(heaps.size() + 1);
found_heap = &heaps.back();
// In case of HD textures, make sure too much memory isn't wasted
int textures_per_heap = 524288 / alloc_info.SizeInBytes;
if (textures_per_heap < 1) {
@ -336,7 +331,7 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width,
} else if (textures_per_heap > 64) {
textures_per_heap = 64;
}
D3D12_HEAP_DESC heap_desc = {};
heap_desc.SizeInBytes = alloc_info.SizeInBytes * textures_per_heap;
if (alloc_info.Alignment == D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT) {
@ -353,17 +348,17 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width,
found_heap->free_list.push_back(i);
}
}
uint8_t heap_offset = found_heap->free_list.back();
found_heap->free_list.pop_back();
ThrowIfFailed(d3d.device->CreatePlacedResource(found_heap->heap.Get(), heap_offset * alloc_info.SizeInBytes, &texture_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&texture_resource)));
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
UINT num_rows;
UINT64 row_size_in_bytes;
UINT64 upload_buffer_size;
d3d.device->GetCopyableFootprints(&texture_desc, 0, 1, 0, &layout, &num_rows, &row_size_in_bytes, &upload_buffer_size);
std::vector<ComPtr<ID3D12Resource>>& upload_heaps = d3d.upload_heaps[upload_buffer_size];
ComPtr<ID3D12Resource> upload_heap;
if (upload_heaps.empty()) {
@ -380,13 +375,13 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width,
upload_heap = upload_heaps.back();
upload_heaps.pop_back();
}
{
D3D12_SUBRESOURCE_DATA texture_data = {};
texture_data.pData = rgba32_buf;
texture_data.RowPitch = width * 4; // RGBA
texture_data.SlicePitch = texture_data.RowPitch * height;
void *data;
upload_heap->Map(0, nullptr, &data);
D3D12_MEMCPY_DEST dest_data = { (uint8_t *)data + layout.Offset, layout.Footprint.RowPitch, SIZE_T(layout.Footprint.RowPitch) * SIZE_T(num_rows) };
@ -397,12 +392,12 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width,
CD3DX12_TEXTURE_COPY_LOCATION src(upload_heap.Get(), layout);
d3d.copy_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr);
}
CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(texture_resource.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
d3d.command_list->ResourceBarrier(1, &barrier);
d3d.upload_heaps_in_flight.push_back(std::make_pair((size_t)upload_buffer_size, std::move(upload_heap)));
struct TextureData& td = d3d.textures[d3d.current_texture_ids[d3d.current_tile]];
if (td.resource.Get() != nullptr) {
d3d.resources_to_clean_at_end_of_frame.push_back(std::move(td.resource));
@ -454,10 +449,10 @@ static void gfx_direct3d12_set_use_alpha(bool use_alpha) {
static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris) {
struct ShaderProgramD3D12 *prg = d3d.shader_program;
if (d3d.must_reload_pipeline) {
ComPtr<ID3D12PipelineState>& pipeline_state = d3d.pipeline_states[PipelineDesc{
prg->shader_id,
prg->hash,
d3d.depth_test,
d3d.depth_mask,
d3d.zmode_decal,
@ -471,14 +466,17 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s
if (prg->used_textures[0] || prg->used_textures[1]) {
ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
}
if (prg->shader_id & SHADER_OPT_FOG) {
if (prg->cc.cm.use_fog) {
ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
}
if (prg->cc.cm.light_map) {
ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"LIGHTMAP", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
}
for (int32_t i = 0; i < prg->num_inputs; i++) {
DXGI_FORMAT format = (prg->shader_id & SHADER_OPT_ALPHA) ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
DXGI_FORMAT format = (prg->cc.cm.use_alpha) ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"INPUT", (UINT)i, format, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
}
D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
desc.InputLayout = { ied, ied_pos };
desc.pRootSignature = prg->root_signature.Get();
@ -489,7 +487,7 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s
desc.RasterizerState.SlopeScaledDepthBias = -2.0f;
}
desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
if (prg->shader_id & SHADER_OPT_ALPHA) {
if (prg->cc.cm.use_alpha) {
D3D12_BLEND_DESC bd = {};
bd.AlphaToCoverageEnable = FALSE;
bd.IndependentBlendEnable = FALSE;
@ -521,51 +519,51 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s
d3d.pipeline_state = pipeline_state.Get();
d3d.must_reload_pipeline = false;
}
d3d.command_list->SetGraphicsRootSignature(prg->root_signature.Get());
d3d.command_list->SetPipelineState(d3d.pipeline_state);
ID3D12DescriptorHeap *heaps[] = { d3d.srv_heap.Get(), d3d.sampler_heap.Get() };
d3d.command_list->SetDescriptorHeaps(2, heaps);
int root_param_index = 0;
if ((prg->shader_id & (SHADER_OPT_ALPHA | SHADER_OPT_NOISE)) == (SHADER_OPT_ALPHA | SHADER_OPT_NOISE)) {
if (prg->cc.cm.use_alpha && prg->cc.cm.use_noise) {
d3d.command_list->SetGraphicsRootConstantBufferView(root_param_index++, d3d.noise_cb->GetGPUVirtualAddress());
}
for (int32_t i = 0; i < 2; i++) {
if (prg->used_textures[i]) {
struct TextureData& td = d3d.textures[d3d.current_texture_ids[i]];
if (td.last_frame_counter != d3d.frame_counter) {
td.descriptor_index = d3d.srv_pos;
td.last_frame_counter = d3d.frame_counter;
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srv_desc.Texture2D.MipLevels = 1;
CD3DX12_CPU_DESCRIPTOR_HANDLE srv_handle(get_cpu_descriptor_handle(d3d.srv_heap), d3d.srv_pos++, d3d.srv_descriptor_size);
d3d.device->CreateShaderResourceView(td.resource.Get(), &srv_desc, srv_handle);
}
CD3DX12_GPU_DESCRIPTOR_HANDLE srv_gpu_handle(get_gpu_descriptor_handle(d3d.srv_heap), td.descriptor_index, d3d.srv_descriptor_size);
d3d.command_list->SetGraphicsRootDescriptorTable(root_param_index++, srv_gpu_handle);
CD3DX12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle(get_gpu_descriptor_handle(d3d.sampler_heap), td.sampler_parameters, d3d.sampler_descriptor_size);
d3d.command_list->SetGraphicsRootDescriptorTable(root_param_index++, sampler_gpu_handle);
}
}
CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size);
D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap);
d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle);
d3d.command_list->RSSetViewports(1, &d3d.viewport);
d3d.command_list->RSSetScissorRects(1, &d3d.scissor);
int current_pos = d3d.vbuf_pos;
memcpy((uint8_t *)d3d.mapped_vbuf_address + current_pos, buf_vbo, buf_vbo_len * sizeof(float));
d3d.vbuf_pos += buf_vbo_len * sizeof(float);
@ -574,12 +572,12 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s
maxpos = d3d.vbuf_pos;
//printf("NEW MAXPOS: %d\n", maxpos);
}
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view;
vertex_buffer_view.BufferLocation = d3d.vertex_buffer->GetGPUVirtualAddress() + current_pos;
vertex_buffer_view.StrideInBytes = buf_vbo_len / (3 * buf_vbo_num_tris) * sizeof(float);
vertex_buffer_view.SizeInBytes = buf_vbo_len * sizeof(float);
d3d.command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
d3d.command_list->IASetVertexBuffers(0, 1, &vertex_buffer_view);
d3d.command_list->DrawInstanced(3 * buf_vbo_num_tris, 1, 0, 0);
@ -591,22 +589,22 @@ static void gfx_direct3d12_start_frame(void) {
texture_uploads = 0;
ThrowIfFailed(d3d.command_allocator->Reset());
ThrowIfFailed(d3d.command_list->Reset(d3d.command_allocator.Get(), nullptr));
CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(
d3d.render_targets[d3d.frame_index].Get(),
D3D12_RESOURCE_STATE_PRESENT,
D3D12_RESOURCE_STATE_RENDER_TARGET);
d3d.command_list->ResourceBarrier(1, &barrier);
CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size);
D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap);
d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle);
static unsigned char c;
const float clear_color[] = { 0.0f, 0.0f, 0.0f, 1.0f };
d3d.command_list->ClearRenderTargetView(rtv_handle, clear_color, 0, nullptr);
d3d.command_list->ClearDepthStencilView(dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr);
d3d.noise_cb_data.noise_frame++;
if (d3d.noise_cb_data.noise_frame > 150) {
// No high values, as noise starts to look ugly
@ -616,7 +614,7 @@ static void gfx_direct3d12_start_frame(void) {
d3d.noise_cb_data.noise_scale_x = 120 * aspect_ratio; // 120 = N64 height resolution (240) / 2
d3d.noise_cb_data.noise_scale_y = 120;
memcpy(d3d.mapped_noise_cb_address, &d3d.noise_cb_data, sizeof(struct NoiseCB));
d3d.vbuf_pos = 0;
}
@ -634,10 +632,10 @@ static void create_depth_buffer(void) {
ThrowIfFailed(d3d.swap_chain->GetDesc1(&desc1));
UINT width = desc1.Width;
UINT height = desc1.Height;
d3d.current_width = width;
d3d.current_height = height;
D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
dsv_desc.Format = DXGI_FORMAT_D32_FLOAT;
dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
@ -667,7 +665,7 @@ static void create_depth_buffer(void) {
rd.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
rd.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
ThrowIfFailed(d3d.device->CreateCommittedResource(&hp, D3D12_HEAP_FLAG_NONE, &rd, D3D12_RESOURCE_STATE_DEPTH_WRITE, &depth_optimized_cv, IID_PPV_ARGS(&d3d.depth_stencil_buffer)));
d3d.device->CreateDepthStencilView(d3d.depth_stencil_buffer.Get(), &dsv_desc, get_cpu_descriptor_handle(d3d.dsv_heap));
}
@ -699,7 +697,7 @@ static void gfx_direct3d12_init(void ) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()), gfx_dxgi_get_h_wnd(), "D3DCompiler_47.dll could not be loaded");
}
d3d.D3DCompile = (pD3DCompile)GetProcAddress(d3d.d3dcompiler_module, "D3DCompile");
// Create device
{
UINT debug_flags = 0;
@ -710,14 +708,14 @@ static void gfx_direct3d12_init(void ) {
debug_flags |= DXGI_CREATE_FACTORY_DEBUG;
}
#endif
gfx_dxgi_create_factory_and_device(DEBUG_D3D, 12, [](IDXGIAdapter1 *adapter, bool test_only) {
HRESULT res = d3d.D3D12CreateDevice(
adapter,
D3D_FEATURE_LEVEL_11_0,
IID_ID3D12Device,
test_only ? nullptr : IID_PPV_ARGS_Helper(&d3d.device));
if (test_only) {
return SUCCEEDED(res);
} else {
@ -726,7 +724,7 @@ static void gfx_direct3d12_init(void ) {
}
});
}
// Create command queues
{
D3D12_COMMAND_QUEUE_DESC queue_desc = {};
@ -740,14 +738,14 @@ static void gfx_direct3d12_init(void ) {
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
ThrowIfFailed(d3d.device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&d3d.copy_command_queue)));
}
// Create swap chain
{
ComPtr<IDXGISwapChain1> swap_chain1 = gfx_dxgi_create_swap_chain(d3d.command_queue.Get());
ThrowIfFailed(swap_chain1->QueryInterface(__uuidof(IDXGISwapChain3), &d3d.swap_chain));
d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex();
}
// Create render target views
{
D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc = {};
@ -759,7 +757,7 @@ static void gfx_direct3d12_init(void ) {
create_render_target_views();
}
// Create Z-buffer
{
D3D12_DESCRIPTOR_HEAP_DESC dsv_heap_desc = {};
@ -770,7 +768,7 @@ static void gfx_direct3d12_init(void ) {
create_depth_buffer();
}
// Create SRV heap for texture descriptors
{
D3D12_DESCRIPTOR_HEAP_DESC srv_heap_desc = {};
@ -780,7 +778,7 @@ static void gfx_direct3d12_init(void ) {
ThrowIfFailed(d3d.device->CreateDescriptorHeap(&srv_heap_desc, IID_PPV_ARGS(&d3d.srv_heap)));
d3d.srv_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
// Create sampler heap and descriptors
{
D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = {};
@ -789,13 +787,13 @@ static void gfx_direct3d12_init(void ) {
sampler_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
ThrowIfFailed(d3d.device->CreateDescriptorHeap(&sampler_heap_desc, IID_PPV_ARGS(&d3d.sampler_heap)));
d3d.sampler_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
static const D3D12_TEXTURE_ADDRESS_MODE address_modes[] = {
D3D12_TEXTURE_ADDRESS_MODE_WRAP,
D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
D3D12_TEXTURE_ADDRESS_MODE_CLAMP
};
D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle = get_cpu_descriptor_handle(d3d.sampler_heap);
int pos = 0;
for (int linear_filter = 0; linear_filter < 2; linear_filter++) {
@ -816,7 +814,7 @@ static void gfx_direct3d12_init(void ) {
}
}
}
// Create constant buffer view for noise
{
/*D3D12_DESCRIPTOR_HEAP_DESC cbv_heap_desc = {};
@ -824,7 +822,7 @@ static void gfx_direct3d12_init(void ) {
cbv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
srv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
ThrowIfFailed(d3d.device->CreateDescriptorHeap*/
CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD);
CD3DX12_RESOURCE_DESC rdb = CD3DX12_RESOURCE_DESC::Buffer(256);
ThrowIfFailed(d3d.device->CreateCommittedResource(
@ -834,27 +832,27 @@ static void gfx_direct3d12_init(void ) {
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&d3d.noise_cb)));
CD3DX12_RANGE read_range(0, 0); // Read not possible from CPU
ThrowIfFailed(d3d.noise_cb->Map(0, &read_range, &d3d.mapped_noise_cb_address));
}
ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&d3d.command_allocator)));
ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&d3d.copy_command_allocator)));
ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, d3d.command_allocator.Get(), nullptr, IID_PPV_ARGS(&d3d.command_list)));
ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, d3d.copy_command_allocator.Get(), nullptr, IID_PPV_ARGS(&d3d.copy_command_list)));
ThrowIfFailed(d3d.command_list->Close());
ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&d3d.fence)));
d3d.fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
if (d3d.fence_event == nullptr) {
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
}
ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&d3d.copy_fence)));
{
// Create a buffer of 1 MB in size. With a 120 star speed run 192 kB seems to be max usage.
CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD);
@ -866,7 +864,7 @@ static void gfx_direct3d12_init(void ) {
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&d3d.vertex_buffer)));
CD3DX12_RANGE read_range(0, 0); // Read not possible from CPU
ThrowIfFailed(d3d.vertex_buffer->Map(0, &read_range, &d3d.mapped_vbuf_address));
}
@ -880,29 +878,29 @@ static void gfx_direct3d12_end_frame(void) {
}
//printf("Texture uploads: %d %d\n", max_texture_uploads, texture_uploads);
texture_uploads = 0;
ThrowIfFailed(d3d.copy_command_list->Close());
{
ID3D12CommandList *lists[] = { d3d.copy_command_list.Get() };
d3d.copy_command_queue->ExecuteCommandLists(1, lists);
d3d.copy_command_queue->Signal(d3d.copy_fence.Get(), ++d3d.copy_fence_value);
}
CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(
d3d.render_targets[d3d.frame_index].Get(),
D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_PRESENT);
d3d.command_list->ResourceBarrier(1, &barrier);
d3d.command_queue->Wait(d3d.copy_fence.Get(), d3d.copy_fence_value);
ThrowIfFailed(d3d.command_list->Close());
{
ID3D12CommandList *lists[] = { d3d.command_list.Get() };
d3d.command_queue->ExecuteCommandLists(1, lists);
}
{
LARGE_INTEGER t0;
QueryPerformanceCounter(&t0);
@ -913,7 +911,7 @@ static void gfx_direct3d12_end_frame(void) {
static void gfx_direct3d12_finish_render(void) {
LARGE_INTEGER t0, t1, t2;
QueryPerformanceCounter(&t0);
static UINT64 fence_value;
ThrowIfFailed(d3d.command_queue->Signal(d3d.fence.Get(), ++fence_value));
if (d3d.fence->GetCompletedValue() < fence_value) {
@ -921,7 +919,7 @@ static void gfx_direct3d12_finish_render(void) {
WaitForSingleObject(d3d.fence_event, INFINITE);
}
QueryPerformanceCounter(&t1);
d3d.resources_to_clean_at_end_of_frame.clear();
for (std::pair<size_t, ComPtr<ID3D12Resource>>& heap : d3d.upload_heaps_in_flight) {
d3d.upload_heaps[heap.first].push_back(std::move(heap.second));
@ -931,14 +929,14 @@ static void gfx_direct3d12_finish_render(void) {
item.first->free_list.push_back(item.second);
}
d3d.texture_heap_allocations_to_reclaim_at_end_of_frame.clear();
QueryPerformanceCounter(&t2);
d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex();
ThrowIfFailed(d3d.copy_command_allocator->Reset());
ThrowIfFailed(d3d.copy_command_list->Reset(d3d.copy_command_allocator.Get(), nullptr));
//printf("done %llu gpu:%d wait:%d freed:%llu frame:%u %u monitor:%u t:%llu\n", (unsigned long long)(t0.QuadPart - d3d.qpc_init), (int)(t1.QuadPart - t0.QuadPart), (int)(t2.QuadPart - t0.QuadPart), (unsigned long long)(t2.QuadPart - d3d.qpc_init), d3d.pending_frame_stats.rbegin()->first, stats.PresentCount, stats.SyncRefreshCount, (unsigned long long)(stats.SyncQPCTime.QuadPart - d3d.qpc_init));
}

View file

@ -5,46 +5,6 @@
#include "gfx_direct3d_common.h"
#include "gfx_cc.h"
void get_cc_features(uint32_t shader_id, CCFeatures *cc_features) {
for (int32_t i = 0; i < 4; i++) {
cc_features->c[0][i] = (shader_id >> (i * 3)) & 7;
cc_features->c[1][i] = (shader_id >> (12 + i * 3)) & 7;
}
cc_features->opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0;
cc_features->opt_fog = (shader_id & SHADER_OPT_FOG) != 0;
cc_features->opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0;
cc_features->opt_noise = (shader_id & SHADER_OPT_NOISE) != 0;
cc_features->used_textures[0] = false;
cc_features->used_textures[1] = false;
cc_features->num_inputs = 0;
for (int32_t i = 0; i < 2; i++) {
for (int32_t j = 0; j < 4; j++) {
if (cc_features->c[i][j] >= SHADER_INPUT_1 && cc_features->c[i][j] <= SHADER_INPUT_4) {
if (cc_features->c[i][j] > cc_features->num_inputs) {
cc_features->num_inputs = cc_features->c[i][j];
}
}
if (cc_features->c[i][j] == SHADER_TEXEL0 || cc_features->c[i][j] == SHADER_TEXEL0A) {
cc_features->used_textures[0] = true;
}
if (cc_features->c[i][j] == SHADER_TEXEL1) {
cc_features->used_textures[1] = true;
}
}
}
cc_features->do_single[0] = cc_features->c[0][2] == 0;
cc_features->do_single[1] = cc_features->c[1][2] == 0;
cc_features->do_multiply[0] = cc_features->c[0][1] == 0 && cc_features->c[0][3] == 0;
cc_features->do_multiply[1] = cc_features->c[1][1] == 0 && cc_features->c[1][3] == 0;
cc_features->do_mix[0] = cc_features->c[0][1] == cc_features->c[0][3];
cc_features->do_mix[1] = cc_features->c[1][1] == cc_features->c[1][3];
cc_features->color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff);
}
// Copies the characters of str (excluding its terminator) into buf starting
// at index *len, and advances *len by the number of characters copied.
// No terminator is written and no capacity check is performed.
static void append_str(char *buf, size_t *len, const char *str) {
    for (size_t i = 0; str[i] != '\0'; i++) {
        buf[*len] = str[i];
        *len += 1;
    }
}
@ -61,6 +21,8 @@ static const char *shader_item_to_str(int32_t item, bool with_alpha, bool only_a
default:
case SHADER_0:
return with_alpha ? "float4(0.0, 0.0, 0.0, 0.0)" : "float3(0.0, 0.0, 0.0)";
case SHADER_1:
return with_alpha ? "float4(1.0, 1.0, 1.0, 1.0)" : "float3(1.0, 1.0, 1.0)";
case SHADER_INPUT_1:
return with_alpha || !inputs_have_alpha ? "input.input1" : "input.input1.rgb";
case SHADER_INPUT_2:
@ -69,18 +31,34 @@ static const char *shader_item_to_str(int32_t item, bool with_alpha, bool only_a
return with_alpha || !inputs_have_alpha ? "input.input3" : "input.input3.rgb";
case SHADER_INPUT_4:
return with_alpha || !inputs_have_alpha ? "input.input4" : "input.input4.rgb";
case SHADER_INPUT_5:
return with_alpha || !inputs_have_alpha ? "input.input5" : "input.input5.rgb";
case SHADER_INPUT_6:
return with_alpha || !inputs_have_alpha ? "input.input6" : "input.input6.rgb";
case SHADER_INPUT_7:
return with_alpha || !inputs_have_alpha ? "input.input7" : "input.input7.rgb";
case SHADER_INPUT_8:
return with_alpha || !inputs_have_alpha ? "input.input8" : "input.input8.rgb";
case SHADER_TEXEL0:
return with_alpha ? "texVal0" : "texVal0.rgb";
case SHADER_TEXEL0A:
return hint_single_element ? "texVal0.a" : (with_alpha ? "float4(texVal0.a, texVal0.a, texVal0.a, texVal0.a)" : "float3(texVal0.a, texVal0.a, texVal0.a)");
case SHADER_TEXEL1:
return with_alpha ? "texVal1" : "texVal1.rgb";
case SHADER_TEXEL1A:
return hint_single_element ? "texVal1.a" : (with_alpha ? "float4(texVal1.a, texVal1.a, texVal1.a, texVal1.a)" : "float3(texVal1.a, texVal1.a, texVal1.a)");
case SHADER_COMBINED:
return with_alpha ? "texel" : "texel.rgb";
case SHADER_COMBINEDA:
return hint_single_element ? "texel.a" : (with_alpha ? "float4(texel.a, texel.a, texel.a, texel.a)" : "float3(texel.a, texel.a, texel.a)");
}
} else {
switch (item) {
default:
case SHADER_0:
return "0.0";
case SHADER_1:
return "1.0";
case SHADER_INPUT_1:
return "input.input1.a";
case SHADER_INPUT_2:
@ -89,44 +67,58 @@ static const char *shader_item_to_str(int32_t item, bool with_alpha, bool only_a
return "input.input3.a";
case SHADER_INPUT_4:
return "input.input4.a";
case SHADER_INPUT_5:
return "input.input5.a";
case SHADER_INPUT_6:
return "input.input6.a";
case SHADER_INPUT_7:
return "input.input7.a";
case SHADER_INPUT_8:
return "input.input8.a";
case SHADER_TEXEL0:
return "texVal0.a";
case SHADER_TEXEL0A:
return "texVal0.a";
case SHADER_TEXEL1:
return "texVal1.a";
case SHADER_TEXEL1A:
return "texVal1.a";
case SHADER_COMBINED:
return "texel.a";
case SHADER_COMBINEDA:
return "texel.a";
}
}
}
// Appends to buf (advancing *len) the shader-source expression for one
// combiner formula. Each operand is turned into text by shader_item_to_str
// and the operands are combined in one of four shapes:
//   do_single   -> operand 3 alone
//   do_multiply -> operand 0 * operand 2
//   do_mix      -> lerp(operand 1, operand 0, operand 2)
//   otherwise   -> (operand 0 - operand 1) * operand 2 + operand 3
// only_alpha selects the second group of four operands; with_alpha,
// only_alpha and opt_alpha are forwarded unchanged to shader_item_to_str.
// Operand 2 is the only lookup that passes `true` as the last argument.
//
// NOTE(review): this span carries two revisions side by side -- one taking
// `const uint8_t c[2][4]` and indexing c[only_alpha][k], the other taking a
// flat `const uint8_t* c` and indexing c[only_alpha * 4 + k]. Confirm which
// revision the target tree actually compiles before relying on either.
static void append_formula(char *buf, size_t *len, const uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) {
static void append_formula(char *buf, size_t *len, const uint8_t* c, bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) {
// Single operand: emit operand 3 only.
if (do_single) {
append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false));
// Product: operand 0 * operand 2.
} else if (do_multiply) {
append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, " * ");
append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true));
// Interpolation: lerp(operand 1, operand 0, operand 2).
} else if (do_mix) {
append_str(buf, len, "lerp(");
append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, ", ");
append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, ", ");
append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, ")");
// General form: (operand 0 - operand 1) * operand 2 + operand 3.
} else {
append_str(buf, len, "(");
append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, " - ");
append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, ") * ");
append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, " + ");
append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false));
}
}
void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering) {
void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, struct ColorCombiner& cc, const CCFeatures& ccf, bool include_root_signature, bool three_point_filtering) {
len = 0;
num_floats = 4;
@ -134,14 +126,14 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f
if (include_root_signature) {
append_str(buf, &len, "#define RS \"RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | DENY_VERTEX_SHADER_ROOT_ACCESS)");
if (cc_features.opt_alpha && cc_features.opt_noise) {
if (cc.cm.use_alpha && cc.cm.use_noise) {
append_str(buf, &len, ",CBV(b0, visibility = SHADER_VISIBILITY_PIXEL)");
}
if (cc_features.used_textures[0]) {
if (ccf.used_textures[0]) {
append_str(buf, &len, ",DescriptorTable(SRV(t0), visibility = SHADER_VISIBILITY_PIXEL)");
append_str(buf, &len, ",DescriptorTable(Sampler(s0), visibility = SHADER_VISIBILITY_PIXEL)");
}
if (cc_features.used_textures[1]) {
if (ccf.used_textures[1]) {
append_str(buf, &len, ",DescriptorTable(SRV(t1), visibility = SHADER_VISIBILITY_PIXEL)");
append_str(buf, &len, ",DescriptorTable(Sampler(s1), visibility = SHADER_VISIBILITY_PIXEL)");
}
@ -150,37 +142,41 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f
append_line(buf, &len, "struct PSInput {");
append_line(buf, &len, " float4 position : SV_POSITION;");
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
append_line(buf, &len, " float2 uv : TEXCOORD;");
num_floats += 2;
}
if (cc_features.opt_alpha && cc_features.opt_noise) {
if (cc.cm.use_alpha && cc.cm.use_noise) {
append_line(buf, &len, " float4 screenPos : TEXCOORD1;");
}
if (cc_features.opt_fog) {
if (cc.cm.use_fog) {
append_line(buf, &len, " float4 fog : FOG;");
num_floats += 4;
}
for (int32_t i = 0; i < cc_features.num_inputs; i++) {
len += sprintf(buf + len, " float%d input%d : INPUT%d;\r\n", cc_features.opt_alpha ? 4 : 3, i + 1, i);
num_floats += cc_features.opt_alpha ? 4 : 3;
if (cc.cm.light_map) {
append_line(buf, &len, " float2 lightmap : LIGHTMAP;");
num_floats += 2;
}
for (int32_t i = 0; i < ccf.num_inputs; i++) {
len += sprintf(buf + len, " float%d input%d : INPUT%d;\r\n", cc.cm.use_alpha ? 4 : 3, i + 1, i);
num_floats += cc.cm.use_alpha ? 4 : 3;
}
append_line(buf, &len, "};");
// Textures and samplers
if (cc_features.used_textures[0]) {
if (ccf.used_textures[0]) {
append_line(buf, &len, "Texture2D g_texture0 : register(t0);");
append_line(buf, &len, "SamplerState g_sampler0 : register(s0);");
}
if (cc_features.used_textures[1]) {
if (ccf.used_textures[1]) {
append_line(buf, &len, "Texture2D g_texture1 : register(t1);");
append_line(buf, &len, "SamplerState g_sampler1 : register(s1);");
}
// Constant buffer and random function
if (cc_features.opt_alpha && cc_features.opt_noise) {
if (cc.cm.use_alpha && cc.cm.use_noise) {
append_line(buf, &len, "cbuffer PerFrameCB : register(b0) {");
append_line(buf, &len, " uint noise_frame;");
append_line(buf, &len, " float2 noise_scale;");
@ -196,7 +192,7 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f
// Original author: ArthurCarvalho
// Based on GLSL implementation by twinaphex, mupen64plus-libretro project.
if (three_point_filtering && (cc_features.used_textures[0] || cc_features.used_textures[1])) {
if (three_point_filtering && (ccf.used_textures[0] || ccf.used_textures[1])) {
append_line(buf, &len, "cbuffer PerDrawCB : register(b1) {");
append_line(buf, &len, " struct {");
append_line(buf, &len, " uint width;");
@ -218,28 +214,34 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f
// Vertex shader
append_str(buf, &len, "PSInput VSMain(float4 position : POSITION");
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
append_str(buf, &len, ", float2 uv : TEXCOORD");
}
if (cc_features.opt_fog) {
if (cc.cm.use_fog) {
append_str(buf, &len, ", float4 fog : FOG");
}
for (int32_t i = 0; i < cc_features.num_inputs; i++) {
len += sprintf(buf + len, ", float%d input%d : INPUT%d", cc_features.opt_alpha ? 4 : 3, i + 1, i);
if (cc.cm.light_map) {
append_str(buf, &len, ", float2 lightmap : LIGHTMAP");
}
for (int32_t i = 0; i < ccf.num_inputs; i++) {
len += sprintf(buf + len, ", float%d input%d : INPUT%d", cc.cm.use_alpha ? 4 : 3, i + 1, i);
}
append_line(buf, &len, ") {");
append_line(buf, &len, " PSInput result;");
append_line(buf, &len, " result.position = position;");
if (cc_features.opt_alpha && cc_features.opt_noise) {
if (cc.cm.use_alpha && cc.cm.use_noise) {
append_line(buf, &len, " result.screenPos = position;");
}
if (cc_features.used_textures[0] || cc_features.used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
append_line(buf, &len, " result.uv = uv;");
}
if (cc_features.opt_fog) {
if (cc.cm.use_fog) {
append_line(buf, &len, " result.fog = fog;");
}
for (int32_t i = 0; i < cc_features.num_inputs; i++) {
if (cc.cm.light_map) {
append_line(buf, &len, " result.lightmap = lightmap;");
}
for (int32_t i = 0; i < ccf.num_inputs; i++) {
len += sprintf(buf + len, " result.input%d = input%d;\r\n", i + 1, i + 1);
}
append_line(buf, &len, " return result;");
@ -250,7 +252,7 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f
append_line(buf, &len, "[RootSignature(RS)]");
}
append_line(buf, &len, "float4 PSMain(PSInput input) : SV_TARGET {");
if (cc_features.used_textures[0]) {
if (ccf.used_textures[0]) {
if (three_point_filtering) {
append_line(buf, &len, " float4 texVal0;");
append_line(buf, &len, " if (textures[0].linear_filtering)");
@ -261,48 +263,67 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f
append_line(buf, &len, " float4 texVal0 = g_texture0.Sample(g_sampler0, input.uv);");
}
}
if (cc_features.used_textures[1]) {
if (three_point_filtering) {
append_line(buf, &len, " float4 texVal1;");
append_line(buf, &len, " if (textures[1].linear_filtering)");
append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.uv, float2(textures[1].width, textures[1].height));");
append_line(buf, &len, " else");
append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.uv);");
if (ccf.used_textures[1]) {
if (cc.cm.light_map) {
if (three_point_filtering) {
append_line(buf, &len, " float4 texVal1;");
append_line(buf, &len, " if (textures[1].linear_filtering)");
append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.lightmap, float2(textures[1].width, textures[1].height));");
append_line(buf, &len, " else");
append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.lightmap);");
} else {
append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.lightmap);");
}
} else {
append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.uv);");
if (three_point_filtering) {
append_line(buf, &len, " float4 texVal1;");
append_line(buf, &len, " if (textures[1].linear_filtering)");
append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.uv, float2(textures[1].width, textures[1].height));");
append_line(buf, &len, " else");
append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.uv);");
} else {
append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.uv);");
}
}
}
append_str(buf, &len, cc_features.opt_alpha ? " float4 texel = " : " float3 texel = ");
if (!cc_features.color_alpha_same && cc_features.opt_alpha) {
append_str(buf, &len, "float4(");
append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], false, false, true);
append_str(buf, &len, ", ");
append_formula(buf, &len, cc_features.c, cc_features.do_single[1], cc_features.do_multiply[1], cc_features.do_mix[1], true, true, true);
append_str(buf, &len, ")");
} else {
append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], cc_features.opt_alpha, false, cc_features.opt_alpha);
}
append_line(buf, &len, ";");
append_str(buf, &len, cc.cm.use_alpha ? " float4 texel = " : " float3 texel = ");
for (int i = 0; i < (cc.cm.use_2cycle + 1); i++) {
uint8_t* cmd = &cc.shader_commands[i * 8];
if (!ccf.color_alpha_same[i] && cc.cm.use_alpha) {
append_str(buf, &len, "float4(");
append_formula(buf, &len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], false, false, true);
append_str(buf, &len, ", ");
append_formula(buf, &len, cmd, ccf.do_single[i*2+1], ccf.do_multiply[i*2+1], ccf.do_mix[i*2+1], true, true, true);
append_str(buf, &len, ")");
} else {
append_formula(buf, &len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], cc.cm.use_alpha, false, cc.cm.use_alpha);
}
append_line(buf, &len, ";");
if (cc_features.opt_texture_edge && cc_features.opt_alpha) {
if (i == 0 && cc.cm.use_2cycle) {
append_str(buf, &len, "texel = ");
}
}
if (cc.cm.texture_edge && cc.cm.use_alpha) {
append_line(buf, &len, " if (texel.a > 0.3) texel.a = 1.0; else discard;");
}
// TODO discard if alpha is 0?
if (cc_features.opt_fog) {
if (cc_features.opt_alpha) {
if (cc.cm.use_fog) {
if (cc.cm.use_alpha) {
append_line(buf, &len, " texel = float4(lerp(texel.rgb, input.fog.rgb, input.fog.a), texel.a);");
} else {
append_line(buf, &len, " texel = lerp(texel, input.fog.rgb, input.fog.a);");
}
}
if (cc_features.opt_alpha && cc_features.opt_noise) {
if (cc.cm.use_alpha && cc.cm.use_noise) {
append_line(buf, &len, " float2 coords = (input.screenPos.xy / input.screenPos.w) * noise_scale;");
append_line(buf, &len, " texel.a *= round(random(float3(floor(coords), noise_frame)));");
}
if (cc_features.opt_alpha) {
if (cc.cm.use_alpha) {
append_line(buf, &len, " return texel;");
} else {
append_line(buf, &len, " return float4(texel, 1.0);");

View file

@ -7,7 +7,7 @@
#include "gfx_cc.h"
void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering);
void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, struct ColorCombiner& cc, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering);
#endif

View file

@ -212,13 +212,18 @@ static void update_screen_settings(void) {
if (configWindow.fullscreen != dxgi.is_full_screen)
toggle_borderless_window_full_screen(configWindow.fullscreen);
if (!dxgi.is_full_screen) {
/*
// this code is buggy, and I just simply don't care enough about direct x to fix it
// when this is enabled, the window will be placed in the wrong spot... often off screen
const int screen_width = GetSystemMetrics(SM_CXSCREEN);
const int screen_height = GetSystemMetrics(SM_CYSCREEN);
const int xpos = (configWindow.x == WAPI_WIN_CENTERPOS) ? (screen_width - configWindow.w) * 0.5 : configWindow.x;
const int ypos = (configWindow.y == WAPI_WIN_CENTERPOS) ? (screen_height - configWindow.h) * 0.5 : configWindow.y;
RECT wr = { xpos, ypos, xpos + (int)configWindow.w, ypos + (int)configWindow.h };
AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE);
SetWindowPos(dxgi.h_wnd, NULL, wr.left, wr.top, wr.right - wr.left, wr.bottom - wr.top, SWP_NOACTIVATE | SWP_NOZORDER);
*/
}
}

View file

@ -43,7 +43,7 @@
#define TEX_CACHE_STEP 512
struct ShaderProgram {
uint32_t shader_id;
uint64_t hash;
GLuint opengl_program_id;
uint8_t num_inputs;
bool used_textures[2];
@ -61,8 +61,9 @@ struct GLTexture {
bool filter;
};
static struct ShaderProgram shader_program_pool[64];
static uint8_t shader_program_pool_size;
static struct ShaderProgram shader_program_pool[CC_MAX_SHADERS];
static uint8_t shader_program_pool_size = 0;
static uint8_t shader_program_pool_index = 0;
static GLuint opengl_vbo;
static int tex_cache_size = 0;
@ -136,6 +137,8 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_
switch (item) {
case SHADER_0:
return with_alpha ? "vec4(0.0, 0.0, 0.0, 0.0)" : "vec3(0.0, 0.0, 0.0)";
case SHADER_1:
return with_alpha ? "vec4(1.0, 1.0, 1.0, 1.0)" : "vec3(1.0, 1.0, 1.0)";
case SHADER_INPUT_1:
return with_alpha || !inputs_have_alpha ? "vInput1" : "vInput1.rgb";
case SHADER_INPUT_2:
@ -144,6 +147,14 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_
return with_alpha || !inputs_have_alpha ? "vInput3" : "vInput3.rgb";
case SHADER_INPUT_4:
return with_alpha || !inputs_have_alpha ? "vInput4" : "vInput4.rgb";
case SHADER_INPUT_5:
return with_alpha || !inputs_have_alpha ? "vInput5" : "vInput5.rgb";
case SHADER_INPUT_6:
return with_alpha || !inputs_have_alpha ? "vInput6" : "vInput6.rgb";
case SHADER_INPUT_7:
return with_alpha || !inputs_have_alpha ? "vInput7" : "vInput7.rgb";
case SHADER_INPUT_8:
return with_alpha || !inputs_have_alpha ? "vInput8" : "vInput8.rgb";
case SHADER_TEXEL0:
return with_alpha ? "texVal0" : "texVal0.rgb";
case SHADER_TEXEL0A:
@ -151,11 +162,21 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_
(with_alpha ? "vec4(texelVal0.a, texelVal0.a, texelVal0.a, texelVal0.a)" : "vec3(texelVal0.a, texelVal0.a, texelVal0.a)");
case SHADER_TEXEL1:
return with_alpha ? "texVal1" : "texVal1.rgb";
case SHADER_TEXEL1A:
return hint_single_element ? "texVal1.a" :
(with_alpha ? "vec4(texelVal1.a, texelVal1.a, texelVal1.a, texelVal1.a)" : "vec3(texelVal1.a, texelVal1.a, texelVal1.a)");
case SHADER_COMBINED:
return with_alpha ? "texel" : "texel.rgb";
case SHADER_COMBINEDA:
return hint_single_element ? "texel.a" :
(with_alpha ? "vec4(texel.a, texel.a, texel.a, texel.a)" : "vec3(texel.a, texel.a, texel.a)");
}
} else {
switch (item) {
case SHADER_0:
return "0.0";
case SHADER_1:
return "1.0";
case SHADER_INPUT_1:
return "vInput1.a";
case SHADER_INPUT_2:
@ -164,81 +185,74 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_
return "vInput3.a";
case SHADER_INPUT_4:
return "vInput4.a";
case SHADER_INPUT_5:
return "vInput5.a";
case SHADER_INPUT_6:
return "vInput6.a";
case SHADER_INPUT_7:
return "vInput7.a";
case SHADER_INPUT_8:
return "vInput8.a";
case SHADER_TEXEL0:
return "texVal0.a";
case SHADER_TEXEL0A:
return "texVal0.a";
case SHADER_TEXEL1:
return "texVal1.a";
case SHADER_TEXEL1A:
return "texVal1.a";
case SHADER_COMBINED:
return "texel.a";
case SHADER_COMBINEDA:
return "texel.a";
}
}
return "unknown";
}
static void append_formula(char *buf, size_t *len, uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) {
static void append_formula(char *buf, size_t *len, uint8_t* cmd, bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) {
if (do_single) {
append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false));
} else if (do_multiply) {
append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, " * ");
append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true));
} else if (do_mix) {
append_str(buf, len, "mix(");
append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, ", ");
append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, ", ");
append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, ")");
} else {
append_str(buf, len, "(");
append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, " - ");
append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, ") * ");
append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true));
append_str(buf, len, " + ");
append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false));
append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false));
}
}
static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shader_id) {
uint8_t c[2][4];
for (int i = 0; i < 4; i++) {
c[0][i] = (shader_id >> (i * 3)) & 7;
c[1][i] = (shader_id >> (12 + i * 3)) & 7;
}
bool opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0;
bool opt_fog = (shader_id & SHADER_OPT_FOG) != 0;
bool opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0;
static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(struct ColorCombiner* cc) {
struct CCFeatures ccf = { 0 };
gfx_cc_get_features(cc, &ccf);
bool opt_alpha = cc->cm.use_alpha;
bool opt_fog = cc->cm.use_fog;
bool opt_texture_edge = cc->cm.texture_edge;
bool opt_2cycle = cc->cm.use_2cycle;
bool opt_light_map = cc->cm.light_map;
#ifdef USE_GLES
bool opt_noise = false;
#else
bool opt_noise = (shader_id & SHADER_OPT_NOISE) != 0;
bool opt_noise = cc->cm.use_noise;
#endif
bool used_textures[2] = { 0, 0 };
int num_inputs = 0;
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 4; j++) {
if (c[i][j] >= SHADER_INPUT_1 && c[i][j] <= SHADER_INPUT_4) {
if (c[i][j] > num_inputs) {
num_inputs = c[i][j];
}
}
if (c[i][j] == SHADER_TEXEL0 || c[i][j] == SHADER_TEXEL0A) {
used_textures[0] = true;
}
if (c[i][j] == SHADER_TEXEL1) {
used_textures[1] = true;
}
}
}
bool do_single[2] = { c[0][2] == 0, c[1][2] == 0 };
bool do_multiply[2] = { c[0][1] == 0 && c[0][3] == 0, c[1][1] == 0 && c[1][3] == 0 };
bool do_mix[2] = { c[0][1] == c[0][3], c[1][1] == c[1][3] };
bool color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff);
char vs_buf[1024];
char fs_buf[2048];
size_t vs_len = 0;
@ -252,7 +266,7 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
append_line(vs_buf, &vs_len, "#version 120");
#endif
append_line(vs_buf, &vs_len, "attribute vec4 aVtxPos;");
if (used_textures[0] || used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
append_line(vs_buf, &vs_len, "attribute vec2 aTexCoord;");
append_line(vs_buf, &vs_len, "varying vec2 vTexCoord;");
num_floats += 2;
@ -262,19 +276,27 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
append_line(vs_buf, &vs_len, "varying vec4 vFog;");
num_floats += 4;
}
for (int i = 0; i < num_inputs; i++) {
if (opt_light_map) {
append_line(vs_buf, &vs_len, "attribute vec2 aLightMap;");
append_line(vs_buf, &vs_len, "varying vec2 vLightMap;");
num_floats += 2;
}
for (int i = 0; i < ccf.num_inputs; i++) {
vs_len += sprintf(vs_buf + vs_len, "attribute vec%d aInput%d;\n", opt_alpha ? 4 : 3, i + 1);
vs_len += sprintf(vs_buf + vs_len, "varying vec%d vInput%d;\n", opt_alpha ? 4 : 3, i + 1);
num_floats += opt_alpha ? 4 : 3;
}
append_line(vs_buf, &vs_len, "void main() {");
if (used_textures[0] || used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
append_line(vs_buf, &vs_len, "vTexCoord = aTexCoord;");
}
if (opt_fog) {
append_line(vs_buf, &vs_len, "vFog = aFog;");
}
for (int i = 0; i < num_inputs; i++) {
if (opt_light_map) {
append_line(vs_buf, &vs_len, "vLightMap = aLightMap;");
}
for (int i = 0; i < ccf.num_inputs; i++) {
vs_len += sprintf(vs_buf + vs_len, "vInput%d = aInput%d;\n", i + 1, i + 1);
}
append_line(vs_buf, &vs_len, "gl_Position = aVtxPos;");
@ -288,21 +310,24 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
append_line(fs_buf, &fs_len, "#version 120");
#endif
if (used_textures[0] || used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
append_line(fs_buf, &fs_len, "varying vec2 vTexCoord;");
}
if (opt_fog) {
append_line(fs_buf, &fs_len, "varying vec4 vFog;");
}
for (int i = 0; i < num_inputs; i++) {
if (opt_light_map) {
append_line(fs_buf, &fs_len, "varying vec2 vLightMap;");
}
for (int i = 0; i < ccf.num_inputs; i++) {
fs_len += sprintf(fs_buf + fs_len, "varying vec%d vInput%d;\n", opt_alpha ? 4 : 3, i + 1);
}
if (used_textures[0]) {
if (ccf.used_textures[0]) {
append_line(fs_buf, &fs_len, "uniform sampler2D uTex0;");
append_line(fs_buf, &fs_len, "uniform vec2 uTex0Size;");
append_line(fs_buf, &fs_len, "uniform bool uTex0Filter;");
}
if (used_textures[1]) {
if (ccf.used_textures[1]) {
append_line(fs_buf, &fs_len, "uniform sampler2D uTex1;");
append_line(fs_buf, &fs_len, "uniform vec2 uTex1Size;");
append_line(fs_buf, &fs_len, "uniform bool uTex1Filter;");
@ -312,7 +337,7 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
// Original author: ArthurCarvalho
// Slightly modified GLSL implementation by twinaphex, mupen64plus-libretro project.
if (used_textures[0] || used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
if (configFiltering == 2) {
append_line(fs_buf, &fs_len, "#define TEX_OFFSET(off) texture2D(tex, texCoord - (off)/texSize)");
append_line(fs_buf, &fs_len, "vec4 filter3point(in sampler2D tex, in vec2 texCoord, in vec2 texSize) {");
@ -347,29 +372,42 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
append_line(fs_buf, &fs_len, "void main() {");
if (used_textures[0]) {
if (ccf.used_textures[0]) {
append_line(fs_buf, &fs_len, "vec4 texVal0 = sampleTex(uTex0, vTexCoord, uTex0Size, uTex0Filter);");
}
if (used_textures[1]) {
append_line(fs_buf, &fs_len, "vec4 texVal1 = sampleTex(uTex1, vTexCoord, uTex1Size, uTex1Filter);");
if (ccf.used_textures[1]) {
if (cc->cm.light_map) {
append_line(fs_buf, &fs_len, "vec4 texVal1 = sampleTex(uTex1, vLightMap, uTex1Size, uTex1Filter);");
} else {
append_line(fs_buf, &fs_len, "vec4 texVal1 = sampleTex(uTex1, vTexCoord, uTex1Size, uTex1Filter);");
}
}
append_str(fs_buf, &fs_len, opt_alpha ? "vec4 texel = " : "vec3 texel = ");
if (!color_alpha_same && opt_alpha) {
append_str(fs_buf, &fs_len, "vec4(");
append_formula(fs_buf, &fs_len, c, do_single[0], do_multiply[0], do_mix[0], false, false, true);
append_str(fs_buf, &fs_len, ", ");
append_formula(fs_buf, &fs_len, c, do_single[1], do_multiply[1], do_mix[1], true, true, true);
append_str(fs_buf, &fs_len, ")");
} else {
append_formula(fs_buf, &fs_len, c, do_single[0], do_multiply[0], do_mix[0], opt_alpha, false, opt_alpha);
append_str(fs_buf, &fs_len, (opt_alpha) ? "vec4 texel = " : "vec3 texel = ");
for (int i = 0; i < (opt_2cycle + 1); i++) {
u8* cmd = &cc->shader_commands[i * 8];
if (!ccf.color_alpha_same[i] && opt_alpha) {
append_str(fs_buf, &fs_len, "vec4(");
append_formula(fs_buf, &fs_len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], false, false, true);
append_str(fs_buf, &fs_len, ", ");
append_formula(fs_buf, &fs_len, cmd, ccf.do_single[i*2+1], ccf.do_multiply[i*2+1], ccf.do_mix[i*2+1], true, true, true);
append_str(fs_buf, &fs_len, ")");
} else {
append_formula(fs_buf, &fs_len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], opt_alpha, false, opt_alpha);
}
append_line(fs_buf, &fs_len, ";");
if (i == 0 && opt_2cycle) {
append_str(fs_buf, &fs_len, "texel = ");
}
}
append_line(fs_buf, &fs_len, ";");
if (opt_texture_edge && opt_alpha) {
append_line(fs_buf, &fs_len, "if (texel.a > 0.3) texel.a = 1.0; else discard;");
}
// TODO discard if alpha is 0?
if (opt_fog) {
if (opt_alpha) {
append_line(fs_buf, &fs_len, "texel = vec4(mix(texel.rgb, vFog.rgb, vFog.a), texel.a);");
@ -436,12 +474,15 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
size_t cnt = 0;
struct ShaderProgram *prg = &shader_program_pool[shader_program_pool_size++];
struct ShaderProgram *prg = &shader_program_pool[shader_program_pool_index];
shader_program_pool_index = (shader_program_pool_index + 1) % CC_MAX_SHADERS;
if (shader_program_pool_size < CC_MAX_SHADERS) { shader_program_pool_size++; }
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aVtxPos");
prg->attrib_sizes[cnt] = 4;
++cnt;
if (used_textures[0] || used_textures[1]) {
if (ccf.used_textures[0] || ccf.used_textures[1]) {
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aTexCoord");
prg->attrib_sizes[cnt] = 2;
++cnt;
@ -453,7 +494,13 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
++cnt;
}
for (int i = 0; i < num_inputs; i++) {
if (opt_light_map) {
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aLightMap");
prg->attrib_sizes[cnt] = 2;
++cnt;
}
for (int i = 0; i < ccf.num_inputs; i++) {
char name[16];
sprintf(name, "aInput%d", i + 1);
prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, name);
@ -461,23 +508,23 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
++cnt;
}
prg->shader_id = shader_id;
prg->hash = cc->hash;
prg->opengl_program_id = shader_program;
prg->num_inputs = num_inputs;
prg->used_textures[0] = used_textures[0];
prg->used_textures[1] = used_textures[1];
prg->num_inputs = ccf.num_inputs;
prg->used_textures[0] = ccf.used_textures[0];
prg->used_textures[1] = ccf.used_textures[1];
prg->num_floats = num_floats;
prg->num_attribs = cnt;
gfx_opengl_load_shader(prg);
if (used_textures[0]) {
if (ccf.used_textures[0]) {
GLint sampler_location = glGetUniformLocation(shader_program, "uTex0");
prg->uniform_locations[0] = glGetUniformLocation(shader_program, "uTex0Size");
prg->uniform_locations[1] = glGetUniformLocation(shader_program, "uTex0Filter");
glUniform1i(sampler_location, 0);
}
if (used_textures[1]) {
if (ccf.used_textures[1]) {
GLint sampler_location = glGetUniformLocation(shader_program, "uTex1");
prg->uniform_locations[2] = glGetUniformLocation(shader_program, "uTex1Size");
prg->uniform_locations[3] = glGetUniformLocation(shader_program, "uTex1Filter");
@ -494,9 +541,9 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
return prg;
}
static struct ShaderProgram *gfx_opengl_lookup_shader(uint32_t shader_id) {
static struct ShaderProgram *gfx_opengl_lookup_shader(struct ColorCombiner* cc) {
for (size_t i = 0; i < shader_program_pool_size; i++) {
if (shader_program_pool[i].shader_id == shader_id) {
if (shader_program_pool[i].hash == cc->hash) {
return &shader_program_pool[i];
}
}

View file

@ -49,8 +49,9 @@ enum MixType {
struct ShaderProgram {
bool enabled;
uint32_t shader_id;
struct CCFeatures cc;
uint64_t hash;
struct ColorCombiner cc;
struct CCFeatures ccf;
enum MixType mix;
bool texture_used[2];
int texture_ord[2];
@ -140,25 +141,12 @@ static inline GLenum texenv_set_texture(UNUSED struct ShaderProgram *prg) {
}
static inline GLenum texenv_set_texture_color(struct ShaderProgram *prg) {
GLenum mode;
// HACK: lord forgive me for this, but this is easier
switch (prg->shader_id) {
case 0x0000038D: // mario's eyes
case 0x01045A00: // peach letter
case 0x01200A00: // intro copyright fade in
mode = GL_DECAL;
break;
case 0x00000551: // goddard
mode = GL_BLEND;
break;
default:
mode = GL_MODULATE;
break;
if (prg->cc.cm.rgb1 == color_comb_rgb(G_CCMUX_TEXEL0, G_CCMUX_SHADE, G_CCMUX_TEXEL0_ALPHA, G_CCMUX_SHADE, 0)) {
return GL_DECAL;
} else {
return GL_MODULATE;
}
return mode;
}
static inline GLenum texenv_set_texture_texture(UNUSED struct ShaderProgram *prg) {
@ -183,7 +171,7 @@ static void gfx_opengl_apply_shader(struct ShaderProgram *prg) {
glDisable(GL_TEXTURE_2D);
}
if (prg->shader_id & SHADER_OPT_FOG) {
if (prg->cc.cm.use_fog) {
// blend it on top of normal tris later
cur_fog_ofs = ofs;
ofs += 4;
@ -195,10 +183,10 @@ static void gfx_opengl_apply_shader(struct ShaderProgram *prg) {
// HACK: if there's a texture and two colors, one of them is likely for speculars or some shit (see mario head)
// if there's two colors but no texture, the real color is likely the second one
// HACKHACK: alpha is 0 in the transition shader (0x01A00045), maybe figure out the flags instead
const int vlen = (prg->cc.opt_alpha && prg->shader_id != 0x01A00045) ? 4 : 3;
const int vlen = (prg->cc.cm.use_alpha /*&& prg->shader_id != 0x01A00045*/) ? 4 : 3;
const int hack = vlen * (prg->num_inputs > 1);
if (prg->texture_used[1] && prg->cc.do_mix[0]) {
if (prg->texture_used[1] && prg->ccf.do_mix[0]) {
// HACK: when two textures are mixed by vertex color, store the color
// it will be used later when rendering two texture passes
c_mix[0] = *(ofs + hack + 0);
@ -224,7 +212,7 @@ static void gfx_opengl_apply_shader(struct ShaderProgram *prg) {
// we only need to do this once
prg->enabled = true;
if (prg->shader_id & SHADER_OPT_TEXTURE_EDGE) {
if (prg->cc.cm.texture_edge) {
// (horrible) alpha discard
glEnable(GL_ALPHA_TEST);
glAlphaFunc(GL_GREATER, 0.666f);
@ -258,14 +246,15 @@ static void gfx_opengl_load_shader(struct ShaderProgram *new_prg) {
cur_shader->enabled = false;
}
static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shader_id) {
struct CCFeatures ccf;
gfx_cc_get_features(shader_id, &ccf);
static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(struct ColorCombiner* cc) {
struct ShaderProgram *prg = &shader_program_pool[shader_program_pool_size++];
prg->shader_id = shader_id;
prg->cc = ccf;
struct CCFeatures ccf = { 0 };
gfx_cc_get_features(cc, &ccf);
prg->hash = cc->hash;
prg->cc = *cc;
prg->ccf = ccf;
prg->num_inputs = ccf.num_inputs;
prg->texture_used[0] = ccf.used_textures[0];
prg->texture_used[1] = ccf.used_textures[1];
@ -296,9 +285,9 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad
return prg;
}
static struct ShaderProgram *gfx_opengl_lookup_shader(uint32_t shader_id) {
static struct ShaderProgram *gfx_opengl_lookup_shader(struct ColorCombiner* cc) {
for (size_t i = 0; i < shader_program_pool_size; i++)
if (shader_program_pool[i].shader_id == shader_id)
if (shader_program_pool[i].hash == cc->hash)
return &shader_program_pool[i];
return NULL;
}

View file

@ -94,14 +94,9 @@ static struct {
uint32_t pool_pos;
} gfx_texture_cache;
struct ColorCombiner {
uint32_t cc_id;
struct ShaderProgram *prg;
uint8_t shader_input_mapping[2][4];
};
static struct ColorCombiner color_combiner_pool[64];
static uint8_t color_combiner_pool_size;
static struct ColorCombiner color_combiner_pool[CC_MAX_SHADERS] = { 0 };
static uint8_t color_combiner_pool_size = 0;
static uint8_t color_combiner_pool_index = 0;
static struct RSP {
float modelview_matrix_stack[11][4][4];
@ -148,7 +143,7 @@ static struct RDP {
bool textures_changed[2];
uint32_t other_mode_l, other_mode_h;
uint32_t combine_mode;
struct CombineMode combine_mode;
struct RGBA env_color, prim_color, fog_color, fill_color;
struct XYWidthHeight viewport, scissor;
@ -234,77 +229,132 @@ static void gfx_flush(void) {
}
}
static struct ShaderProgram *gfx_lookup_or_create_shader_program(uint32_t shader_id) {
struct ShaderProgram *prg = gfx_rapi->lookup_shader(shader_id);
/*
 * Recomputes cm->hash from the combine-mode words and the flag word.
 *
 * The accumulator starts at 5381 and every contribution is folded in with
 * a multiply-by-33-and-add step (unsigned 64-bit, wrapping on overflow).
 * Contributions, in order:
 *   - rgb1, shifted into the upper 32 bits (always)
 *   - alpha1 (only when use_alpha is set)
 *   - rgb2, shifted into the upper 32 bits (only when use_2cycle is set)
 *   - alpha2 (only when both use_2cycle and use_alpha are set)
 *   - flags (always)
 *
 * cm->hash is cleared before any field is read and overwritten with the
 * final value at the end.
 */
static void combine_mode_update_hash(struct CombineMode* cm) {
    uint64_t acc = 5381;

    cm->hash = 0;

    // First cycle: colour word always counts, alpha word only if alpha is in use.
    acc = acc * 33 + ((u64)cm->rgb1 << 32);
    if (cm->use_alpha) {
        acc = acc * 33 + ((u64)cm->alpha1);
    }

    // Second cycle words are only hashed when 2-cycle mode is active.
    if (cm->use_2cycle) {
        acc = acc * 33 + ((u64)cm->rgb2 << 32);
        if (cm->use_alpha) {
            acc = acc * 33 + ((u64)cm->alpha2);
        }
    }

    acc = acc * 33 + cm->flags;

    cm->hash = acc;
}
/*
 * Derives cc->hash from the already-computed combine-mode hash (cc->cm.hash)
 * plus the combiner's input mapping and shader command words.
 *
 * For each of 8 indices, the corresponding shader_input_mapping_as_u64 and
 * shader_commands_as_u64 entries are folded in, in that order, with a
 * multiply-by-33-and-add step (unsigned 64-bit, wrapping on overflow).
 *
 * NOTE(review): the loop bound of 8 is taken as-is; the declared length of
 * the *_as_u64 arrays is not visible here -- confirm it is at least 8.
 */
static void color_combiner_update_hash(struct ColorCombiner* cc) {
    uint64_t acc = cc->cm.hash;

    for (int word = 0; word < 8; word++) {
        acc = acc * 33 + cc->shader_input_mapping_as_u64[word];
        acc = acc * 33 + cc->shader_commands_as_u64[word];
    }

    cc->hash = acc;
}
static struct ShaderProgram *gfx_lookup_or_create_shader_program(struct ColorCombiner* cc) {
struct ShaderProgram *prg = gfx_rapi->lookup_shader(cc);
if (prg == NULL) {
gfx_rapi->unload_shader(rendering_state.shader_program);
prg = gfx_rapi->create_and_load_new_shader(shader_id);
prg = gfx_rapi->create_and_load_new_shader(cc);
rendering_state.shader_program = prg;
}
return prg;
}
static void gfx_generate_cc(struct ColorCombiner *comb, uint32_t cc_id) {
uint8_t c[2][4];
uint32_t shader_id = (cc_id >> 24) << 24;
uint8_t shader_input_mapping[2][4] = {{0}};
for (int32_t i = 0; i < 4; i++) {
c[0][i] = (cc_id >> (i * 3)) & 7;
c[1][i] = (cc_id >> (12 + i * 3)) & 7;
}
for (int32_t i = 0; i < 2; i++) {
if (c[i][0] == c[i][1] || c[i][2] == CC_0) {
c[i][0] = c[i][1] = c[i][2] = 0;
}
uint8_t input_number[8] = {0};
int next_input_number = SHADER_INPUT_1;
for (int j = 0; j < 4; j++) {
int val = 0;
switch (c[i][j]) {
case CC_0:
break;
case CC_TEXEL0:
val = SHADER_TEXEL0;
break;
case CC_TEXEL1:
val = SHADER_TEXEL1;
break;
case CC_TEXEL0A:
val = SHADER_TEXEL0A;
break;
case CC_PRIM:
case CC_SHADE:
case CC_ENV:
case CC_LOD:
if (input_number[c[i][j]] == 0) {
shader_input_mapping[i][next_input_number - 1] = c[i][j];
input_number[c[i][j]] = next_input_number++;
}
val = input_number[c[i][j]];
break;
}
shader_id |= val << (i * 12 + j * 3);
static void gfx_generate_cc(struct ColorCombiner *cc) {
u8 next_input_number = 0;
u8 input_number[CC_ENUM_MAX] = { 0 };
for (int i = 0; i < SHADER_CMD_LENGTH; i++) {
u8 cm_cmd = cc->cm.all_values[i];
u8 shader_cmd = 0;
switch (cm_cmd) {
case CC_0:
shader_cmd = SHADER_0;
break;
case CC_1:
shader_cmd = SHADER_1;
break;
case CC_TEXEL0:
shader_cmd = SHADER_TEXEL0;
break;
case CC_TEXEL1:
shader_cmd = SHADER_TEXEL1;
break;
case CC_TEXEL0A:
shader_cmd = SHADER_TEXEL0A;
break;
case CC_TEXEL1A:
shader_cmd = SHADER_TEXEL1A;
break;
case CC_COMBINED:
shader_cmd = cc->cm.use_2cycle ? SHADER_COMBINED : SHADER_0;
break;
case CC_COMBINEDA:
shader_cmd = cc->cm.use_2cycle ? SHADER_COMBINEDA : SHADER_0;
break;
case CC_PRIM:
case CC_PRIMA:
case CC_SHADE:
case CC_SHADEA:
case CC_ENV:
case CC_ENVA:
case CC_LOD:
if (input_number[cm_cmd] == 0) {
cc->shader_input_mapping[next_input_number] = cm_cmd;
input_number[cm_cmd] = SHADER_INPUT_1 + next_input_number;
next_input_number++;
}
shader_cmd = input_number[cm_cmd];
break;
default:
shader_cmd = SHADER_0;
break;
}
cc->shader_commands[i] = shader_cmd;
}
comb->cc_id = cc_id;
comb->prg = gfx_lookup_or_create_shader_program(shader_id);
memcpy(comb->shader_input_mapping, shader_input_mapping, sizeof(shader_input_mapping));
color_combiner_update_hash(cc);
cc->prg = gfx_lookup_or_create_shader_program(cc);
gfx_cc_print(cc);
}
static struct ColorCombiner *gfx_lookup_or_create_color_combiner(uint32_t cc_id) {
static struct ColorCombiner *gfx_lookup_or_create_color_combiner(struct CombineMode* cm) {
combine_mode_update_hash(cm);
static struct ColorCombiner *prev_combiner;
if (prev_combiner != NULL && prev_combiner->cc_id == cc_id) {
if (prev_combiner != NULL && prev_combiner->cm.hash == cm->hash) {
return prev_combiner;
}
for (size_t i = 0; i < color_combiner_pool_size; i++) {
if (color_combiner_pool[i].cc_id == cc_id) {
if (color_combiner_pool[i].cm.hash == cm->hash) {
return prev_combiner = &color_combiner_pool[i];
}
}
gfx_flush();
struct ColorCombiner *comb = &color_combiner_pool[color_combiner_pool_size++];
gfx_generate_cc(comb, cc_id);
struct ColorCombiner *comb = &color_combiner_pool[color_combiner_pool_index];
color_combiner_pool_index = (color_combiner_pool_index + 1) % CC_MAX_SHADERS;
if (color_combiner_pool_size < CC_MAX_SHADERS) { color_combiner_pool_size++; }
memcpy(&comb->cm, cm, sizeof(struct CombineMode));
gfx_generate_cc(comb);
printf(">> added %016lx\n", comb->cm.hash);
return prev_combiner = comb;
}
@ -979,27 +1029,22 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t
rdp.viewport_or_scissor_changed = false;
}
uint32_t cc_id = rdp.combine_mode;
struct CombineMode* cm = &rdp.combine_mode;
bool use_alpha = (rdp.other_mode_l & (G_BL_A_MEM << 18)) == 0;
bool use_fog = (rdp.other_mode_l >> 30) == G_BL_CLR_FOG;
bool texture_edge = (rdp.other_mode_l & CVG_X_ALPHA) == CVG_X_ALPHA;
bool use_noise = (rdp.other_mode_l & G_AC_DITHER) == G_AC_DITHER;
cm->use_alpha = (rdp.other_mode_l & (G_BL_A_MEM << 18)) == 0;
cm->texture_edge = (rdp.other_mode_l & CVG_X_ALPHA) == CVG_X_ALPHA;
cm->use_noise = (rdp.other_mode_l & G_AC_DITHER) == G_AC_DITHER;
cm->use_2cycle = (rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)) == G_CYC_2CYCLE;
cm->use_fog = (rdp.other_mode_l >> 30) == G_BL_CLR_FOG;
cm->light_map = (rsp.geometry_mode & G_LIGHT_MAP_EXT) == G_LIGHT_MAP_EXT;
if (texture_edge) {
use_alpha = true;
if (cm->texture_edge) {
cm->use_alpha = true;
}
if (use_alpha) cc_id |= SHADER_OPT_ALPHA;
if (use_fog) cc_id |= SHADER_OPT_FOG;
if (texture_edge) cc_id |= SHADER_OPT_TEXTURE_EDGE;
if (use_noise) cc_id |= SHADER_OPT_NOISE;
struct ColorCombiner *comb = gfx_lookup_or_create_color_combiner(cm);
cm = &comb->cm;
if (!use_alpha) {
cc_id &= ~0xfff000;
}
struct ColorCombiner *comb = gfx_lookup_or_create_color_combiner(cc_id);
struct ShaderProgram *prg = comb->prg;
if (prg != rendering_state.shader_program) {
gfx_flush();
@ -1007,10 +1052,10 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t
gfx_rapi->load_shader(prg);
rendering_state.shader_program = prg;
}
if (use_alpha != rendering_state.alpha_blend) {
if (cm->use_alpha != rendering_state.alpha_blend) {
gfx_flush();
gfx_rapi->set_use_alpha(use_alpha);
rendering_state.alpha_blend = use_alpha;
gfx_rapi->set_use_alpha(cm->use_alpha);
rendering_state.alpha_blend = cm->use_alpha;
}
uint8_t num_inputs;
bool used_textures[2];
@ -1065,18 +1110,26 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t
buf_vbo[buf_vbo_len++] = v / tex_height;
}
if (use_fog) {
if (cm->use_fog) {
buf_vbo[buf_vbo_len++] = rdp.fog_color.r / 255.0f;
buf_vbo[buf_vbo_len++] = rdp.fog_color.g / 255.0f;
buf_vbo[buf_vbo_len++] = rdp.fog_color.b / 255.0f;
buf_vbo[buf_vbo_len++] = v_arr[i]->color.a / 255.0f; // fog factor (not alpha)
}
if (cm->light_map) {
struct RGBA* col = &v_arr[i]->color;
buf_vbo[buf_vbo_len++] = ( (((uint16_t)col->g) << 8) | ((uint16_t)col->r) ) / 65535.0f;
buf_vbo[buf_vbo_len++] = 1.0f - (( (((uint16_t)col->a) << 8) | ((uint16_t)col->b) ) / 65535.0f);
}
for (int j = 0; j < num_inputs; j++) {
struct RGBA *color;
struct RGBA tmp;
for (int k = 0; k < 1 + (use_alpha ? 1 : 0); k++) {
switch (comb->shader_input_mapping[k][j]) {
struct RGBA *color = NULL;
struct RGBA tmp = { 0 };
for (int a = 0; a < (cm->use_alpha ? 2 : 1 ); a++) {
u8 mapping = comb->shader_input_mapping[j];
switch (mapping) {
case CC_PRIM:
color = &rdp.prim_color;
break;
@ -1086,6 +1139,18 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t
case CC_ENV:
color = &rdp.env_color;
break;
case CC_PRIMA:
memset(&tmp, rdp.prim_color.a, sizeof(tmp));
color = &tmp;
break;
case CC_SHADEA:
memset(&tmp, v_arr[i]->color.a, sizeof(tmp));
color = &tmp;
break;
case CC_ENVA:
memset(&tmp, rdp.env_color.a, sizeof(tmp));
color = &tmp;
break;
case CC_LOD:
{
float distance_frac = (v1->w - 3000.0f) / 3000.0f;
@ -1100,12 +1165,12 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t
color = &tmp;
break;
}
if (k == 0) {
if (a == 0) {
buf_vbo[buf_vbo_len++] = color->r / 255.0f;
buf_vbo[buf_vbo_len++] = color->g / 255.0f;
buf_vbo[buf_vbo_len++] = color->b / 255.0f;
} else {
if (use_fog && color == &v_arr[i]->color) {
if (cm->use_fog && color == &v_arr[i]->color) {
// Shade alpha is 100% for fog
buf_vbo[buf_vbo_len++] = 1.0f;
} else {
@ -1346,36 +1411,17 @@ static void gfx_dp_load_tile(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t
rdp.texture_tile.lrt = lrt;
}
static uint8_t color_comb_component(uint32_t v) {
switch (v) {
case G_CCMUX_TEXEL0:
return CC_TEXEL0;
case G_CCMUX_TEXEL1:
return CC_TEXEL1;
case G_CCMUX_PRIMITIVE:
return CC_PRIM;
case G_CCMUX_SHADE:
return CC_SHADE;
case G_CCMUX_ENVIRONMENT:
return CC_ENV;
case G_CCMUX_TEXEL0_ALPHA:
return CC_TEXEL0A;
case G_CCMUX_LOD_FRACTION:
return CC_LOD;
default:
return CC_0;
}
}
static void gfx_dp_set_combine_mode(uint32_t rgb1, uint32_t alpha1, uint32_t rgb2, uint32_t alpha2) {
//printf(">>> combine: %08x %08x %08x %08x\n", rgb1, alpha1, rgb2, alpha2);
memset(&rdp.combine_mode, 0, sizeof(struct CombineMode));
static inline uint32_t color_comb(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
return color_comb_component(a) |
(color_comb_component(b) << 3) |
(color_comb_component(c) << 6) |
(color_comb_component(d) << 9);
}
rdp.combine_mode.rgb1 = rgb1;
rdp.combine_mode.alpha1 = alpha1;
static void gfx_dp_set_combine_mode(uint32_t rgb, uint32_t alpha) {
rdp.combine_mode = rgb | (alpha << 12);
rdp.combine_mode.rgb2 = rgb2;
rdp.combine_mode.alpha2 = alpha2;
rdp.combine_mode.flags = 0;
}
static void gfx_dp_set_env_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) {
@ -1480,14 +1526,18 @@ static void gfx_draw_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lr
}
static void gfx_dp_texture_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry, UNUSED uint8_t tile, int16_t uls, int16_t ult, int16_t dsdx, int16_t dtdy, bool flip) {
uint32_t saved_combine_mode = rdp.combine_mode;
struct CombineMode saved_combine_mode = rdp.combine_mode;
if ((rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)) == G_CYC_COPY) {
// Per RDP Command Summary Set Tile's shift s and this dsdx should be set to 4 texels
// Divide by 4 to get 1 instead
dsdx >>= 2;
// Color combiner is turned off in copy mode
gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_TEXEL0), color_comb(0, 0, 0, G_ACMUX_TEXEL0));
gfx_dp_set_combine_mode(
color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_TEXEL0, 0),
color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_TEXEL0, 0),
color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_TEXEL0, 1),
color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_TEXEL0, 1));
// Per documentation one extra pixel is added in this modes to each edge
lrx += 1 << 2;
@ -1528,7 +1578,10 @@ static void gfx_dp_texture_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int3
}
gfx_draw_rectangle(ulx, uly, lrx, lry);
u32 cflags = rdp.combine_mode.flags;
rdp.combine_mode = saved_combine_mode;
rdp.combine_mode.flags = cflags;
}
static void gfx_dp_fill_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry) {
@ -1549,10 +1602,17 @@ static void gfx_dp_fill_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t
v->color = rdp.fill_color;
}
uint32_t saved_combine_mode = rdp.combine_mode;
gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_SHADE), color_comb(0, 0, 0, G_ACMUX_SHADE));
struct CombineMode saved_combine_mode = rdp.combine_mode;
gfx_dp_set_combine_mode(
color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_SHADE, 0),
color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_SHADE, 0),
color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_SHADE, 1),
color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_SHADE, 1));
gfx_draw_rectangle(ulx, uly, lrx, lry);
u32 cflags = rdp.combine_mode.flags;
rdp.combine_mode = saved_combine_mode;
rdp.combine_mode.flags = cflags;
}
static void gfx_dp_set_z_image(void *z_buf_address) {
@ -1719,10 +1779,10 @@ static void OPTIMIZE_O3 gfx_run_dl(Gfx* cmd) {
break;
case G_SETCOMBINE:
gfx_dp_set_combine_mode(
color_comb(C0(20, 4), C1(28, 4), C0(15, 5), C1(15, 3)),
color_comb(C0(12, 3), C1(12, 3), C0(9, 3), C1(9, 3)));
/*color_comb(C0(5, 4), C1(24, 4), C0(0, 5), C1(6, 3)),
color_comb(C1(21, 3), C1(3, 3), C1(18, 3), C1(0, 3)));*/
color_comb_rgb (C0(20, 4), C1(28, 4), C0(15, 5), C1(15, 3), 0),
color_comb_alpha(C0(12, 3), C1(12, 3), C0(9, 3), C1(9, 3), 0),
color_comb_rgb (C0(5, 4), C1(24, 4), C0(0, 5), C1(6, 3), 1),
color_comb_alpha(C1(21, 3), C1(3, 3), C1(18, 3), C1(0, 3), 1));
break;
// G_SETPRIMCOLOR, G_CCMUX_PRIMITIVE, G_ACMUX_PRIMITIVE, is used by Goddard
// G_CCMUX_TEXEL1, LOD_FRACTION is used in Bowser room 1
@ -1808,38 +1868,7 @@ void gfx_init(struct GfxWindowManagerAPI *wapi, struct GfxRenderingAPI *rapi, co
gfx_wapi->init(window_title);
gfx_rapi->init();
// Used in the 120 star TAS
static uint32_t precomp_shaders[] = {
0x01200200,
0x00000045,
0x00000200,
0x01200a00,
0x00000a00,
0x01a00045,
0x00000551,
0x01045045,
0x05a00a00,
0x01200045,
0x05045045,
0x01045a00,
0x01a00a00,
0x0000038d,
0x01081081,
0x0120038d,
0x03200045,
0x03200a00,
0x01a00a6f,
0x01141045,
0x07a00a00,
0x05200200,
0x03200200,
0x09200200,
0x0920038d,
0x09200045
};
for (size_t i = 0; i < sizeof(precomp_shaders) / sizeof(uint32_t); i++)
gfx_lookup_or_create_shader_program(precomp_shaders[i]);
gfx_cc_precomp();
}
#ifdef EXTERNAL_DATA
@ -2133,6 +2162,15 @@ static void OPTIMIZE_O3 djui_gfx_sp_simple_tri1(uint8_t vtx1_idx, uint8_t vtx2_i
*/
}
/*
 * Precompile entry point for a color combiner.
 *
 * Loads the four combiner selections (rgb/alpha for cycle 1 and cycle 2)
 * into the current RDP combine mode, applies `flags` on top, and then asks
 * the combiner cache to look up or create the matching color combiner.
 *
 * Note: the current RDP combine mode is left set to these values on return.
 */
void gfx_pc_precomp_shader(uint32_t rgb1, uint32_t alpha1, uint32_t rgb2, uint32_t alpha2, uint32_t flags) {
    gfx_dp_set_combine_mode(rgb1, alpha1, rgb2, alpha2);

    // Flags are written after the call above so the requested value is the one
    // the combiner lookup sees.
    rdp.combine_mode.flags = flags;

    gfx_lookup_or_create_color_combiner(&rdp.combine_mode);
}
void OPTIMIZE_O3 djui_gfx_run_dl(Gfx* cmd) {
uint32_t opcode = cmd->words.w0 >> 24;
switch (opcode) {

View file

@ -26,6 +26,7 @@ void gfx_run(Gfx *commands);
void gfx_end_frame(void);
void gfx_precache_textures(void);
void gfx_shutdown(void);
/*
 * Sets the renderer's current combine mode from the given rgb/alpha
 * selections (rgb1/alpha1 and rgb2/alpha2), stores `flags` in it, and
 * looks up or creates the corresponding color combiner.
 */
void gfx_pc_precomp_shader(uint32_t rgb1, uint32_t alpha1, uint32_t rgb2, uint32_t alpha2, uint32_t flags);
#ifdef __cplusplus
}

View file

@ -6,13 +6,14 @@
#include <stdbool.h>
struct ShaderProgram;
struct ColorCombiner;
struct GfxRenderingAPI {
bool (*z_is_from_0_to_1)(void);
void (*unload_shader)(struct ShaderProgram *old_prg);
void (*load_shader)(struct ShaderProgram *new_prg);
struct ShaderProgram *(*create_and_load_new_shader)(uint32_t shader_id);
struct ShaderProgram *(*lookup_shader)(uint32_t shader_id);
struct ShaderProgram *(*create_and_load_new_shader)(struct ColorCombiner* cc);
struct ShaderProgram *(*lookup_shader)(struct ColorCombiner* cc);
void (*shader_get_info)(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]);
uint32_t (*new_texture)(void);
void (*select_texture)(int tile, uint32_t texture_id);

View file

@ -1,7 +1,7 @@
#ifndef GFX_SCREEN_CONFIG_H
#define GFX_SCREEN_CONFIG_H
#define DESIRED_SCREEN_WIDTH 640
#define DESIRED_SCREEN_HEIGHT 480
#define DESIRED_SCREEN_WIDTH 800
#define DESIRED_SCREEN_HEIGHT 600
#endif