Started optimizing text rendering

This commit is contained in:
MysterD 2023-04-01 23:44:10 -07:00
parent 090122491f
commit e578943700
7 changed files with 192 additions and 24 deletions

View file

@ -3853,6 +3853,9 @@ ALIGNED8 static const u8 texture_font_normal_char_punc_sp_qu[] = {
#include "textures/segment2/custom_font_normal_char_punc_sp_qu.ia4.inc.c"
};
ALIGNED8 const u8 texture_font_normal[] = {
#include "textures/custom_font_title/custom_font_normal.rgba32.inc.c"
};
const u8* const font_normal_chars[] = {
texture_font_char_us_exclamation, // !

View file

@ -27,9 +27,9 @@ const Gfx dl_font_normal_display_list_begin[] = {
const Gfx dl_font_normal_display_list[] = {
gsDPLoadBlock(G_TX_LOADTILE, 0, 0, ((16 * 8 + G_IM_SIZ_4b_INCR) >> G_IM_SIZ_4b_SHIFT) - 1, CALC_DXT(16, G_IM_SIZ_4b_BYTES)),
gsSPVertex(djui_font_normal_vertices, 4, 0),
gsSPVertexDjui(djui_font_normal_vertices, 4, 0),
gsSPExecuteDjui(G_TEXCLIP_DJUI),
gsSP2Triangles(0, 1, 2, 0x0, 0, 2, 3, 0x0),
gsSP2TrianglesDjui(0, 1, 2, 0x0, 0, 2, 3, 0x0),
gsSPEndDisplayList(),
};
@ -39,11 +39,19 @@ static void djui_font_normal_render_char(char* c) {
u32 index = djui_unicode_get_sprite_index(c);
extern const u8* const font_normal_chars[];
void* fontChar = (void*)font_normal_chars[index];
//extern const u8* const font_normal_chars[];
//void* fontChar = (void*)font_normal_chars[index];
extern ALIGNED8 const u8 texture_font_normal[];
void* fontChar = texture_font_normal;
gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_IA, G_IM_SIZ_16b, 1, (void*)fontChar);
gSPDisplayList(gDisplayListHead++, dl_font_normal_display_list);
//djui_gfx_render_texture(texture_font_normal, 256, 128, 32);
//djui_gfx_render_texture_tile(texture_font_normal, 8, 16, 32, 0, 0, 256, 128);
u32 tx = index % 32;
u32 ty = index / 32;
djui_gfx_render_texture_tile(texture_font_normal, 256, 128, 32, tx * 8, ty * 16, 8, 16);
//gDPSetTextureImage(gDisplayListHead++, G_IM_FMT_IA, G_IM_SIZ_16b, 1, (void*)fontChar);
//gSPDisplayList(gDisplayListHead++, dl_font_normal_display_list);
}
static f32 djui_font_normal_char_width(char* c) {
@ -57,8 +65,8 @@ static const struct DjuiFont sDjuiFontNormal = {
.charHeight = 1.0f,
.lineHeight = 0.8125f,
.defaultFontScale = 32.0f,
.rotatedUV = true,
.textBeginDisplayList = dl_font_normal_display_list_begin,
.rotatedUV = false,
.textBeginDisplayList = NULL, //dl_font_normal_display_list_begin,
.render_char = djui_font_normal_render_char,
.char_width = djui_font_normal_char_width,
};

View file

@ -2,6 +2,8 @@
#define G_TEXCLIP_DJUI 0xe1
#define G_TEXOVERRIDE_DJUI 0xe0
#define G_DJUI_SIMPLE_VERT 0x11
#define G_DJUI_SIMPLE_TRI2 0x12
#define G_EXECUTE_DJUI 0xdd
#define gSetClippingDjui(pkt, cmd, rot, x1, y1, x2, y2) \
@ -20,10 +22,64 @@
_g->words.w1 = (uintptr_t)(texture); \
}
# define gSPVertexDjui(pkt, v, n, v0) \
{ \
Gfx *_g = (Gfx *)(pkt); \
_g->words.w0 = \
_SHIFTL(G_DJUI_SIMPLE_VERT,24,8)|_SHIFTL((n),12,8)|_SHIFTL((v0)+(n),1,7); \
_g->words.w1 = (uintptr_t)(v); \
}
#define gSP2TrianglesDjui(pkt, v00, v01, v02, flag0, v10, v11, v12, flag1) \
{ \
Gfx *_g = (Gfx *)(pkt); \
\
_g->words.w0 = (_SHIFTL(G_DJUI_SIMPLE_TRI2, 24, 8)| \
__gsSP1Triangle_w1f(v00, v01, v02, flag0)); \
_g->words.w1 = __gsSP1Triangle_w1f(v10, v11, v12, flag1); \
}
#define gsSPExecuteDjui(word) \
{{ \
_SHIFTL(G_EXECUTE_DJUI, 24, 8), (unsigned int)(word) \
}}
#define gDPLoadTextureBlockWithoutTexture(pkt, timg, fmt, siz, width, height, \
pal, cms, cmt, masks, maskt, shifts, shiftt) \
{ \
gDPSetTile(pkt, fmt, siz##_LOAD_BLOCK, 0, 0, G_TX_LOADTILE, \
0 , cmt, maskt, shiftt, cms, masks, shifts); \
gDPLoadSync(pkt); \
gDPLoadBlock(pkt, G_TX_LOADTILE, 0, 0, \
(((width)*(height) + siz##_INCR) >> siz##_SHIFT) -1, \
CALC_DXT(width, siz##_BYTES)); \
gDPPipeSync(pkt); \
gDPSetTile(pkt, fmt, siz, \
(((width) * siz##_LINE_BYTES)+7)>>3, 0, \
G_TX_RENDERTILE, pal, cmt, maskt, shiftt, cms, masks, \
shifts); \
gDPSetTileSize(pkt, G_TX_RENDERTILE, 0, 0, \
((width)-1) << G_TEXTURE_IMAGE_FRAC, \
((height)-1) << G_TEXTURE_IMAGE_FRAC) \
}
#define gDPSetTextureClippingDjui(pkt, rot, x1, y1, x2, y2) gSetClippingDjui(pkt, G_TEXCLIP_DJUI, rot, x1, y1, x2, y2)
#define gDPSetTextureOverrideDjui(pkt, texture, w, h, bitSize) gSetOverrideDjui(pkt, G_TEXOVERRIDE_DJUI, texture, w, h, bitSize)
// DO NOT COMMIT //
# define gsSPVertexDjui(v, n, v0) \
{{ \
(_SHIFTL(G_DJUI_SIMPLE_VERT,24,8)|_SHIFTL((n),12,8)|_SHIFTL((v0)+(n),1,7)), \
(uintptr_t)(v) \
}}
#define gsSP2TrianglesDjui(v00, v01, v02, flag0, v10, v11, v12, flag1) \
{{ \
(_SHIFTL(G_DJUI_SIMPLE_TRI2, 24, 8)| \
__gsSP1Triangle_w1f(v00, v01, v02, flag0)), \
__gsSP1Triangle_w1f(v10, v11, v12, flag1) \
}}
// DO NOT COMMIT //

View file

@ -84,21 +84,25 @@ void djui_gfx_render_texture(const u8* texture, u32 w, u32 h, u32 bitSize) {
void djui_gfx_render_texture_tile(const u8* texture, u32 w, u32 h, u32 bitSize, u32 tileX, u32 tileY, u32 tileW, u32 tileH) {
Vtx *vtx = alloc_display_list(sizeof(Vtx) * 4);
vtx[0] = (Vtx) {{{ 0, -1, 0 }, 0, { ( tileX * 512) / w, ((tileY + tileH) * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
vtx[1] = (Vtx) {{{ 1, -1, 0 }, 0, { ((tileX + tileW) * 512) / w, ((tileY + tileH) * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
vtx[2] = (Vtx) {{{ 1, 0, 0 }, 0, { ((tileX + tileW) * 512) / w, ( tileY * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
vtx[3] = (Vtx) {{{ 0, 0, 0 }, 0, { ( tileX * 512) / w, ( tileY * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
gDPSetTextureOverrideDjui(gDisplayListHead++, texture, djui_gfx_power_of_two(w), djui_gfx_power_of_two(h), bitSize);
f32 aspect = w ? ((f32)h / (f32)w) : 1;
vtx[0] = (Vtx) {{{ 0, -1, 0 }, 0, { ( tileX * 512) / w, ((tileY + tileH) * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
vtx[1] = (Vtx) {{{ 1 * aspect, -1, 0 }, 0, { ((tileX + tileW) * 512) / w, ((tileY + tileH) * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
vtx[2] = (Vtx) {{{ 1 * aspect, 0, 0 }, 0, { ((tileX + tileW) * 512) / w, ( tileY * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
vtx[3] = (Vtx) {{{ 0, 0, 0 }, 0, { ( tileX * 512) / w, ( tileY * 512) / h }, { 0xff, 0xff, 0xff, 0xff }}};
gSPClearGeometryMode(gDisplayListHead++, G_LIGHTING);
gDPSetCombineMode(gDisplayListHead++, G_CC_FADEA, G_CC_FADEA);
gDPSetRenderMode(gDisplayListHead++, G_RM_XLU_SURF, G_RM_XLU_SURF2);
gDPSetTextureFilter(gDisplayListHead++, G_TF_POINT);
gSPTexture(gDisplayListHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_ON);
gDPLoadTextureBlock(gDisplayListHead++, NULL, G_IM_FMT_RGBA, G_IM_SIZ_16b, 16, 16, 0, G_TX_CLAMP, G_TX_CLAMP, 5, 5, G_TX_NOLOD, G_TX_NOLOD);
gDPSetTextureOverrideDjui(gDisplayListHead++, texture, djui_gfx_power_of_two(w), djui_gfx_power_of_two(h), bitSize);
gDPLoadTextureBlockWithoutTexture(gDisplayListHead++, NULL, G_IM_FMT_RGBA, G_IM_SIZ_16b, 16, 16, 0, G_TX_CLAMP, G_TX_CLAMP, 5, 5, G_TX_NOLOD, G_TX_NOLOD);
*(gDisplayListHead++) = (Gfx) gsSPExecuteDjui(G_TEXOVERRIDE_DJUI);
gSPVertex(gDisplayListHead++, vtx, 4, 0);
gSPVertexDjui(gDisplayListHead++, vtx, 4, 0);
*(gDisplayListHead++) = (Gfx) gsSPExecuteDjui(G_TEXCLIP_DJUI);
gSP2Triangles(gDisplayListHead++, 0, 1, 2, 0x0, 0, 2, 3, 0x0);
gSP2TrianglesDjui(gDisplayListHead++, 0, 1, 2, 0x0, 0, 2, 3, 0x0);
gSPTexture(gDisplayListHead++, 0xFFFF, 0xFFFF, 0, G_TX_RENDERTILE, G_OFF);
gDPSetCombineMode(gDisplayListHead++, G_CC_SHADE, G_CC_SHADE);
}

View file

@ -900,9 +900,7 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t
struct LoadedVertex *v2 = &rsp.loaded_vertices[vtx2_idx];
struct LoadedVertex *v3 = &rsp.loaded_vertices[vtx3_idx];
struct LoadedVertex *v_arr[3] = {v1, v2, v3};
//if (rand()%2) return;
if (v1->clip_rej & v2->clip_rej & v3->clip_rej) {
// The whole triangle lies outside the visible area
return;
@ -1293,9 +1291,9 @@ static void gfx_dp_load_block(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t
}
uint32_t size_bytes = (lrs + 1) << word_size_shift;
rdp.loaded_texture[rdp.texture_to_load.tile_number].size_bytes = size_bytes;
rdp.textures_changed[rdp.texture_to_load.tile_number] = rdp.loaded_texture[rdp.texture_to_load.tile_number].addr != rdp.texture_to_load.addr;
rdp.loaded_texture[rdp.texture_to_load.tile_number].addr = rdp.texture_to_load.addr;
rdp.textures_changed[rdp.texture_to_load.tile_number] = true;
}
static void gfx_dp_load_tile(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t lrs, uint32_t lrt) {
@ -1997,8 +1995,9 @@ static void OPTIMIZE_O3 djui_gfx_dp_execute_override(void) {
uint32_t lrs = (sDjuiOverrideW * sDjuiOverrideH) - 1;
uint32_t sizeBytes = (lrs + 1) << wordSizeShift;
rdp.loaded_texture[rdp.texture_to_load.tile_number].size_bytes = sizeBytes;
rdp.textures_changed[rdp.texture_to_load.tile_number] = rdp.loaded_texture[rdp.texture_to_load.tile_number].addr != rdp.texture_to_load.addr;
rdp.loaded_texture[rdp.texture_to_load.tile_number].addr = rdp.texture_to_load.addr;
rdp.textures_changed[rdp.texture_to_load.tile_number] = true;
//rdp.textures_changed[rdp.texture_to_load.tile_number] = true;
// gsDPSetTile
uint32_t line = (((sDjuiOverrideW * 2) + 7) >> 3);
@ -2009,8 +2008,6 @@ static void OPTIMIZE_O3 djui_gfx_dp_execute_override(void) {
rdp.texture_tile.ult = 0;
rdp.texture_tile.lrs = (sDjuiOverrideW - 1) << G_TEXTURE_IMAGE_FRAC;
rdp.texture_tile.lrt = (sDjuiOverrideH - 1) << G_TEXTURE_IMAGE_FRAC;*/
rdp.textures_changed[0] = true;
rdp.textures_changed[1] = true;
}
static void OPTIMIZE_O3 djui_gfx_dp_execute_djui(uint32_t opcode) {
@ -2037,6 +2034,99 @@ static void OPTIMIZE_O3 djui_gfx_dp_set_override(void* texture, uint32_t w, uint
sDjuiOverride = (texture != NULL);
}
static void OPTIMIZE_O3 djui_gfx_sp_simple_vertex(size_t n_vertices, size_t dest_index, const Vtx *vertices) {
for (size_t i = 0; i < n_vertices; i++, dest_index++) {
const Vtx_t *v = &vertices[i].v;
struct LoadedVertex *d = &rsp.loaded_vertices[dest_index];
float x = v->ob[0] * rsp.MP_matrix[0][0] + v->ob[1] * rsp.MP_matrix[1][0] + v->ob[2] * rsp.MP_matrix[2][0] + rsp.MP_matrix[3][0];
float y = v->ob[0] * rsp.MP_matrix[0][1] + v->ob[1] * rsp.MP_matrix[1][1] + v->ob[2] * rsp.MP_matrix[2][1] + rsp.MP_matrix[3][1];
float z = v->ob[0] * rsp.MP_matrix[0][2] + v->ob[1] * rsp.MP_matrix[1][2] + v->ob[2] * rsp.MP_matrix[2][2] + rsp.MP_matrix[3][2];
float w = v->ob[0] * rsp.MP_matrix[0][3] + v->ob[1] * rsp.MP_matrix[1][3] + v->ob[2] * rsp.MP_matrix[2][3] + rsp.MP_matrix[3][3];
x = gfx_adjust_x_for_aspect_ratio(x);
short U = v->tc[0] * rsp.texture_scaling_factor.s >> 16;
short V = v->tc[1] * rsp.texture_scaling_factor.t >> 16;
d->color.r = v->cn[0];
d->color.g = v->cn[1];
d->color.b = v->cn[2];
d->u = U;
d->v = V;
d->x = x;
d->y = y;
d->z = z;
d->w = w;
d->color.a = v->cn[3];
}
}
static void OPTIMIZE_O3 djui_gfx_sp_simple_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t vtx3_idx) {
struct LoadedVertex *v1 = &rsp.loaded_vertices[vtx1_idx];
struct LoadedVertex *v2 = &rsp.loaded_vertices[vtx2_idx];
struct LoadedVertex *v3 = &rsp.loaded_vertices[vtx3_idx];
struct LoadedVertex *v_arr[3] = {v1, v2, v3};
uint32_t cc_id = rdp.combine_mode;
bool use_alpha = true;
cc_id |= SHADER_OPT_ALPHA;
if (!use_alpha) {
cc_id &= ~0xfff000;
}
struct ColorCombiner *comb = gfx_lookup_or_create_color_combiner(cc_id);
struct ShaderProgram *prg = comb->prg;
if (prg != rendering_state.shader_program) {
gfx_flush();
gfx_rapi->unload_shader(rendering_state.shader_program);
gfx_rapi->load_shader(prg);
rendering_state.shader_program = prg;
}
if (rdp.textures_changed[0]) {
gfx_flush();
import_texture(0);
rdp.textures_changed[0] = false;
}
uint32_t tex_width = (rdp.texture_tile.lrs - rdp.texture_tile.uls + 4) / 4;
uint32_t tex_height = (rdp.texture_tile.lrt - rdp.texture_tile.ult + 4) / 4;
bool z_is_from_0_to_1 = gfx_rapi->z_is_from_0_to_1();
for (int32_t i = 0; i < 3; i++) {
float z = v_arr[i]->z, w = v_arr[i]->w;
if (z_is_from_0_to_1) {
z = (z + w) / 2.0f;
}
buf_vbo[buf_vbo_len++] = v_arr[i]->x;
buf_vbo[buf_vbo_len++] = v_arr[i]->y;
buf_vbo[buf_vbo_len++] = z;
buf_vbo[buf_vbo_len++] = w;
float u = (v_arr[i]->u - rdp.texture_tile.uls * 8) / 32.0f;
float v = (v_arr[i]->v - rdp.texture_tile.ult * 8) / 32.0f;
buf_vbo[buf_vbo_len++] = u / tex_width;
buf_vbo[buf_vbo_len++] = v / tex_height;
struct RGBA *color;
color = &rdp.env_color;
buf_vbo[buf_vbo_len++] = color->r / 255.0f;
buf_vbo[buf_vbo_len++] = color->g / 255.0f;
buf_vbo[buf_vbo_len++] = color->b / 255.0f;
buf_vbo[buf_vbo_len++] = color->a / 255.0f;
}
if (++buf_vbo_num_tris == MAX_BUFFERED) {
gfx_flush();
}
}
void OPTIMIZE_O3 djui_gfx_run_dl(Gfx* cmd) {
uint32_t opcode = cmd->words.w0 >> 24;
switch (opcode) {
@ -2046,6 +2136,13 @@ void OPTIMIZE_O3 djui_gfx_run_dl(Gfx* cmd) {
case G_TEXOVERRIDE_DJUI:
djui_gfx_dp_set_override(seg_addr(cmd->words.w1), 1 << C0(16, 8), 1 << C0(8, 8), C0(0, 8));
break;
case G_DJUI_SIMPLE_VERT:
djui_gfx_sp_simple_vertex(C0(12, 8), C0(1, 7) - C0(12, 8), seg_addr(cmd->words.w1));
break;
case G_DJUI_SIMPLE_TRI2:
djui_gfx_sp_simple_tri1(C0(16, 8) / 2, C0(8, 8) / 2, C0(0, 8) / 2);
djui_gfx_sp_simple_tri1(C1(16, 8) / 2, C1(8, 8) / 2, C1(0, 8) / 2);
break;
case G_EXECUTE_DJUI:
djui_gfx_dp_execute_djui(cmd->words.w1);
break;

View file

@ -202,7 +202,7 @@ void produce_interpolation_frames_and_delay(void) {
sFrameTargetTime += sFrameTime * gGameSpeed;
gRenderingInterpolated = false;
//printf(">>> fpt: %llu, fps: %f :: %f\n", frames, sAvgFps, fps);
printf(">>> fpt: %llu, fps: %f :: %f\n", frames, sAvgFps, fps);
}
void produce_one_frame(void) {

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB