glasm: Implement FSWZADD
This commit is contained in:
parent
3da7b98d37
commit
36d040da70
3 changed files with 28 additions and 4 deletions
|
@ -281,7 +281,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin
|
||||||
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
|
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
|
||||||
header += "OPTION NV_shader_atomic_fp16_vector;";
|
header += "OPTION NV_shader_atomic_fp16_vector;";
|
||||||
}
|
}
|
||||||
if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote) {
|
if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
|
||||||
|
info.uses_fswzadd) {
|
||||||
header += "OPTION NV_shader_thread_group;";
|
header += "OPTION NV_shader_thread_group;";
|
||||||
}
|
}
|
||||||
if (info.uses_subgroup_shuffles) {
|
if (info.uses_subgroup_shuffles) {
|
||||||
|
@ -416,12 +417,25 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi
|
||||||
if (program.local_memory_size > 0) {
|
if (program.local_memory_size > 0) {
|
||||||
header += fmt::format("lmem[{}],", program.local_memory_size);
|
header += fmt::format("lmem[{}],", program.local_memory_size);
|
||||||
}
|
}
|
||||||
|
if (program.info.uses_fswzadd) {
|
||||||
|
header += "FSWZA[4],FSWZB[4],";
|
||||||
|
}
|
||||||
header += "RC;"
|
header += "RC;"
|
||||||
"LONG TEMP ";
|
"LONG TEMP ";
|
||||||
for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
|
for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
|
||||||
header += fmt::format("D{},", index);
|
header += fmt::format("D{},", index);
|
||||||
}
|
}
|
||||||
header += "DC;";
|
header += "DC;";
|
||||||
|
if (program.info.uses_fswzadd) {
|
||||||
|
header += "MOV.F FSWZA[0],-1;"
|
||||||
|
"MOV.F FSWZA[1],1;"
|
||||||
|
"MOV.F FSWZA[2],-1;"
|
||||||
|
"MOV.F FSWZA[3],0;"
|
||||||
|
"MOV.F FSWZB[0],-1;"
|
||||||
|
"MOV.F FSWZB[1],-1;"
|
||||||
|
"MOV.F FSWZB[2],1;"
|
||||||
|
"MOV.F FSWZB[3],-1;";
|
||||||
|
}
|
||||||
ctx.code.insert(0, header);
|
ctx.code.insert(0, header);
|
||||||
ctx.code += "END";
|
ctx.code += "END";
|
||||||
return ctx.code;
|
return ctx.code;
|
||||||
|
|
|
@ -616,7 +616,8 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU3
|
||||||
const IR::Value& clamp, const IR::Value& segmentation_mask);
|
const IR::Value& clamp, const IR::Value& segmentation_mask);
|
||||||
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
|
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
|
||||||
const IR::Value& clamp, const IR::Value& segmentation_mask);
|
const IR::Value& clamp, const IR::Value& segmentation_mask);
|
||||||
void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle);
|
void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
|
||||||
|
ScalarU32 swizzle);
|
||||||
void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
|
void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
|
||||||
void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
|
void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
|
||||||
void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
|
void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a);
|
||||||
|
|
|
@ -95,8 +95,17 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, Sca
|
||||||
Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
|
Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR");
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitFSwizzleAdd(EmitContext&, ScalarF32, ScalarF32, ScalarU32) {
|
void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b,
|
||||||
throw NotImplementedException("GLASM instruction");
|
ScalarU32 swizzle) {
|
||||||
|
const auto ret{ctx.reg_alloc.Define(inst)};
|
||||||
|
ctx.Add("AND.U RC.z,{}.threadid,3;"
|
||||||
|
"SHL.U RC.z,RC.z,1;"
|
||||||
|
"SHR.U RC.z,{},RC.z;"
|
||||||
|
"AND.U RC.z,RC.z,3;"
|
||||||
|
"MUL.F RC.x,{},FSWZA[RC.z];"
|
||||||
|
"MUL.F RC.y,{},FSWZB[RC.z];"
|
||||||
|
"ADD.F {}.x,RC.x,RC.y;",
|
||||||
|
ctx.stage_name, swizzle, op_a, op_b, ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
|
void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) {
|
||||||
|
|
Loading…
Reference in a new issue