New render frame interpolation via transformation matrix de-/construction (#98)

This method of interpolation aims to interpolate the render transformation matrices as correctly as possible without doing an entirely new render pass. This is accomplished by decomposing each matrix into its component transformations, interpolating those simpler transformations individually, and then reconstructing the transformation matrix from the interpolated transformations.
2024-11-26 05:55:15 +00:00 · 2022-05-21 21:46:41 -07:00 · 2022-05-21 21:46:41 -07:00 · c8a70f2d50
commit c8a70f2d50
parent f5258f2d41
6 changed files with 448 additions and 8 deletions
--- a/src/engine/graph_node.h
+++ b/src/engine/graph_node.h
@ -127,6 +127,7 @@ struct DisplayListNode
    Mtx *transformPrev;
    void *displayList;
    struct DisplayListNode *next;
+    u8 usingCamSpace;
 };

 /** GraphNode that manages the 8 top-level display lists that will be drawn
--- a/src/engine/math_util.c
+++ b/src/engine/math_util.c
@ -155,6 +155,15 @@ f32 vec3f_dot(Vec3f a, Vec3f b)
 	return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
 }

+/// Linear combination: dest = vecA * sclA + vecB * sclB, computed per component.
+/// Each component is read before it is written, so dest may be the same array as vecA or vecB.
+void vec3f_combine(Vec3f dest, Vec3f vecA, Vec3f vecB, f32 sclA, f32 sclB) {
+    dest[0] = vecA[0] * sclA + vecB[0] * sclB;
+    dest[1] = vecA[1] * sclA + vecB[1] * sclB;
+    dest[2] = vecA[2] * sclA + vecB[2] * sclB;
+}
+
 #pragma GCC diagnostic pop

 /// Copy matrix 'src' to 'dest'
@ -638,6 +647,55 @@ void mtxf_rotate_xy(Mtx *mtx, s16 angle) {
    mtxf_to_mtx(mtx, temp);
 }

+/**
+ * Compute the inverse of the affine transform 'src' and store it in 'dest'.
+ *
+ * The approach follows the fast affine inverse in "inverse.c" from Graphics Gems II
+ * Author: Kevin Wu
+ * additional Graphics Gems code by Andrew Glassner and Rod G. Bogart
+ * http://www.realtimerendering.com/resources/GraphicsGems/gemsii/inverse.c
+ *
+ * Only the upper-left 3x3 block and the translation row (row 3) of 'src' are read;
+ * the last column of the result is always written as (0, 0, 0, 1).
+ * Matrix perspective is not used in SM64, so the affine assumption is not a concern.
+ *
+ * There is no singular-matrix check: this is currently only used to invert the camera
+ * transform, which is always orthonormal, so the determinant will never be 0.
+ *
+ * The result is assembled in a local matrix and copied out at the end, so 'dest'
+ * may be the same matrix as 'src'.
+ */
+void mtxf_inverse(register Mat4 dest, register Mat4 src) {
+    Mat4 inv;
+    int col;
+
+    // reciprocal of the determinant of the 3x3 block (no zero check, see above)
+    f32 invDet = 1.0f / (
+          src[0][0] * src[1][1] * src[2][2]
+        + src[0][1] * src[1][2] * src[2][0]
+        + src[0][2] * src[1][0] * src[2][1]
+        - src[0][2] * src[1][1] * src[2][0]
+        - src[0][1] * src[1][0] * src[2][2]
+        - src[0][0] * src[1][2] * src[2][1]
+    );
+
+    // inverse of axis vectors (adj(A) / det(A)), written one row at a time
+    inv[0][0] = (src[1][1] * src[2][2] - src[1][2] * src[2][1]) * invDet;
+    inv[0][1] = (src[0][2] * src[2][1] - src[0][1] * src[2][2]) * invDet;
+    inv[0][2] = (src[0][1] * src[1][2] - src[0][2] * src[1][1]) * invDet;
+    inv[0][3] = 0.0f;
+
+    inv[1][0] = (src[1][2] * src[2][0] - src[1][0] * src[2][2]) * invDet;
+    inv[1][1] = (src[0][0] * src[2][2] - src[0][2] * src[2][0]) * invDet;
+    inv[1][2] = (src[0][2] * src[1][0] - src[0][0] * src[1][2]) * invDet;
+    inv[1][3] = 0.0f;
+
+    inv[2][0] = (src[1][0] * src[2][1] - src[1][1] * src[2][0]) * invDet;
+    inv[2][1] = (src[0][1] * src[2][0] - src[0][0] * src[2][1]) * invDet;
+    inv[2][2] = (src[0][0] * src[1][1] - src[0][1] * src[1][0]) * invDet;
+    inv[2][3] = 0.0f;
+
+    // inverse of translation (-C * inv(A))
+    for (col = 0; col < 3; ++col) {
+        inv[3][col] = -src[3][0] * inv[0][col] - src[3][1] * inv[1][col] - src[3][2] * inv[2][col];
+    }
+    inv[3][3] = 1.0f;
+
+    memcpy(dest, inv, sizeof(f32) * 4 * 4);
+}
+
 /**
 * Extract a position given an object's transformation matrix and a camera matrix.
 * This is used for determining the world position of the held object: since objMtx
--- a/src/engine/math_util.h
+++ b/src/engine/math_util.h
@ -54,6 +54,7 @@ void *vec3f_cross(Vec3f dest, Vec3f a, Vec3f b);
 void *vec3f_normalize(Vec3f dest);
 f32 vec3f_length(Vec3f a);
 f32 vec3f_dot(Vec3f a, Vec3f b);
+void vec3f_combine(Vec3f dest, Vec3f vecA, Vec3f vecB, f32 sclA, f32 sclB);
 void mtxf_copy(Mat4 dest, Mat4 src);
 void mtxf_identity(Mat4 mtx);
 void mtxf_translate(Mat4 dest, Vec3f b);
@ -69,6 +70,7 @@ void mtxf_scale_vec3f(Mat4 dest, Mat4 mtx, Vec3f s);
 void mtxf_mul_vec3s(Mat4 mtx, Vec3s b);
 void mtxf_to_mtx(Mtx *dest, Mat4 src);
 void mtxf_rotate_xy(Mtx *mtx, s16 angle);
+void mtxf_inverse(Mat4 dest, Mat4 src);
 void get_pos_from_transform_mtx(Vec3f dest, Mat4 objMtx, Mat4 camMtx);
 void vec3f_get_dist_and_angle(Vec3f from, Vec3f to, f32 *dist, s16 *pitch, s16 *yaw);
 void vec3f_set_dist_and_angle(Vec3f from, Vec3f to, f32  dist, s16  pitch, s16  yaw);
--- a/src/game/rendering_graph_node.c
+++ b/src/game/rendering_graph_node.c
@ -48,6 +48,16 @@ Mat4 gMatStackPrev[MATRIX_STACK_SIZE] = {};
 Mtx *gMatStackFixed[MATRIX_STACK_SIZE] = { 0 };
 Mtx *gMatStackPrevFixed[MATRIX_STACK_SIZE] = { 0 };

+// TRUE while geo_process_camera is processing the children of a camera node; the value
+// is copied into every display list node appended by geo_append_display_list.
+u8 sUsingCamSpace = FALSE;
+// NOTE(review): the initializer below belongs to sCurrCamTranf only; sPrevCamTranf gets
+// the default zero-initialization of a file-scope object -- confirm that is intended.
+Mtx sPrevCamTranf, sCurrCamTranf = {
+    .m = {
+        {1.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 1.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, 1.0f, 0.0f},
+        {0.0f, 0.0f, 0.0f, 1.0f}
+    }
+};
+
 /**
 * Animation nodes have state in global variables, so this struct captures
 * the animation state so a 'context switch' can be made when rendering the
@ -161,12 +171,15 @@ struct ShadowInterp sShadowInterp[MAX_SHADOW_NODES] = { 0 };
 struct ShadowInterp* gShadowInterpCurrent = NULL;
 static u8 sShadowInterpCount = 0;

+static struct GraphNodeCamera * sCameraNode = NULL;
+
struct {
    Gfx *pos;
    Mtx *mtx;
    Mtx *mtxPrev;
    void *displayList;
    Mtx interp;
+    u8 usingCamSpace; // taken from the display list node; selects world-space interpolation in patch_mtx_interpolated
 } gMtxTbl[6400];
 s32 gMtxTblSize;

@ -204,6 +217,8 @@ void patch_mtx_before(void) {
 }

 void patch_mtx_interpolated(f32 delta) {
+    Mtx camTranfInv, prevCamTranfInv;
+
    if (sPerspectiveNode != NULL) {
        u16 perspNorm;
        f32 fovInterpolated = delta_interpolate_f32(sPerspectiveNode->prevFov, sPerspectiveNode->fov, delta);
@ -257,9 +272,37 @@ void patch_mtx_interpolated(f32 delta) {
    }
    gCurGraphNodeObject = savedObj;

+    // calculate outside of for loop to reduce overhead
+    // technically this is improper use of mtxf functions, but coop doesn't target N64
+    mtxf_inverse(camTranfInv.m, *sCameraNode->matrixPtr);
+    mtxf_inverse(prevCamTranfInv.m, *sCameraNode->matrixPtrPrev);
+
    for (s32 i = 0; i < gMtxTblSize; i++) {
+        Mtx bufMtx, bufMtxPrev;
+
+        memcpy(bufMtx.m, ((Mtx*) gMtxTbl[i].mtx)->m, sizeof(f32) * 4 * 4);
+        memcpy(bufMtxPrev.m, ((Mtx*) gMtxTbl[i].mtxPrev)->m, sizeof(f32) * 4 * 4);
+
        Gfx *pos = gMtxTbl[i].pos;
-        delta_interpolate_mtx(&gMtxTbl[i].interp, (Mtx*) gMtxTbl[i].mtxPrev, (Mtx*) gMtxTbl[i].mtx, delta);
+
+        if (gMtxTbl[i].usingCamSpace) {
+            // transform out of camera space so the matrix can interp in world space
+            mtxf_mul(bufMtx.m, bufMtx.m, camTranfInv.m);
+            mtxf_mul(bufMtxPrev.m, bufMtxPrev.m, prevCamTranfInv.m);
+        }
+        delta_interpolate_mtx(&gMtxTbl[i].interp, &bufMtxPrev, &bufMtx, delta);
+        if (gMtxTbl[i].usingCamSpace) {
+            // transform back to camera space, respecting camera interpolation
+            Mtx camInterp;
+            Vec3f posInterp, focusInterp;
+
+            // use camera node's stored information to calculate interpolated camera transform
+            delta_interpolate_vec3f(posInterp, sCameraNode->prevPos, sCameraNode->pos, delta);
+            delta_interpolate_vec3f(focusInterp, sCameraNode->prevFocus, sCameraNode->focus, delta);
+            mtxf_lookat(camInterp.m, posInterp, focusInterp, sCameraNode->roll);
+            mtxf_to_mtx(&camInterp, camInterp.m);
+            mtxf_mul(gMtxTbl[i].interp.m, gMtxTbl[i].interp.m, camInterp.m);
+        }
        gSPMatrix(pos++, VIRTUAL_TO_PHYSICAL(&gMtxTbl[i].interp),
                  G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
    }
@ -308,13 +351,13 @@ static void geo_process_master_list_sub(struct GraphNodeMasterList *node) {
        if ((currList = node->listHeads[i]) != NULL) {
            gDPSetRenderMode(gDisplayListHead++, modeList->modes[i], mode2List->modes[i]);
            while (currList != NULL) {
-                detect_and_skip_mtx_interpolation(&currList->transform, &currList->transformPrev);
+                //detect_and_skip_mtx_interpolation(&currList->transform, &currList->transformPrev);
                if ((u32) gMtxTblSize < sizeof(gMtxTbl) / sizeof(gMtxTbl[0])) {
                    gMtxTbl[gMtxTblSize].pos = gDisplayListHead;
                    gMtxTbl[gMtxTblSize].mtx = currList->transform;
                    gMtxTbl[gMtxTblSize].mtxPrev = currList->transformPrev;
                    gMtxTbl[gMtxTblSize].displayList = currList->displayList;
-                    gMtxTblSize++;
+                    gMtxTbl[gMtxTblSize++].usingCamSpace = currList->usingCamSpace;
                }
                gSPMatrix(gDisplayListHead++, VIRTUAL_TO_PHYSICAL(currList->transformPrev),
                          G_MTX_MODELVIEW | G_MTX_LOAD | G_MTX_NOPUSH);
@ -347,6 +390,7 @@ static void geo_append_display_list(void *displayList, s16 layer) {
        listNode->transformPrev = gMatStackPrevFixed[gMatStackIndex];
        listNode->displayList = displayList;
        listNode->next = 0;
+        listNode->usingCamSpace = sUsingCamSpace;
        if (gCurGraphNodeMasterList->listHeads[layer] == 0) {
            gCurGraphNodeMasterList->listHeads[layer] = listNode;
        } else {
@ -475,6 +519,9 @@ static void geo_process_camera(struct GraphNodeCamera *node) {
    Mtx *rollMtx = alloc_display_list(sizeof(*rollMtx));
    if (rollMtx == NULL) { return; }

+    vec3f_copy(node->prevPos, node->pos);
+    vec3f_copy(node->prevFocus, node->focus);
+
    if (node->fnNode.func != NULL) {
        node->fnNode.func(GEO_CONTEXT_RENDER, &node->fnNode.node, gMatStack[gMatStackIndex]);
    }
@ -492,10 +539,8 @@ static void geo_process_camera(struct GraphNodeCamera *node) {
        mtxf_lookat(cameraTransform, node->pos, node->focus, node->roll);
        mtxf_mul(gMatStackPrev[gMatStackIndex + 1], cameraTransform, gMatStackPrev[gMatStackIndex]);
    }
-
-    vec3f_copy(node->prevPos, node->pos);
-    vec3f_copy(node->prevFocus, node->focus);
    node->prevTimestamp = gGlobalTimer;
+    sCameraNode = node;

    // Increment the matrix stack, If we fail to do so. Just return.
    if (!increment_mat_stack()) { return; }
@ -505,10 +550,12 @@ static void geo_process_camera(struct GraphNodeCamera *node) {

    if (node->fnNode.node.children != 0) {
        gCurGraphNodeCamera = node;
+        sUsingCamSpace = TRUE;
        node->matrixPtr = &gMatStack[gMatStackIndex];
        node->matrixPtrPrev = &gMatStackPrev[gMatStackIndex];
        geo_process_node_and_siblings(node->fnNode.node.children);
        gCurGraphNodeCamera = NULL;
+        sUsingCamSpace = FALSE;
    }
    gMatStackIndex--;
 }
--- a/src/pc/utils/misc.c
+++ b/src/pc/utils/misc.c
@ -117,6 +117,7 @@ next_get:

 /////////////////

+/*
 static f32 sm64_to_radians(f32 val) {
    return val * M_PI / 0x8000;
 }
@ -166,6 +167,336 @@ void delta_interpolate_rgba(u8* res, u8* a, u8* b, f32 delta) {
    res[3] = ((a[3] * antiDelta) + (b[3] * delta));
 }

+/*
+void delta_interpolate_mtx(Mtx* out, Mtx* a, Mtx* b, f32 delta) {
+    f32 antiDelta = 1.0f - delta;
+    for (s32 i = 0; i < 4; i++) {
+        for (s32 j = 0; j < 4; j++) {
+            out->m[i][j] = (a->m[i][j] * antiDelta) + (b->m[i][j] * delta);
+        }
+    }
+}
+*/
+
+// Magnitude of one quaternion component from three (signed) diagonal entries of a
+// rotation matrix: sqrt((1 + x + y + z) / 4).
+// The result is NaN when the sum is negative.
+static f32 get_quat_compo_abs(f32 xPiece, f32 yPiece, f32 zPiece) {
+    f32 quarterSum = (1.0f + xPiece + yPiece + zPiece) * 0.25f;
+
+    return sqrt(quarterSum);
+}
+
+// NaN-aware "a >= b": a NaN 'a' never wins, and any non-NaN 'a' beats a NaN 'b'.
+static u8 float_ge_with_nan_check(f32 a, f32 b) {
+    if (isnan(a)) {
+        return 0;
+    }
+    if (isnan(b)) {
+        return 1;
+    }
+    return a >= b;
+}
+
+// Convert the rotation matrix 'a' (three axis vectors) into the rotation quaternion 'q'.
+// this function expects an orthonormal rotation matrix; no check is made
+static void rot_mat_to_rot_quat(Vec4f q, Vec3f a[3]) {
+    int largest = 0;
+    int k;
+    f32 divFactor;
+
+    // magnitudes of the four coefficients, taken from the matrix diagonal
+    q[0] = get_quat_compo_abs(a[0][0], a[1][1], a[2][2]);
+    q[1] = get_quat_compo_abs(a[0][0], -a[1][1], -a[2][2]);
+    q[2] = get_quat_compo_abs(-a[0][0], a[1][1], -a[2][2]);
+    q[3] = get_quat_compo_abs(-a[0][0], -a[1][1], a[2][2]);
+
+    // find the coefficient with greatest magnitude
+    // NaN checks are because of possible square root of negative number in get_quat_compo_abs
+    for (k = 1; k < 4; ++k) {
+        if (!float_ge_with_nan_check(q[largest], q[k])) {
+            largest = k;
+        }
+    }
+
+    // recompute the other three coefficients with their signs from the off-diagonal
+    // entries; dividing by the greatest magnitude improves float accuracy
+    divFactor = 0.25f / q[largest];
+
+    switch (largest) {
+        case 0:
+            q[1] = (a[1][2] - a[2][1]) * divFactor;
+            q[2] = (a[2][0] - a[0][2]) * divFactor;
+            q[3] = (a[0][1] - a[1][0]) * divFactor;
+            break;
+
+        case 1:
+            q[0] = (a[1][2] - a[2][1]) * divFactor;
+            q[2] = (a[1][0] + a[0][1]) * divFactor;
+            q[3] = (a[2][0] + a[0][2]) * divFactor;
+            break;
+
+        case 2:
+            q[0] = (a[2][0] - a[0][2]) * divFactor;
+            q[1] = (a[1][0] + a[0][1]) * divFactor;
+            q[3] = (a[2][1] + a[1][2]) * divFactor;
+            break;
+
+        case 3:
+            q[0] = (a[0][1] - a[1][0]) * divFactor;
+            q[1] = (a[2][0] + a[0][2]) * divFactor;
+            q[2] = (a[2][1] + a[1][2]) * divFactor;
+            break;
+    }
+}
+
+// Build the 3x3 rotation matrix 'a' (three axis vectors) from the rotation quaternion 'q'.
+// This is the inverse of rot_mat_to_rot_quat and uses the same component order.
+static void rot_quat_to_mtx_rot(Vec3f a[3], Vec4f q) {
+    // doubled squares of each component
+    f32 sq0 = 2.0f * sqr(q[0]);
+    f32 sq1 = 2.0f * sqr(q[1]);
+    f32 sq2 = 2.0f * sqr(q[2]);
+    f32 sq3 = 2.0f * sqr(q[3]);
+
+    // doubled pairwise products
+    f32 p01 = 2.0f * q[0] * q[1];
+    f32 p02 = 2.0f * q[0] * q[2];
+    f32 p03 = 2.0f * q[0] * q[3];
+    f32 p12 = 2.0f * q[1] * q[2];
+    f32 p13 = 2.0f * q[1] * q[3];
+    f32 p23 = 2.0f * q[2] * q[3];
+
+    // diagonal
+    a[0][0] = sq0 + sq1 - 1.0f;
+    a[1][1] = sq0 + sq2 - 1.0f;
+    a[2][2] = sq0 + sq3 - 1.0f;
+
+    // off-diagonal pairs
+    a[0][1] = p12 + p03;
+    a[1][0] = p12 - p03;
+    a[0][2] = p13 - p02;
+    a[2][0] = p13 + p02;
+    a[1][2] = p23 + p01;
+    a[2][1] = p23 - p01;
+}
+
+// rotation quaternion spherical linear interpolation
+//
+// Writes to 'out' the rotation a fraction 't' of the way from 'a' to 'b', taking the
+// short path. When the absolute value of the dot product of 'a' and 'b' is >= 1 there
+// is no rotation between them and 'out' is simply a copy of 'b'.
+//
+// 'out' may be the same array as 'b' (delta_interpolate_mtx calls it that way), which is
+// why 'b' is copied with memmove: memcpy has undefined behavior when the source and
+// destination overlap. 'out' must NOT be the same array as 'a', because 'a' is still
+// read after 'out' has been written.
+static void rot_quat_slerp(Vec4f out, Vec4f a, Vec4f b, f32 t) {
+    // credit where it's due:
+    // Martin John Baker
+    // https://www.euclideanspace.com/maths/algebra/realNormedAlgebra/quaternions/slerp/index.htm
+
+    f32 halfTh, halfSin, st, sat, halfCos = a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+
+    // memmove, not memcpy: out and b are allowed to be the same array
+    memmove(out, b, sizeof(f32) * 4);
+
+    // if the abs of cos is 1, then there is no rotation
+    if (fabs(halfCos) >= 1.0f) {
+        return;
+    }
+
+    // if cos is negative, rotation takes long path; invert to take short path
+    if (halfCos < 0.0f) {
+        out[0] *= -1.0f;
+        out[1] *= -1.0f;
+        out[2] *= -1.0f;
+        out[3] *= -1.0f;
+        halfCos *= -1.0f;
+    }
+
+    halfTh = acos(halfCos);
+    halfSin = sqrt(1.0f - sqr(halfCos));
+
+    if (halfSin == 0.0f) {
+        // this shouldn't happen, but float imprecision can make it happen anyway
+        halfSin = FLT_EPSILON;
+    }
+
+    // weights of the start (sat) and end (st) quaternions
+    sat = sin((1 - t) * halfTh) / halfSin;
+    st = sin(t * halfTh) / halfSin;
+    out[0] = a[0] * sat + out[0] * st;
+    out[1] = a[1] * sat + out[1] * st;
+    out[2] = a[2] * sat + out[2] * st;
+    out[3] = a[3] * sat + out[3] * st;
+}
+
+// Divides a shear value by a scale to remove the scaling from it.
+// A scale of exactly zero is treated as "no shear" instead of dividing by zero.
+static f32 unmat_unscale_shear(f32 shear, f32 scale) {
+    return (scale == 0.0f) ? 0.0f : shear / scale;
+}
+
+// Decomposes the transformation matrix 'mat' into scale, shear, rotation and translation.
+//
+// matrix decomposition code is brought over from "unmatrix.c" from Graphics Gems II
+// Author: Spencer W. Thomas of University of Michigan
+// additional Graphics Gems II code by Andrew Glassner and Rod G. Bogart
+// thanks to fgsfds for informing me of this
+// http://www.realtimerendering.com/resources/GraphicsGems/gemsii/unmatrix.c
+//
+// matrix perspective is not used in SM64, so those indices are stripped from the output parameter
+// return value was related to if matrix was non-singular, which was necessary for perspective
+// since perspective is not used, the return value is also stripped
+//
+// additionally, rotation is not converted to euler angles
+// instead, it is converted to a quaternion to avoid gimbal lock
+//
+// tranfs is returned as follows:
+// scale(x, y, z), shear(xy, xz, zy), rotation(a, b, c, d), translation(x, y, z)
+// i.e. tranfs[0..2] = scale, tranfs[3..5] = shear, tranfs[6..9] = rotation quaternion,
+// tranfs[10..12] = translation. 'mat' itself is not modified.
+static void unmatrix(Mtx * mat, f32 tranfs[13]) {
+    register int i;
+    Vec3f axisVecs[3], yzCross;
+
+    Mtx locMat = *mat; // local copy of the input; only read below
+
+    // matrix normalization skipped since all SM64 matrices are normalized
+
+    // perspective is not used in SM64 matrices, so the steps for it are skipped
+
+    for (i = 0; i < 3; ++i) {
+        // translation (position)
+        tranfs[10 + i] = locMat.m[3][i];
+
+        // axis vector
+        memcpy(axisVecs[i], locMat.m[i], sizeof(f32) * 3);
+    }
+
+    // X-scale
+    tranfs[0] = vec3f_length(axisVecs[0]);
+
+    // normalize X-axis vector (fall back to the unit X axis when its length is zero)
+    if (tranfs[0] == 0.0f) {
+        axisVecs[0][0] = 1.0f;
+        axisVecs[0][1] = 0.0f;
+        axisVecs[0][2] = 0.0f;
+    } else {
+        for (i = 0; i < 3; ++i) {
+            axisVecs[0][i] /= tranfs[0];
+        }
+    }
+
+    // XY-shear
+    tranfs[3] = vec3f_dot(axisVecs[0], axisVecs[1]);
+
+    // orthogonalize Y-axis vector to X-axis vector
+    vec3f_combine(axisVecs[1], axisVecs[1], axisVecs[0], 1.0f, -tranfs[3]);
+
+    // Y-scale
+    tranfs[1] = vec3f_length(axisVecs[1]);
+
+    // normalize Y-axis vector (fall back to the unit Y axis when its length is zero)
+    if (tranfs[1] == 0.0f) {
+        axisVecs[1][0] = 0.0f;
+        axisVecs[1][1] = 1.0f;
+        axisVecs[1][2] = 0.0f;
+    } else {
+        for (i = 0; i < 3; ++i) {
+            axisVecs[1][i] /= tranfs[1];
+        }
+    }
+
+    // unscale XY-shear
+    tranfs[3] = unmat_unscale_shear(tranfs[3], tranfs[1]);
+
+    // XZ-shear
+    tranfs[4] = vec3f_dot(axisVecs[0], axisVecs[2]);
+
+    // orthogonalize Z-axis vector to X-axis vector
+    vec3f_combine(axisVecs[2], axisVecs[2], axisVecs[0], 1.0f, -tranfs[4]);
+
+    // ZY-shear
+    tranfs[5] = vec3f_dot(axisVecs[1], axisVecs[2]);
+
+    // orthogonalize Z-axis vector to Y-axis vector
+    vec3f_combine(axisVecs[2], axisVecs[2], axisVecs[1], 1.0f, -tranfs[5]);
+
+    // Z-scale
+    tranfs[2] = vec3f_length(axisVecs[2]);
+
+    // normalize Z-axis vector (fall back to the unit Z axis when its length is zero)
+    if (tranfs[2] == 0.0f) {
+        axisVecs[2][0] = 0.0f;
+        axisVecs[2][1] = 0.0f;
+        axisVecs[2][2] = 1.0f;
+    } else {
+        for (i = 0; i < 3; ++i) {
+            axisVecs[2][i] /= tranfs[2];
+        }
+    }
+
+    // unscale remaining shears
+    tranfs[4] = unmat_unscale_shear(tranfs[4], tranfs[2]);
+    tranfs[5] = unmat_unscale_shear(tranfs[5], tranfs[2]);
+
+    // check if coordinate system needs to be flipped
+    // (X . (Y x Z) is negative); if so, negate all three scales and axis vectors
+    vec3f_cross(yzCross, axisVecs[1], axisVecs[2]);
+    if (vec3f_dot(axisVecs[0], yzCross) < 0.0f) {
+        for (i = 0; i < 3; ++i) {
+            tranfs[i] *= -1.0f;
+            vec3f_mul(axisVecs[i], -1.0f);
+        }
+    }
+
+    // rotation: the remaining axis vectors are converted to a quaternion in tranfs[6..9]
+    rot_mat_to_rot_quat(tranfs + 6, axisVecs);
+}
+
+// Rebuilds a transformation matrix in 'mat' from the decomposed sequence produced by
+// unmatrix; see unmatrix for the layout of tranfs.
+// The pieces are applied in this order: scale, shear, rotation, translation.
+static void rematrix(Mtx * mat, f32 tranfs[13]) {
+    int row, col;
+    Vec3f rotAxes[3];
+    Mat4 rotMat;
+
+    // start with the identity matrix
+    for (row = 0; row < 4; ++row) {
+        for (col = 0; col < 4; ++col) {
+            mat->m[row][col] = (row == col) ? 1.0f : 0.0f;
+        }
+    }
+
+    // scale each of the three axis rows
+    for (row = 0; row < 3; ++row) {
+        vec3f_mul(mat->m[row], tranfs[row]);
+    }
+
+    // shear (XY, XZ, ZY), re-applying the scale that unmatrix divided out
+    mat->m[1][0] = mat->m[1][1] * tranfs[3];
+    mat->m[2][0] = mat->m[2][2] * tranfs[4];
+    mat->m[2][1] = mat->m[2][2] * tranfs[5];
+
+    // rotate
+    // get the rotation matrix for the quat
+    rot_quat_to_mtx_rot(rotAxes, &tranfs[6]);
+
+    // widen the 3x3 rotation into a Mat4 with no translation
+    for (row = 0; row < 3; ++row) {
+        for (col = 0; col < 3; ++col) {
+            rotMat[row][col] = rotAxes[row][col];
+        }
+        rotMat[row][3] = 0.0f;
+        rotMat[3][row] = 0.0f;
+    }
+    rotMat[3][3] = 1.0f;
+
+    // apply the rotation
+    // this is technically abuse of Mat4 vs. Mtx, but Coop doesn't target N64 anyway
+    mtxf_mul(mat->m, mat->m, rotMat);
+
+    // translate
+    mat->m[3][0] = tranfs[10];
+    mat->m[3][1] = tranfs[11];
+    mat->m[3][2] = tranfs[12];
+}
+
+// Interpolates between the transformation matrices 'a' and 'b' and writes the result
+// to 'out'. Both inputs are decomposed with unmatrix; scale, shear and translation are
+// blended linearly with weights (1 - delta) and delta, the rotation quaternion is
+// blended with rot_quat_slerp, and the result is rebuilt with rematrix.
+void delta_interpolate_mtx(Mtx* out, Mtx* a, Mtx* b, f32 delta) {
+    f32 from[13], to[13];
+    f32 keep = 1.0f - delta;
+    int idx;
+
+    unmatrix(a, from);
+    unmatrix(b, to);
+
+    for (idx = 0; idx < 13; ++idx) {
+        // slots 6..9 hold the rot quat, which is slerped below instead
+        if (idx >= 6 && idx < 10) {
+            continue;
+        }
+        to[idx] = from[idx] * keep + to[idx] * delta;
+    }
+
+    rot_quat_slerp(to + 6, from + 6, to + 6, delta);
+
+    rematrix(out, to);
+}
+
+/*
 static s16 delta_interpolate_angle(s16 a, s16 b, f32 delta) {
    s32 absDiff = b - a;
    if (absDiff < 0) {
@ -218,3 +549,4 @@ void detect_and_skip_mtx_interpolation(Mtx** mtxPrev, Mtx** mtx) {
        *mtx = *mtxPrev;
    }
 }
+*/
--- a/src/pc/utils/misc.h
+++ b/src/pc/utils/misc.h
@ -21,6 +21,6 @@ void delta_interpolate_vec3s(Vec3s res, Vec3s a, Vec3s b, f32 delta);
 void delta_interpolate_normal(s8* res, s8* a, s8* b, f32 delta);
 void delta_interpolate_rgba(u8* res, u8* a, u8* b, f32 delta);
 void delta_interpolate_mtx(Mtx* out, Mtx* a, Mtx* b, f32 delta);
-void detect_and_skip_mtx_interpolation(Mtx** mtxPrev, Mtx** mtx);
+//void detect_and_skip_mtx_interpolation(Mtx** mtxPrev, Mtx** mtx);

 #endif