sm64coopdx/tools/sdk-tools/adpcm/vencode.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "vadpcm.h"

// Encode one frame of 16 samples and append it to 'ofile' as 9 bytes: one
// header byte (scale in the high nibble, predictor index in the low nibble)
// followed by 8 bytes holding sixteen 4-bit quantized residuals.
//
// ofile       - output stream; receives the 9 bytes via fwrite (the return
//               values of fwrite are not checked).
// inBuffer    - 16 input samples; entries nsam..15 are overwritten with zero.
// state       - 16 entries; the last 'order' entries are read as history from
//               the previous frame, and all 16 are overwritten with the
//               decoded (quantized) output of this frame.
// coefTable   - indexed as coefTable[predictor][i] for i in 0..7; each entry
//               is handed to inner_product() together with 'order + i'.
// order       - number of history samples fed to each prediction. The local
//               work arrays hold 16 entries and are indexed up to
//               'order + 7', so order must not exceed 8 (not validated here).
// npredictors - number of predictors in coefTable to try.
// nsam        - number of valid samples in inBuffer.
//
// If coefTable, or the table of the selected predictor, is NULL, a message is
// printed to stderr and the function returns without writing a frame or
// touching 'state'.
void vencodeframe(FILE *ofile, s16 *inBuffer, s32 *state, s32 ***coefTable, s32 order, s32 npredictors, s32 nsam)
{
    s16 ix[16];
    s32 prediction[16];
    s32 inVector[16];
    s32 saveState[16];
    s32 optimalp;
    s32 scale;
    s32 llevel;
    s32 ulevel;
    s32 i;
    s32 j;
    s32 k;
    s32 ie[16];
    s32 nIter;
    s32 max;
    s32 cV;
    s32 maxClip;
    u8 header;
    u8 c;
    f32 e[16];
    f32 se;
    f32 min;

    // Every pass below dereferences coefTable, so validate it once up front
    // instead of part-way through the encode.
    if (coefTable == NULL)
    {
        fprintf(stderr, "vencodeframe: NULL coefficient table, frame not encoded\n");
        return;
    }

    // We are only given 'nsam' samples; pad with zeroes to 16.
    for (i = nsam; i < 16; i++)
    {
        inBuffer[i] = 0;
    }

    // Residuals are stored as 4-bit signed integers, i.e. in [-8, 7].
    llevel = -8;
    ulevel = -llevel - 1;

    // Determine the best-fitting predictor.
    min = 1e30;
    optimalp = 0;
    for (k = 0; k < npredictors; k++)
    {
        // A predictor without a table cannot be evaluated; leave it out of
        // the selection rather than dereferencing NULL.
        if (coefTable[k] == NULL)
        {
            continue;
        }

        // Copy over the last 'order' samples from the previous output.
        for (i = 0; i < order; i++)
        {
            inVector[i] = state[16 - order + i];
        }

        // For 8 samples...
        for (i = 0; i < 8; i++)
        {
            // Compute a prediction based on 'order' values from the old state,
            // plus previous errors in this chunk, as an inner product with the
            // coefficient table.
            prediction[i] = inner_product(order + i, coefTable[k][i], inVector);
            // Record the error in inVector (thus, its first 'order' samples
            // will contain actual values, the rest will be error terms), and
            // in floating point form in e.
            inVector[i + order] = inBuffer[i] - prediction[i];
            e[i] = (f32) inVector[i + order];
        }

        // For the next 8 samples, start with 'order' values from the end of
        // the previous 8-sample chunk of inBuffer. (The code is equivalent to
        // inVector[i] = inBuffer[8 - order + i].)
        for (i = 0; i < order; i++)
        {
            inVector[i] = prediction[8 - order + i] + inVector[8 + i];
        }

        // ... and do the same thing as before to get predictions.
        for (i = 0; i < 8; i++)
        {
            prediction[8 + i] = inner_product(order + i, coefTable[k][i], inVector);
            inVector[i + order] = inBuffer[8 + i] - prediction[8 + i];
            e[8 + i] = (f32) inVector[i + order];
        }

        // Compute the sum of squared errors; the lowest sum decides which
        // predictor to use.
        se = 0.0f;
        for (j = 0; j < 16; j++)
        {
            se += e[j] * e[j];
        }

        if (se < min)
        {
            min = se;
            optimalp = k;
        }
    }

    // All remaining passes use coefTable[optimalp]; without it there is
    // nothing meaningful to encode.
    if (coefTable[optimalp] == NULL)
    {
        fprintf(stderr, "vencodeframe: predictor %d has no coefficient table, frame not encoded\n", (int) optimalp);
        return;
    }

    // Do exactly the same thing again with the chosen predictor, so that 'e'
    // holds its (unquantized) errors.
    for (i = 0; i < order; i++)
    {
        inVector[i] = state[16 - order + i];
    }

    for (i = 0; i < 8; i++)
    {
        prediction[i] = inner_product(order + i, coefTable[optimalp][i], inVector);
        inVector[i + order] = inBuffer[i] - prediction[i];
        e[i] = (f32) inVector[i + order];
    }

    for (i = 0; i < order; i++)
    {
        inVector[i] = prediction[8 - order + i] + inVector[8 + i];
    }

    for (i = 0; i < 8; i++)
    {
        prediction[8 + i] = inner_product(order + i, coefTable[optimalp][i], inVector);
        inVector[i + order] = inBuffer[8 + i] - prediction[8 + i];
        e[8 + i] = (f32) inVector[i + order];
    }

    // Clamp the errors to 16-bit signed ints, and put them in ie.
    clamp(16, e, ie, 16);

    // Find a value with highest absolute value.
    // @bug If this first finds -2^n and later 2^n, it should set 'max' to the
    // latter, which needs a higher value for 'scale'.
    // NOTE(review): intentionally left as-is, since changing the comparison
    // would change the encoded bytes; presumably existing output must stay
    // reproducible -- confirm before "fixing".
    max = 0;
    for (i = 0; i < 16; i++)
    {
        if (fabs((f64)ie[i]) > fabs((f64)max))
        {
            max = ie[i];
        }
    }

    // Compute which power of two we need to scale down by in order to make
    // all values representable as 4-bit signed integers (i.e. be in [-8, 7]).
    // The worst-case 'max' is -2^15, so this will be at most 12.
    for (scale = 0; scale <= 12; scale++)
    {
        if (max <= ulevel && max >= llevel)
        {
            break;
        }
        max /= 2;
    }

    // 'state' is overwritten while quantizing, but a retry needs the
    // original history again.
    for (i = 0; i < 16; i++)
    {
        saveState[i] = state[i];
    }

    // Try with the computed scale, but if it turns out we don't fit in 4 bits
    // (if some |cV| >= 2), use scale + 1 instead (i.e. downscaling by another
    // factor of 2). At most two attempts are made.
    scale--;
    nIter = 0;
    do
    {
        nIter++;
        maxClip = 0;
        scale++;
        if (scale > 12)
        {
            scale = 12;
        }

        // Copy over the last 'order' samples from the previous output.
        for (i = 0; i < order; i++)
        {
            inVector[i] = saveState[16 - order + i];
        }

        // For 8 samples...
        for (i = 0; i < 8; i++)
        {
            // Compute a prediction based on 'order' values from the old state,
            // plus previous *quantized* errors in this chunk (because that's
            // all the decoder will have available).
            prediction[i] = inner_product(order + i, coefTable[optimalp][i], inVector);

            // Compute the error, and divide it by 2^scale, rounding to the
            // nearest integer. This should ideally result in a 4-bit integer.
            se = (f32) inBuffer[i] - (f32) prediction[i];
            ix[i] = qsample(se, 1 << scale);

            // Clamp the error to a 4-bit signed integer, and record what delta
            // was needed for that.
            cV = (s16) clip(ix[i], llevel, ulevel) - ix[i];
            if (maxClip < abs(cV))
            {
                maxClip = abs(cV);
            }
            ix[i] += cV;

            // Record the quantized error in inVector for later predictions,
            // and the quantized (decoded) output in state (for use in the next
            // batch of 8 samples).
            inVector[i + order] = ix[i] * (1 << scale);
            state[i] = prediction[i] + inVector[i + order];
        }

        // Copy over the last 'order' decoded samples from the above chunk.
        for (i = 0; i < order; i++)
        {
            inVector[i] = state[8 - order + i];
        }

        // ... and do the same thing as before.
        for (i = 0; i < 8; i++)
        {
            prediction[8 + i] = inner_product(order + i, coefTable[optimalp][i], inVector);
            se = (f32) inBuffer[8 + i] - (f32) prediction[8 + i];
            ix[8 + i] = qsample(se, 1 << scale);
            cV = (s16) clip(ix[8 + i], llevel, ulevel) - ix[8 + i];
            if (maxClip < abs(cV))
            {
                maxClip = abs(cV);
            }
            ix[8 + i] += cV;
            inVector[i + order] = ix[8 + i] * (1 << scale);
            state[8 + i] = prediction[8 + i] + inVector[i + order];
        }
    }
    while (maxClip >= 2 && nIter < 2);

    // The scale, the predictor index, and the 16 computed outputs are now all
    // 4-bit numbers. Write them out as 1 + 8 bytes.
    header = (u8) ((scale << 4) | (optimalp & 0xf));
    fwrite(&header, 1, 1, ofile);
    for (i = 0; i < 16; i += 2)
    {
        // Mask to the low nibble *before* shifting: ix[i] may be negative, and
        // left-shifting a negative value is undefined. The resulting byte is
        // the same as truncating (ix[i] << 4) to 8 bits.
        c = (u8) (((ix[i] & 0xf) << 4) | (ix[i + 1] & 0xf));
        fwrite(&c, 1, 1, ofile);
    }
}
init2 2019-08-25 04:46:40 +00:00			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <math.h>`
			`#include "vadpcm.h"`

			`void vencodeframe(FILE *ofile, s16 *inBuffer, s32 *state, s32 ***coefTable, s32 order, s32 npredictors, s32 nsam)`
			`{`
			`s16 ix[16];`
			`s32 prediction[16];`
			`s32 inVector[16];`
			`s32 saveState[16];`
			`s32 optimalp;`
			`s32 scale;`
			`s32 llevel;`
			`s32 ulevel;`
			`s32 i;`
			`s32 j;`
			`s32 k;`
			`s32 ie[16];`
			`s32 nIter;`
			`s32 max;`
			`s32 cV;`
			`s32 maxClip;`
			`u8 header;`
			`u8 c;`
			`f32 e[16];`
			`f32 se;`
			`f32 min;`

			`// We are only given 'nsam' samples; pad with zeroes to 16.`
			`for (i = nsam; i < 16; i++)`
			`{`
			`inBuffer[i] = 0;`
			`}`

			`llevel = -8;`
			`ulevel = -llevel - 1;`

			`// Determine the best-fitting predictor.`
			`min = 1e30;`
			`optimalp = 0;`
			`for (k = 0; k < npredictors; k++)`
			`{`
			`// Copy over the last 'order' samples from the previous output.`
			`for (i = 0; i < order; i++)`
			`{`
			`inVector[i] = state[16 - order + i];`
			`}`

			`// For 8 samples...`
			`for (i = 0; i < 8; i++)`
			`{`
			`// Compute a prediction based on 'order' values from the old state,`
			`// plus previous errors in this chunk, as an inner product with the`
			`// coefficient table.`
			`prediction[i] = inner_product(order + i, coefTable[k][i], inVector);`
			`// Record the error in inVector (thus, its first 'order' samples`
			`// will contain actual values, the rest will be error terms), and`
			`// in floating point form in e (for no particularly good reason).`
			`inVector[i + order] = inBuffer[i] - prediction[i];`
			`e[i] = (f32) inVector[i + order];`
			`}`

			`// For the next 8 samples, start with 'order' values from the end of`
			`// the previous 8-sample chunk of inBuffer. (The code is equivalent to`
			`// inVector[i] = inBuffer[8 - order + i].)`
			`for (i = 0; i < order; i++)`
			`{`
			`inVector[i] = prediction[8 - order + i] + inVector[8 + i];`
			`}`

			`// ... and do the same thing as before to get predictions.`
			`for (i = 0; i < 8; i++)`
			`{`
			`prediction[8 + i] = inner_product(order + i, coefTable[k][i], inVector);`
			`inVector[i + order] = inBuffer[8 + i] - prediction[8 + i];`
			`e[8 + i] = (f32) inVector[i + order];`
			`}`

			`// Compute the L2 norm of the errors; the lowest norm decides which`
			`// predictor to use.`
			`se = 0.0f;`
			`for (j = 0; j < 16; j++)`
			`{`
			`se += e[j] * e[j];`
			`}`

			`if (se < min)`
			`{`
			`min = se;`
			`optimalp = k;`
			`}`
			`}`

			`// Do exactly the same thing again, for real.`
			`for (i = 0; i < order; i++)`
			`{`
			`inVector[i] = state[16 - order + i];`
			`}`

			`for (i = 0; i < 8; i++)`
			`{`
Sloppy fixes to adpcm Stopped ignoring warnings in the makefile. Made warnings an error. Sanity checked nsequences to prevent allocating gigantic tables. 2022-02-06 23:18:20 +00:00			`if (coefTable == NULL \|\| coefTable[optimalp] == NULL) { continue; }`
init2 2019-08-25 04:46:40 +00:00			`prediction[i] = inner_product(order + i, coefTable[optimalp][i], inVector);`
			`inVector[i + order] = inBuffer[i] - prediction[i];`
			`e[i] = (f32) inVector[i + order];`
			`}`

			`for (i = 0; i < order; i++)`
			`{`
			`inVector[i] = prediction[8 - order + i] + inVector[8 + i];`
			`}`

			`for (i = 0; i < 8; i++)`
			`{`
			`prediction[8 + i] = inner_product(order + i, coefTable[optimalp][i], inVector);`
			`inVector[i + order] = inBuffer[8 + i] - prediction[8 + i];`
			`e[8 + i] = (f32) inVector[i + order];`
			`}`

			`// Clamp the errors to 16-bit signed ints, and put them in ie.`
			`clamp(16, e, ie, 16);`

			`// Find a value with highest absolute value.`
			`// @bug If this first finds -2^n and later 2^n, it should set 'max' to the`
			`// latter, which needs a higher value for 'scale'.`
			`max = 0;`
			`for (i = 0; i < 16; i++)`
			`{`
Sloppy fixes to adpcm Stopped ignoring warnings in the makefile. Made warnings an error. Sanity checked nsequences to prevent allocating gigantic tables. 2022-02-06 23:18:20 +00:00			`if (fabs((f64)ie[i]) > fabs((f64)max))`
init2 2019-08-25 04:46:40 +00:00			`{`
			`max = ie[i];`
			`}`
			`}`

			`// Compute which power of two we need to scale down by in order to make`
			`// all values representable as 4-bit signed integers (i.e. be in [-8, 7]).`
			`// The worst-case 'max' is -2^15, so this will be at most 12.`
			`for (scale = 0; scale <= 12; scale++)`
			`{`
			`if (max <= ulevel && max >= llevel)`
			`{`
			`goto out;`
			`}`
			`max /= 2;`
			`}`
			`out:;`

			`for (i = 0; i < 16; i++)`
			`{`
			`saveState[i] = state[i];`
			`}`

			`// Try with the computed scale, but if it turns out we don't fit in 4 bits`
			`// (if some \|cV\| >= 2), use scale + 1 instead (i.e. downscaling by another`
			`// factor of 2).`
			`scale--;`
			`nIter = 0;`
			`do`
			`{`
			`nIter++;`
			`maxClip = 0;`
			`scale++;`
			`if (scale > 12)`
			`{`
			`scale = 12;`
			`}`

			`// Copy over the last 'order' samples from the previous output.`
			`for (i = 0; i < order; i++)`
			`{`
			`inVector[i] = saveState[16 - order + i];`
			`}`

			`// For 8 samples...`
			`for (i = 0; i < 8; i++)`
			`{`
			`// Compute a prediction based on 'order' values from the old state,`
			`// plus previous *quantized* errors in this chunk (because that's`
			`// all the decoder will have available).`
			`prediction[i] = inner_product(order + i, coefTable[optimalp][i], inVector);`

			`// Compute the error, and divide it by 2^scale, rounding to the`
			`// nearest integer. This should ideally result in a 4-bit integer.`
			`se = (f32) inBuffer[i] - (f32) prediction[i];`
			`ix[i] = qsample(se, 1 << scale);`

			`// Clamp the error to a 4-bit signed integer, and record what delta`
			`// was needed for that.`
			`cV = (s16) clip(ix[i], llevel, ulevel) - ix[i];`
			`if (maxClip < abs(cV))`
			`{`
			`maxClip = abs(cV);`
			`}`
			`ix[i] += cV;`

			`// Record the quantized error in inVector for later predictions,`
			`// and the quantized (decoded) output in state (for use in the next`
			`// batch of 8 samples).`
			`inVector[i + order] = ix[i] * (1 << scale);`
			`state[i] = prediction[i] + inVector[i + order];`
			`}`

			`// Copy over the last 'order' decoded samples from the above chunk.`
			`for (i = 0; i < order; i++)`
			`{`
			`inVector[i] = state[8 - order + i];`
			`}`

			`// ... and do the same thing as before.`
			`for (i = 0; i < 8; i++)`
			`{`
			`prediction[8 + i] = inner_product(order + i, coefTable[optimalp][i], inVector);`
			`se = (f32) inBuffer[8 + i] - (f32) prediction[8 + i];`
			`ix[8 + i] = qsample(se, 1 << scale);`
			`cV = (s16) clip(ix[8 + i], llevel, ulevel) - ix[8 + i];`
			`if (maxClip < abs(cV))`
			`{`
			`maxClip = abs(cV);`
			`}`
			`ix[8 + i] += cV;`
			`inVector[i + order] = ix[8 + i] * (1 << scale);`
			`state[8 + i] = prediction[8 + i] + inVector[i + order];`
			`}`
			`}`
			`while (maxClip >= 2 && nIter < 2);`

			`// The scale, the predictor index, and the 16 computed outputs are now all`
			`// 4-bit numbers. Write them out as 1 + 8 bytes.`
			`header = (scale << 4) \| (optimalp & 0xf);`
			`fwrite(&header, 1, 1, ofile);`
			`for (i = 0; i < 16; i += 2)`
			`{`
			`c = (ix[i] << 4) \| (ix[i + 1] & 0xf);`
			`fwrite(&c, 1, 1, ofile);`
			`}`
			`}`