From b712d4b6a31aae502105a532e39e2dbffd96dd4e Mon Sep 17 00:00:00 2001 From: John S <138552829+Multi-Volt@users.noreply.github.com> Date: Fri, 18 Oct 2024 02:47:00 -0400 Subject: [PATCH] Fix the branch bloat found in the atan2s and atan2_lookup functions (#375) * Fix branch bloat in atan2s and atan2_lookup * Fix unnecessary inline on lookup --- src/engine/math_util.c | 68 ++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/src/engine/math_util.c b/src/engine/math_util.c index 4d488bc6..d4fbb215 100644 --- a/src/engine/math_util.c +++ b/src/engine/math_util.c @@ -872,58 +872,36 @@ f32 approach_f32(f32 current, f32 target, f32 inc, f32 dec) { * the resulting angle is in range [0, 0x2000] (1/8 of a circle). */ static u16 atan2_lookup(f32 y, f32 x) { - u16 ret; - - if (x == 0) { - ret = gArctanTable[0]; - } else { - s32 index = (s32)(y / x * 1024 + 0.5f); - if (index >= 0x401 || index < 0) { index = 0; } - ret = gArctanTable[index]; - } - return ret; + s16 idx = (s16)(y / x * 1024.0f + 0.5f); + idx = (idx >= 0 && idx < 0x401) ? idx : 0; + return gArctanTable[idx]; } /** * Compute the angle from (0, 0) to (x, y) as a s16. Given that terrain is in * the xz-plane, this is commonly called with (z, x) to get a yaw angle.
*/ -s16 atan2s(f32 y, f32 x) { - u16 ret; +inline s16 atan2s(f32 y, f32 x) { + // Extract sign bits: 1 if negative, 0 otherwise + u8 signx = (x < 0.0f); + u8 signy = (y < 0.0f); - if (x >= 0) { - if (y >= 0) { - if (y >= x) { - ret = atan2_lookup(x, y); - } else { - ret = 0x4000 - atan2_lookup(y, x); - } - } else { - y = -y; - if (y < x) { - ret = 0x4000 + atan2_lookup(y, x); - } else { - ret = 0x8000 - atan2_lookup(x, y); - } - } - } else { - x = -x; - if (y < 0) { - y = -y; - if (y >= x) { - ret = 0x8000 + atan2_lookup(x, y); - } else { - ret = 0xC000 - atan2_lookup(y, x); - } - } else { - if (y < x) { - ret = 0xC000 + atan2_lookup(y, x); - } else { - ret = -atan2_lookup(x, y); - } - } - } - return ret; + // Take absolute values + f32 absx = absx(x); + f32 absy = absx(y); + + // Compute the angle in the first octant + u16 angle = atan2_lookup(min(absx, absy), max(absy, absx)); + + // Create an index based on the signs and swap status + u8 idx = ((absy > absx) << 2) | (signx << 1) | signy; + + // Combined lookup tables for offsets and sign multipliers + static const s16 offsets[] = {0x4000, 0x4000, 0xC000, 0xC000, 0x0000, 0x8000, 0x0000, 0x8000}; + static const s8 signs[] = {-1, 1, 1, -1, 1, -1, -1, 1}; + + // Ensure the result fits into 16 bits via an explicit cast on angle + return (offsets[idx] + (signs[idx] * (s16)angle)) & 0xFFFF; } /**