From 1923a724d75af023b96b576bbded92a4604bf21e Mon Sep 17 00:00:00 2001
From: Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
Date: Fri, 28 Sep 2018 14:17:43 +0200
Subject: [PATCH] util-math: Formatting and is_power_of_two templates

---
 source/util-math.h | 68 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 10 deletions(-)
diff --git a/source/util-math.h b/source/util-math.h
index f86ad7f2..055ef229 100644
--- a/source/util-math.h
+++ b/source/util-math.h
@@ -18,10 +18,10 @@
 */
 
 #pragma once
-#include <math.h>
 #include <inttypes.h>
-#include <utility>
+#include <math.h>
 #include <string>
+#include <utility>
 
 // OBS
 #include <graphics/vec2.h>
@@ -29,29 +29,33 @@
 #include <graphics/vec4.h>
 
 // Constants
-#define PI		3.1415926535897932384626433832795
-#define PI2		6.283185307179586476925286766559
-#define PI2_SQROOT	2.506628274631000502415765284811		
+#define PI 3.1415926535897932384626433832795 // pi
+#define PI2 6.283185307179586476925286766559 // 2 * pi
+#define PI2_SQROOT 2.506628274631000502415765284811 // sqrt(2 * pi)
 
-inline double_t Gaussian1D(double_t x, double_t o) {
+inline double_t Gaussian1D(double_t x, double_t o) // Zero-mean Gaussian density at x for standard deviation o.
+{
 	double_t c = (x / o);
 	double_t b = exp(-0.5 * c * c);
 	double_t a = (1.0 / (o * PI2_SQROOT));
 	return a * b;
 }
 
-inline double_t Bilateral1D(double_t x, double_t o) {
+inline double_t Bilateral1D(double_t x, double_t o) // Weight for offset x given sigma o: 0.39894 * exp(-0.5 * (x / o)^2) / o.
+{
-	double_t c = (x / 0);
+	double_t c = (x / o); // Scale the offset by sigma; dividing by a literal 0 only ever produces inf or NaN.
 	double_t d = c * c;
 	double_t b = exp(-0.5 * d) / o;
 	return 0.39894 * b; // Seems to be (1.0 / (1 * PI2_SQROOT)) * b, otherwise no difference from Gaussian Blur
 }
 
-inline size_t GetNearestPowerOfTwoAbove(size_t v) {
+inline size_t GetNearestPowerOfTwoAbove(size_t v) // Computes 2^ceil(log2(v)) in double precision: the power of two at or above v.
+{ // NOTE(review): v == 0 gives log10(0) = -inf, whose conversion to size_t is undefined; rounding may also shift results - confirm callers.
 	return 1ull << size_t(ceil(log10(double(v)) / log10(2.0)));
 }
 
-inline size_t GetNearestPowerOfTwoBelow(size_t v) {
+inline size_t GetNearestPowerOfTwoBelow(size_t v) // Computes 2^floor(log2(v)) in double precision: the power of two at or below v.
+{ // NOTE(review): v == 0 gives log10(0) = -inf, whose conversion to size_t is undefined; rounding may also shift results - confirm callers.
 	return 1ull << size_t(floor(log10(double(v)) / log10(2.0)));
 }
 
@@ -71,4 +75,48 @@ namespace util {
 	};
 
 	std::pair<int64_t, int64_t> SizeFromString(std::string text, bool allowSquare = true);
-}
+
+	namespace math {
+		// Returns true if v is a power of two; values below 1 (and NaN) are rejected before the log10 math.
+		// Proven by tests to be the fastest implementation on Intel and AMD CPUs.
+		// Ranking: log10, loop < bitscan < pow; loop and log10 trade blows, usually almost identical.
+		// loop is used for integers, log10 for anything else.
+		template<typename T>
+		inline bool is_power_of_two(T v)
+		{
+			return (v >= T(1)) && (T(1ull << uint64_t(log10(v) / log10(2.0))) == v);
+		}
+
+		// Returns true only if v is positive and exactly one of its bits is set (integer types only).
+		template<typename T>
+		inline bool is_power_of_two_loop(T v)
+		{
+			bool have_bit = false;
+			for (size_t index = 0; index < 64; index++) { // Ascending: an unsigned index can never drop below 0.
+				if ((v & (1ull << index)) == 0)
+					continue;
+				if (have_bit)
+					return false; // A second set bit rules out a power of two.
+				have_bit = true;
+			}
+			return have_bit && (v > T(0)); // Rejects 0 (no bit set) and the most negative signed value (sign bit only).
+		}
+
+		// Explicitly specialize is_power_of_two for each fixed-width integer type so it forwards to the loop variant.
+#pragma push_macro("is_power_of_two_as_loop")
+#define is_power_of_two_as_loop(int_type)              \
+	template<> inline bool is_power_of_two(int_type v) \
+	{                                                  \
+		return is_power_of_two_loop(v);                \
+	}
+		is_power_of_two_as_loop(uint8_t);
+		is_power_of_two_as_loop(uint16_t);
+		is_power_of_two_as_loop(uint32_t);
+		is_power_of_two_as_loop(uint64_t);
+		is_power_of_two_as_loop(int8_t);
+		is_power_of_two_as_loop(int16_t);
+		is_power_of_two_as_loop(int32_t);
+		is_power_of_two_as_loop(int64_t);
+#pragma pop_macro("is_power_of_two_as_loop")
+	} // namespace math
+} // namespace util