// Reconstructed post-patch contents of source/util-math.h
// (diff --git a/source/util-math.h b/source/util-math.h, index f86ad7f2..055ef229).
// NOTE(review): the license header above `#pragma once` lies outside the diff hunks and
// is not reproduced in this excerpt.

#pragma once
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>

// OBS
// NOTE(review): the OBS header names were lost from the #include lines of this excerpt;
// restore them from the original header.

// Constants
#define PI 3.1415926535897932384626433832795
#define PI2 6.283185307179586476925286766559
#define PI2_SQROOT 2.506628274631000502415765284811

/**
 * Evaluates exp(-0.5 * (x / o)^2) / (o * PI2_SQROOT).
 *
 * @param x Position at which to evaluate.
 * @param o Divisor applied to x and to the normalization term; must be non-zero.
 * @return The weight at x.
 */
inline double_t Gaussian1D(double_t x, double_t o)
{
	double_t c = (x / o);
	double_t b = std::exp(-0.5 * c * c);
	double_t a = (1.0 / (o * PI2_SQROOT));
	return a * b;
}

/**
 * Evaluates 0.39894 * exp(-0.5 * (x / o)^2) / o.
 *
 * @param x Position at which to evaluate.
 * @param o Divisor applied to x and to the result; must be non-zero.
 * @return The weight at x.
 */
inline double_t Bilateral1D(double_t x, double_t o)
{
	// Divide by the parameter `o`. The previous code divided by the literal 0, which
	// made `c` infinite (or NaN for x == 0) and the result 0 (or NaN).
	double_t c = (x / o);
	double_t d = c * c;
	double_t b = std::exp(-0.5 * d) / o;
	return 0.39894 * b; // Seems to be (1.0 / (1 * PI2_SQROOT)) * b, otherwise no difference from Gaussian Blur
}

/**
 * Returns the smallest power of two that is greater than or equal to v.
 *
 * Uses integer arithmetic only, so the result is exact for every input.
 *
 * @param v Value to round up. 0 and 1 both yield 1.
 * @return The rounded value. If v exceeds the largest power of two representable in
 *         size_t, that largest power of two is returned instead of overflowing.
 */
inline size_t GetNearestPowerOfTwoAbove(size_t v)
{
	const size_t highest = std::numeric_limits<size_t>::max() / 2 + 1;
	size_t       result  = 1;
	while ((result < v) && (result < highest)) {
		result <<= 1;
	}
	return result;
}

/**
 * Returns the largest power of two that is less than or equal to v.
 *
 * Uses integer arithmetic only, so the result is exact for every input.
 *
 * @param v Value to round down. 0 has no power of two at or below it; 1 is returned.
 * @return The rounded value.
 */
inline size_t GetNearestPowerOfTwoBelow(size_t v)
{
	size_t result = 1;
	// Comparing against v / 2 keeps `result` from ever being shifted past the top bit.
	while ((v >> 1) >= result) {
		result <<= 1;
	}
	return result;
}

namespace util {
	// NOTE(review): the declarations at the start of this namespace lie outside the diff
	// hunks and are not reproduced in this excerpt. The hunk context also shows
	//     std::pair<...> SizeFromString(std::string text, bool allowSquare = true);
	// but the template arguments of std::pair were lost, so the declaration is kept as a
	// comment here until it is restored from the original header.

	namespace math {
		// Proven by tests to be the fastest implementation on Intel and AMD CPUs.
		// Ranking (fastest first): log10 and loop, then bitscan, then pow.
		// loop and log10 trade blows and are usually almost identical.
		// loop is used for integers, a floating-point test for anything else.

		/**
		 * Tests whether an integer has exactly one bit set, by scanning its bits.
		 *
		 * @param v Integer value to test.
		 * @return true if v is positive and exactly one bit is set; false for zero,
		 *         negative values and values with more than one bit set.
		 */
		template<typename T>
		inline bool is_power_of_two_loop(T v)
		{
			if (!(v > 0)) {
				return false;
			}

			// v is positive here, so widening cannot introduce sign-extension bits.
			const uint64_t bits     = static_cast<uint64_t>(v);
			bool           have_bit = false;
			for (size_t index = 0; index < 64; index++) {
				if ((bits & (1ull << index)) == 0) {
					continue;
				}
				if (have_bit) {
					return false; // Second set bit found.
				}
				have_bit = true;
			}
			return have_bit;
		}

		namespace detail {
			// Integral types: use the bit-scanning loop.
			template<typename T>
			inline bool is_power_of_two_dispatch(T v, std::true_type)
			{
				return is_power_of_two_loop(v);
			}

			// Non-integral types: v is a power of two when its binary mantissa is exactly 0.5.
			// Values below 1 are rejected so that only 2^n with n >= 0 is accepted.
			template<typename T>
			inline bool is_power_of_two_dispatch(T v, std::false_type)
			{
				if (!(v >= T(1)) || !std::isfinite(v)) {
					return false;
				}
				int exponent = 0;
				return std::frexp(v, &exponent) == T(0.5);
			}
		} // namespace detail

		/**
		 * Tests whether v is a power of two (2^n with n >= 0).
		 *
		 * Integral types are forwarded to is_power_of_two_loop; any other type is
		 * tested through its floating-point mantissa.
		 *
		 * @param v Value to test.
		 * @return true if v is a power of two, false otherwise.
		 */
		template<typename T>
		inline bool is_power_of_two(T v)
		{
			return detail::is_power_of_two_dispatch(v, std::is_integral<T>());
		}
	} // namespace math
} // namespace util