SFTtech · heinezen · Apr 23, 2025 · Apr 13, 2025 · Apr 13, 2025 · Apr 21, 2025
diff --git a/copying.md b/copying.md
@@ -166,6 +166,7 @@ _the openage authors_ are:
 | Eelco Empting               | Eeelco                      | me à eelco dawt de                                |
 | Jordan Sutton               | jsutCodes                   | jsutcodes à gmail dawt com                        |
 | Daniel Wieczorek            | Danio                       | danielwieczorek96 à gmail dawt com                |
+|                             | bytegrrrl                   | bytegrrrl à proton dawt me                        |
 
 If you're a first-time committer, add yourself to the above list. This is not
 just for legal reasons, but also to keep an overview of all those nicknames.

diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include <algorithm>
+#include <bit>
 #include <climits>
 #include <cmath>
 #include <iomanip>
@@ -446,8 +447,52 @@ class FixedPoint {
 		return is;
 	}
 
-	constexpr double sqrt() {
-		return std::sqrt(this->to_double());
+	/**
+	 * Pure FixedPoint sqrt implementation using Heron's Algorithm.
+	 *
+	 * Note that this function is undefined for negative values.
+	 *
+	 * There's a small loss in precision depending on the value of fractional_bits and the position of
+	 * the most significant bit: if the integer portion is very large, we won't have as much (absolute)
+	 * precision. Ideally you would want the intermediate_type to be twice the size of raw_type to avoid
+	 * any losses.
+	 */
+	constexpr FixedPoint sqrt() {
+		// Zero can cause issues later, so deal with now.
+		if (this->raw_value == 0) {
+			return zero();
+		}
+
+		// Check for negative values
+		if constexpr (std::is_signed<raw_type>()) {
+			ENSURE(this->raw_value > 0, "FixedPoint::sqrt() is undefined for negative values.");
+		}
+
+		// A greater shift = more precision, but can overflow the intermediate type if too large.
+		size_t max_shift = std::countl_zero(static_cast<unsigned_intermediate_type>(this->raw_value)) - 1;
+		size_t shift = max_shift > fractional_bits ? fractional_bits : max_shift;
+
+		// shift + fractional bits must be an even number
+		if ((shift + fractional_bits) % 2) {
+			shift -= 1;
+		}
+
+		// We can't use the safe shift since the shift value is unknown at compile time.
+		intermediate_type n = static_cast<intermediate_type>(this->raw_value) << shift;
+		intermediate_type guess = static_cast<intermediate_type>(1) << fractional_bits;
+
+		for (size_t i = 0; i < fractional_bits; i++) {
+			intermediate_type prev = guess;
+			guess = (guess + n / guess) / 2;
+			if (guess == prev) {
+				break;
+			}
+		}
+
+		// The sqrt operation halves the number of bits, so we'll we'll have to calculate a shift back
+		size_t unshift = fractional_bits - (shift + fractional_bits) / 2;
+
+		return from_raw_value(guess << unshift);
 	}
 
 	constexpr double atan2(const FixedPoint &n) {
@@ -574,7 +619,7 @@ namespace std {
 
 template <typename I, unsigned F, typename Inter>
 constexpr double sqrt(openage::util::FixedPoint<I, F, Inter> n) {
-	return n.sqrt();
+	return static_cast<double>(n.sqrt());
 }
 
 template <typename I, unsigned F, typename Inter>

diff --git a/libopenage/util/fixed_point_test.cpp b/libopenage/util/fixed_point_test.cpp
@@ -156,6 +156,53 @@ void fixed_point() {
 		TESTEQUALS_FLOAT((c/b).to_double(), -4.75/3.5, 0.1);
 	}
 
+	// Pure FixedPoint sqrt tests
+	{
+		using T = FixedPoint<int64_t, 32, int64_t>;
+		TESTEQUALS_FLOAT(T(41231.131).sqrt(), 203.0545025356, 1e-7);
+		TESTEQUALS_FLOAT(T(547965.116).sqrt(), 740.2466588915, 1e-7);
+
+		TESTEQUALS_FLOAT(T(2).sqrt(), T::sqrt_2(), 1e-9);
+		TESTEQUALS_FLOAT(2 / std::sqrt(T::pi()), T::inv2_sqrt_pi(), 1e-9);
+
+		// Powers of two (anything over 2^15 will overflow (2^16)^2 = 2^32 >).
+		for (size_t i = 0; i < 15; i++) {
+			int64_t value = 1 << i;
+			TESTEQUALS_FLOAT(T(value * value).sqrt(), value, 1e-7);
+		}
+
+		for (size_t i = 0; i < 100; i++) {
+			double value = 14.25 * i;
+			TESTEQUALS_FLOAT(T(value * value).sqrt(), value, 1e-7);
+		}
+
+		// This one can go up to 2^63, but that would take years.
+		for (uint32_t i = 0; i < 65536; i++) {
+			T value = T::from_raw_value(i * i);
+			TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-7);
+		}
+
+		// We lose some precision when raw_type == intermediate_type
+		for (uint64_t i = 1; i < std::numeric_limits<uint64_t>::max(); i = (i * 2) ^ i) {
+			T value = T::from_raw_value(i * i);
+			if (value < 0) {
+				value = -value;
+			}
+			TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4);
+		}
+
+		using FP16_16 = FixedPoint<uint32_t, 16, uint64_t>;
+		for (uint32_t i = 1; i < 65536; i++) {
+			FP16_16 value = FP16_16::from_raw_value(i);
+			TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4);
+		}
+
+
+		// Test with negative number
+		TESTTHROWS((FixedPoint<int64_t, 32>::from_float(-3.25).sqrt()));
+		TESTNOEXCEPT((FixedPoint<int64_t, 32>::from_float(3.25).sqrt()));
+		TESTNOEXCEPT((FixedPoint<uint64_t, 32>::from_float(-3.25).sqrt()));
+	}
 }
 
 }}} // openage::util::tests