/*------------------------------------------------------------------------- * drawElements Quality Program Tester Core * ---------------------------------------- * * Copyright 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *//*! * \file * \brief Adjustable-precision floating point operations. *//*--------------------------------------------------------------------*/ #include "tcuFloatFormat.hpp" #include "deMath.h" #include "deUniquePtr.hpp" #include <sstream> #include <iomanip> #include <limits> namespace tcu { namespace { Interval chooseInterval(YesNoMaybe choice, const Interval& no, const Interval& yes) { switch (choice) { case NO: return no; case YES: return yes; case MAYBE: return no | yes; default: DE_FATAL("Impossible case"); } return Interval(); } double computeMaxValue (int maxExp, int fractionBits) { return (deLdExp(1.0, maxExp) + deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits)); } } // anonymous FloatFormat::FloatFormat (int minExp, int maxExp, int fractionBits, bool exactPrecision, YesNoMaybe hasSubnormal_, YesNoMaybe hasInf_, YesNoMaybe hasNaN_) : m_minExp (minExp) , m_maxExp (maxExp) , m_fractionBits (fractionBits) , m_hasSubnormal (hasSubnormal_) , m_hasInf (hasInf_) , m_hasNaN (hasNaN_) , m_exactPrecision (exactPrecision) , m_maxValue (computeMaxValue(maxExp, fractionBits)) { DE_ASSERT(minExp <= maxExp); } /*------------------------------------------------------------------------- * On the definition of ULP * * The GLSL spec does not define ULP. However, it refers to IEEE 754, which * (reportedly) uses Harrison's definition: * * ULP(x) is the distance between the closest floating point numbers * a and be such that a <= x <= b and a != b * * Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1), i.e. it is the * distance to the next lowest float, not next highest. * * Furthermore, it is assumed that ULP is calculated relative to the exact * value, not the approximation. This is because otherwise a less accurate * approximation could be closer in ULPs, because its ULPs are bigger. * * For details, see "On the definition of ulp(x)" by Jean-Michel Muller * *-----------------------------------------------------------------------*/ double FloatFormat::ulp (double x, double count) const { int exp = 0; const double frac = deFractExp(deAbs(x), &exp); if (deIsNaN(frac)) return TCU_NAN; else if (deIsInf(frac)) return deLdExp(1.0, m_maxExp - m_fractionBits); else if (frac == 1.0) { // Harrison's ULP: choose distance to closest (i.e. next lower) at binade // boundary. --exp; } else if (frac == 0.0) exp = m_minExp; // ULP cannot be lower than the smallest quantum. exp = de::max(exp, m_minExp); { const double oneULP = deLdExp(1.0, exp - m_fractionBits); ScopedRoundingMode ctx (DE_ROUNDINGMODE_TO_POSITIVE_INF); return oneULP * count; } } //! Return the difference between the given nominal exponent and //! the exponent of the lowest significand bit of the //! representation of a number with this format. //! For normal numbers this is the number of significand bits, but //! for subnormals it is less and for values of exp where 2^exp is too //! small to represent it is <0 int FloatFormat::exponentShift (int exp) const { return m_fractionBits - de::max(m_minExp - exp, 0); } //! Return the number closest to `d` that is exactly representable with the //! significand bits and minimum exponent of the floatformat. Round up if //! `upward` is true, otherwise down. double FloatFormat::round (double d, bool upward) const { int exp = 0; const double frac = deFractExp(d, &exp); const int shift = exponentShift(exp); const double shiftFrac = deLdExp(frac, shift); const double roundFrac = upward ? deCeil(shiftFrac) : deFloor(shiftFrac); return deLdExp(roundFrac, exp - shift); } //! Return the range of numbers that `d` might be converted to in the //! floatformat, given its limitations with infinities, subnormals and maximum //! exponent. Interval FloatFormat::clampValue (double d) const { const double rSign = deSign(d); int rExp = 0; DE_ASSERT(!deIsNaN(d)); deFractExp(d, &rExp); if (rExp < m_minExp) return chooseInterval(m_hasSubnormal, rSign * 0.0, d); else if (deIsInf(d) || rExp > m_maxExp) return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY); return Interval(d); } //! Return the range of numbers that might be used with this format to //! represent a number within `x`. Interval FloatFormat::convert (const Interval& x) const { Interval ret; Interval tmp = x; if (x.hasNaN()) { // If NaN might be supported, NaN is a legal return value if (m_hasNaN != NO) ret |= TCU_NAN; // If NaN might not be supported, any (non-NaN) value is legal, // _subject_ to clamping. Hence we modify tmp, not ret. if (m_hasNaN != YES) tmp = Interval::unbounded(); } // Round both bounds _inwards_ to closest representable values. if (!tmp.empty()) ret |= clampValue(round(tmp.lo(), true)) | clampValue(round(tmp.hi(), false)); // If this format's precision is not exact, the (possibly out-of-bounds) // original value is also a possible result. if (!m_exactPrecision) ret |= x; return ret; } double FloatFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const { int exp = 0; deFractExp(d, &exp); if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0))) return deSign(d) * getMaxValue(); else return round(d, upward); } //! Round output of an operation. //! \param roundUnderOverflow Can +/-inf rounded to min/max representable; //! should be false if any of operands was inf, true otherwise. Interval FloatFormat::roundOut (const Interval& x, bool roundUnderOverflow) const { Interval ret = x.nan(); if (!x.empty()) ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow), roundOut(x.hi(), true, roundUnderOverflow)); return ret; } std::string FloatFormat::floatToHex (double x) const { if (deIsNaN(x)) return "NaN"; else if (deIsInf(x)) return (x < 0.0 ? "-" : "+") + std::string("inf"); else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero return "0.0"; int exp = 0; const double frac = deFractExp(deAbs(x), &exp); const int shift = exponentShift(exp); const deUint64 bits = deUint64(deLdExp(frac, shift)); const deUint64 whole = bits >> m_fractionBits; const deUint64 fraction = bits & ((deUint64(1) << m_fractionBits) - 1); const int exponent = exp + m_fractionBits - shift; const int numDigits = (m_fractionBits + 3) / 4; const deUint64 aligned = fraction << (numDigits * 4 - m_fractionBits); std::ostringstream oss; oss << (x < 0 ? "-" : "") << "0x" << whole << "." << std::hex << std::setw(numDigits) << std::setfill('0') << aligned << "p" << std::dec << std::setw(0) << exponent; return oss.str(); } std::string FloatFormat::intervalToHex (const Interval& interval) const { if (interval.empty()) return interval.hasNaN() ? "{ NaN }" : "{}"; else if (interval.lo() == interval.hi()) return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") + floatToHex(interval.lo()) + " }"); else if (interval == Interval::unbounded(true)) return "<any>"; return (std::string(interval.hasNaN() ? "{ NaN } | " : "") + "[" + floatToHex(interval.lo()) + ", " + floatToHex(interval.hi()) + "]"); } template <typename T> static FloatFormat nativeFormat (void) { typedef std::numeric_limits<T> Limits; DE_ASSERT(Limits::radix == 2); return FloatFormat(Limits::min_exponent - 1, // These have a built-in offset of one Limits::max_exponent - 1, Limits::digits - 1, // don't count the hidden bit Limits::has_denorm != std::denorm_absent, Limits::has_infinity ? YES : NO, Limits::has_quiet_NaN ? YES : NO, ((Limits::has_denorm == std::denorm_present) ? YES : (Limits::has_denorm == std::denorm_absent) ? NO : MAYBE)); } FloatFormat FloatFormat::nativeFloat (void) { return nativeFormat<float>(); } FloatFormat FloatFormat::nativeDouble (void) { return nativeFormat<double>(); } namespace { using std::string; using std::ostringstream; using de::MovePtr; using de::UniquePtr; class Test { protected: Test (MovePtr<FloatFormat> fmt) : m_fmt(fmt) {} double p (int e) const { return deLdExp(1.0, e); } void check (const string& expr, double result, double reference) const; void testULP (double arg, double ref) const; void testRound (double arg, double refDown, double refUp) const; UniquePtr<FloatFormat> m_fmt; }; void Test::check (const string& expr, double result, double reference) const { if (result != reference) { ostringstream oss; oss << expr << " returned " << result << ", expected " << reference; TCU_FAIL(oss.str().c_str()); } } void Test::testULP (double arg, double ref) const { ostringstream oss; oss << "ulp(" << arg << ")"; check(oss.str(), m_fmt->ulp(arg), ref); } void Test::testRound (double arg, double refDown, double refUp) const { { ostringstream oss; oss << "round(" << arg << ", false)"; check(oss.str(), m_fmt->round(arg, false), refDown); } { ostringstream oss; oss << "round(" << arg << ", true)"; check(oss.str(), m_fmt->round(arg, true), refUp); } } class TestBinary32 : public Test { public: TestBinary32 (void) : Test (MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true))) {} void runTest (void) const; }; void TestBinary32::runTest (void) const { testULP(p(0), p(-24)); testULP(p(0) + p(-23), p(-23)); testULP(p(-124), p(-148)); testULP(p(-125), p(-149)); testULP(p(-125) + p(-140), p(-148)); testULP(p(-126), p(-149)); testULP(p(-130), p(-149)); testRound(p(0) + p(-20) + p(-40), p(0) + p(-20), p(0) + p(-20) + p(-23)); testRound(p(-126) - p(-150), p(-126) - p(-149), p(-126)); TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0"); TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8"); TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126"); TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126"); TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125"); } } // anonymous void FloatFormat_selfTest (void) { TestBinary32 test32; test32.runTest(); } } // tcu