/* libs/pixelflinger/fixed.cpp ** ** Copyright 2006, The Android Open Source Project ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** ** http://www.apache.org/licenses/LICENSE-2.0 ** ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. */ #include <stdio.h> #include <private/pixelflinger/ggl_context.h> #include <private/pixelflinger/ggl_fixed.h> // ------------------------------------------------------------------------ int32_t gglRecipQNormalized(int32_t x, int* exponent) { const int32_t s = x>>31; uint32_t a = s ? -x : x; // the result will overflow, so just set it to the biggest/inf value if (ggl_unlikely(a <= 2LU)) { *exponent = 0; return s ? FIXED_MIN : FIXED_MAX; } // Newton-Raphson iteration: // x = r*(2 - a*r) const int32_t lz = gglClz(a); a <<= lz; // 0.32 uint32_t r = a; // note: if a == 0x80000000, this means x was a power-of-2, in this // case we don't need to compute anything. We get the reciprocal for // (almost) free. if (a != 0x80000000) { r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a // 0.32 + 2.30 = 2.62 -> 2.30 // 2.30 + 2.30 = 4.60 -> 2.30 r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30; } // shift right 1-bit to make room for the sign bit *exponent = 30-lz-1; r >>= 1; return s ? -r : r; } int32_t gglRecipQ(GGLfixed x, int q) { int shift; x = gglRecipQNormalized(x, &shift); shift += 16-q; x += 1L << (shift-1); // rounding x >>= shift; return x; } // ------------------------------------------------------------------------ GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) { if ((d>>24) && ((d>>24)+1)) { n >>= 8; d >>= 8; } return gglMulx(n, gglRecip(d)); } // ------------------------------------------------------------------------ static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = { // 1/sqrt(x) with x = 1-N/16, N=[8...1] 0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865 }; GGLfixed gglSqrtRecipx(GGLfixed x) { if (x == 0) return FIXED_MAX; if (x == FIXED_ONE) return x; const GGLfixed a = x; const int32_t lz = gglClz(x); x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7]; const int32_t exp = lz - 16; if (exp <= 0) x >>= -exp>>1; else x <<= (exp>>1) + (exp & 1); if (exp & 1) { x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1; } // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x) x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x))); return x; } GGLfixed gglSqrtx(GGLfixed a) { // Compute a full precision square-root (24 bits accuracy) GGLfixed r = 0; GGLfixed bit = 0x800000; int32_t bshift = 15; do { GGLfixed temp = bit + (r<<1); if (bshift >= 8) temp <<= (bshift-8); else temp >>= (8-bshift); if (a >= temp) { r += bit; a -= temp; } bshift--; } while (bit>>=1); return r; } // ------------------------------------------------------------------------ static const GGLfixed ggl_log_approx_tab[] = { // -ln(x)/ln(2) with x = N/16, N=[8...16] 0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000 }; static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0] 0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000 }; GGLfixed gglPowx(GGLfixed x, GGLfixed y) { // prerequisite: 0 <= x <= 1, and y >=0 // pow(x,y) = 2^(y*log2(x)) // = 2^(y*log2(x*(2^exp)*(2^-exp)))) // = 2^(y*(log2(X)-exp)) // = 2^(log2(X)*y - y*exp) // = 2^( - (-log2(X)*y + y*exp) ) int32_t exp = gglClz(x) - 16; GGLfixed f = x << exp; x = (f & 0x0FFF)<<4; f = (f >> 12) & 0x7; GGLfixed p = gglMulAddx( ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x, ggl_log_approx_tab[f]); p = gglMulAddx(p, y, y*exp); exp = gglFixedToIntFloor(p); if (exp < 31) { p = gglFracx(p); x = (p & 0x1FFF)<<3; p >>= 13; p = gglMulAddx( ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x, ggl_alog_approx_tab[p]); p >>= exp; } else { p = 0; } return p; // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f; } // ------------------------------------------------------------------------ int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) { //int32_t r =int32_t((int64_t(n)<<i)/d); const int32_t ds = n^d; if (n<0) n = -n; if (d<0) d = -d; int nd = gglClz(d) - gglClz(n); i += nd + 1; if (nd > 0) d <<= nd; else n <<= -nd; uint32_t q = 0; int j = i & 7; i >>= 3; // gcc deals with the code below pretty well. // we get 3.75 cycles per bit in the main loop // and 8 cycles per bit in the termination loop if (ggl_likely(i)) { n -= d; do { q <<= 8; if (n>=0) q |= 128; else n += d; n = n*2 - d; if (n>=0) q |= 64; else n += d; n = n*2 - d; if (n>=0) q |= 32; else n += d; n = n*2 - d; if (n>=0) q |= 16; else n += d; n = n*2 - d; if (n>=0) q |= 8; else n += d; n = n*2 - d; if (n>=0) q |= 4; else n += d; n = n*2 - d; if (n>=0) q |= 2; else n += d; n = n*2 - d; if (n>=0) q |= 1; else n += d; if (--i == 0) goto finish; n = n*2 - d; } while(true); do { q <<= 1; n = n*2 - d; if (n>=0) q |= 1; else n += d; finish: ; } while (j--); return (ds<0) ? -q : q; } n -= d; if (n>=0) q |= 1; else n += d; j--; goto finish; } // ------------------------------------------------------------------------ // assumes that the int32_t values of a, b, and c are all positive // use when both a and b are larger than c template <typename T> static inline void swap(T& a, T& b) { T t(a); a = b; b = t; } static __attribute__((noinline)) int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c) { // first we compute a*b as a 64-bit integer // (GCC generates umull with the code below) uint64_t ab = uint64_t(a)*b; uint32_t hi = ab>>32; uint32_t lo = ab; uint32_t result; // now perform the division if (hi >= c) { overflow: result = 0x7fffffff; // basic overflow } else if (hi == 0) { result = lo/c; // note: c can't be 0 if ((result >> 31) != 0) // result must fit in 31 bits goto overflow; } else { uint32_t r = hi; int bits = 31; result = 0; do { r = (r << 1) | (lo >> 31); lo <<= 1; result <<= 1; if (r >= c) { r -= c; result |= 1; } } while (bits--); } return int32_t(result); } // assumes a >= 0 and c >= b >= 0 static inline int32_t quick_muldiv(int32_t a, int32_t b, int32_t c) { int32_t r = 0, q = 0, i; int leading = gglClz(a); i = 32 - leading; a <<= leading; do { r <<= 1; if (a < 0) r += b; a <<= 1; q <<= 1; if (r >= c) { r -= c; q++; } asm(""::); // gcc generates better code this way if (r >= c) { r -= c; q++; } } while (--i); return q; } // this function computes a*b/c with 64-bit intermediate accuracy // overflows (e.g. division by 0) are handled and return INT_MAX int32_t gglMulDivi(int32_t a, int32_t b, int32_t c) { int32_t result; int32_t sign = a^b^c; if (a < 0) a = -a; if (b < 0) b = -b; if (c < 0) c = -c; if (a < b) { swap(a, b); } if (b <= c) result = quick_muldiv(a, b, c); else result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c); if (sign < 0) result = -result; return result; }