/* * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <algos/mat.h> #include <assert.h> #include <nanohub_math.h> #include <sys/types.h> #include <float.h> #include <string.h> #include <seos.h> #define kEps 1E-5 void initZeroMatrix(struct Mat33 *A) { memset(A->elem, 0.0f, sizeof(A->elem)); } UNROLLED void initDiagonalMatrix(struct Mat33 *A, float x) { initZeroMatrix(A); uint32_t i; for (i = 0; i < 3; ++i) { A->elem[i][i] = x; } } void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1, const struct Vec3 *v2, const struct Vec3 *v3) { A->elem[0][0] = v1->x; A->elem[0][1] = v2->x; A->elem[0][2] = v3->x; A->elem[1][0] = v1->y; A->elem[1][1] = v2->y; A->elem[1][2] = v3->y; A->elem[2][0] = v1->z; A->elem[2][1] = v2->z; A->elem[2][2] = v3->z; } void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) { // assert(out != v); out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z; out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z; out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z; } UNROLLED void mat33Multiply(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B) { // assert(out != A); // assert(out != B); uint32_t i; for (i = 0; i < 3; ++i) { uint32_t j; for (j = 0; j < 3; ++j) { uint32_t k; float sum = 0.0f; for (k = 0; k < 3; ++k) { sum += A->elem[i][k] * B->elem[k][j]; } out->elem[i][j] = sum; } } } UNROLLED void mat33ScalarMul(struct Mat33 *A, float c) { uint32_t i; for (i = 0; i < 3; ++i) { uint32_t j; for (j = 0; j < 3; ++j) { A->elem[i][j] *= c; } } } UNROLLED void mat33Add(struct Mat33 *out, const struct Mat33 *A) { uint32_t i; for (i = 0; i < 3; ++i) { uint32_t j; for (j = 0; j < 3; ++j) { out->elem[i][j] += A->elem[i][j]; } } } UNROLLED void mat33Sub(struct Mat33 *out, const struct Mat33 *A) { uint32_t i; for (i = 0; i < 3; ++i) { uint32_t j; for (j = 0; j < 3; ++j) { out->elem[i][j] -= A->elem[i][j]; } } } UNROLLED int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) { uint32_t i; for (i = 0; i < 3; ++i) { if (A->elem[i][i] < 0.0f) { return 0; } } for (i = 0; i < 3; ++i) { uint32_t j; for (j = i + 1; j < 3; ++j) { if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) { return 0; } } } return 1; } UNROLLED void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B) { // assert(out != A); // assert(out != B); uint32_t i; for (i = 0; i < 3; ++i) { uint32_t j; for (j = 0; j < 3; ++j) { uint32_t k; float sum = 0.0f; for (k = 0; k < 3; ++k) { sum += A->elem[k][i] * B->elem[k][j]; } out->elem[i][j] = sum; } } } UNROLLED void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B) { // assert(out != A); // assert(out != B); uint32_t i; for (i = 0; i < 3; ++i) { uint32_t j; for (j = 0; j < 3; ++j) { uint32_t k; float sum = 0.0f; for (k = 0; k < 3; ++k) { sum += A->elem[i][k] * B->elem[j][k]; } out->elem[i][j] = sum; } } } UNROLLED void mat33Invert(struct Mat33 *out, const struct Mat33 *A) { float t; initDiagonalMatrix(out, 1.0f); struct Mat33 tmp = *A; uint32_t i, k; for (i = 0; i < 3; ++i) { uint32_t swap = i; uint32_t j; for (j = i + 1; j < 3; ++j) { if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) { swap = j; } } if (swap != i) { for (k = 0; k < 3; ++k) { t = tmp.elem[i][k]; tmp.elem[i][k] = tmp.elem[swap][k]; tmp.elem[swap][k] = t; t = out->elem[i][k]; out->elem[i][k] = out->elem[swap][k]; out->elem[swap][k] = t; } } t = 1.0f / tmp.elem[i][i]; for (k = 0; k < 3; ++k) { tmp.elem[i][k] *= t; out->elem[i][k] *= t; } for (j = 0; j < 3; ++j) { if (j != i) { t = tmp.elem[j][i]; for (k = 0; k < 3; ++k) { tmp.elem[j][k] -= tmp.elem[i][k] * t; out->elem[j][k] -= out->elem[i][k] * t; } } } } } UNROLLED void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) { uint32_t i; for (i = 0; i < 3; ++i) { uint32_t j; for (j = 0; j < 3; ++j) { out->elem[i][j] = A->elem[j][i]; } } } UNROLLED void mat33DecomposeLup(struct Mat33 *LU, struct Size3 *pivot) { const uint32_t N = 3; uint32_t i, j, k; for (k = 0; k < N; ++k) { pivot->elem[k] = k; float max = fabsf(LU->elem[k][k]); for (j = k + 1; j < N; ++j) { if (max < fabsf(LU->elem[j][k])) { max = fabsf(LU->elem[j][k]); pivot->elem[k] = j; } } if (pivot->elem[k] != k) { mat33SwapRows(LU, k, pivot->elem[k]); } if (fabsf(LU->elem[k][k]) < kEps) { continue; } for (j = k + 1; j < N; ++j) { LU->elem[k][j] /= LU->elem[k][k]; } for (i = k + 1; i < N; ++i) { for (j = k + 1; j < 3; ++j) { LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j]; } } } } UNROLLED void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) { const uint32_t N = 3; uint32_t k; if (i == j) { return; } for (k = 0; k < N; ++k) { float tmp = A->elem[i][k]; A->elem[i][k] = A->elem[j][k]; A->elem[j][k] = tmp; } } UNROLLED void mat33Solve(const struct Mat33 *A, struct Vec3 *x, const struct Vec3 *b, const struct Size3 *pivot) { const uint32_t N = 3; uint32_t i, k; float bCopy[N]; bCopy[0] = b->x; bCopy[1] = b->y; bCopy[2] = b->z; float _x[N]; for (k = 0; k < N; ++k) { if (pivot->elem[k] != k) { float tmp = bCopy[k]; bCopy[k] = bCopy[pivot->elem[k]]; bCopy[pivot->elem[k]] = tmp; } _x[k] = bCopy[k]; for (i = 0; i < k; ++i) { _x[k] -= _x[i] * A->elem[k][i]; } _x[k] /= A->elem[k][k]; } for (k = N; k-- > 0;) { for (i = k + 1; i < N; ++i) { _x[k] -= _x[i] * A->elem[k][i]; } } initVec3(x, _x[0], _x[1], _x[2]); } /* Returns the eigenvalues and corresponding eigenvectors of the _symmetric_ matrix. The i-th eigenvalue corresponds to the eigenvector in the i-th _row_ of "eigenvecs". */ UNROLLED void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals, struct Mat33 *eigenvecs) { const uint32_t N = 3; uint32_t i, j, k, l, m; float _eigenvals[N]; uint32_t ind[N]; for (k = 0; k < N; ++k) { ind[k] = mat33Maxind(S, k); _eigenvals[k] = S->elem[k][k]; } initDiagonalMatrix(eigenvecs, 1.0f); for (;;) { m = 0; for (k = 1; k + 1 < N; ++k) { if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) { m = k; } } k = m; l = ind[m]; float p = S->elem[k][l]; if (fabsf(p) < kEps) { break; } float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f; float t = fabsf(y) + sqrtf(p * p + y * y); float s = sqrtf(p * p + t * t); float c = t / s; s = p / s; t = p * p / t; if (y < 0.0f) { s = -s; t = -t; } S->elem[k][l] = 0.0f; _eigenvals[k] -= t; _eigenvals[l] += t; for (i = 0; i < k; ++i) { mat33Rotate(S, c, s, i, k, i, l); } for (i = k + 1; i < l; ++i) { mat33Rotate(S, c, s, k, i, i, l); } for (i = l + 1; i < N; ++i) { mat33Rotate(S, c, s, k, i, l, i); } for (i = 0; i < N; ++i) { float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i]; eigenvecs->elem[l][i] = s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i]; eigenvecs->elem[k][i] = tmp; } ind[k] = mat33Maxind(S, k); ind[l] = mat33Maxind(S, l); float sum = 0.0f; for (i = 0; i < N; ++i) { for (j = i + 1; j < N; ++j) { sum += fabsf(S->elem[i][j]); } } if (sum < kEps) { break; } } for (k = 0; k < N; ++k) { m = k; for (l = k + 1; l < N; ++l) { if (_eigenvals[l] > _eigenvals[m]) { m = l; } } if (k != m) { float tmp = _eigenvals[k]; _eigenvals[k] = _eigenvals[m]; _eigenvals[m] = tmp; mat33SwapRows(eigenvecs, k, m); } } initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]); } // index of largest off-diagonal element in row k UNROLLED uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) { const uint32_t N = 3; uint32_t m = k + 1; uint32_t i; for (i = k + 2; i < N; ++i) { if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) { m = i; } } return m; } void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l, uint32_t i, uint32_t j) { float tmp = c * A->elem[k][l] - s * A->elem[i][j]; A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j]; A->elem[k][l] = tmp; } void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) { // assert(out != v); out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z + A->elem[0][3] * v->w; out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z + A->elem[1][3] * v->w; out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z + A->elem[2][3] * v->w; out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z + A->elem[3][3] * v->w; } UNROLLED void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) { const uint32_t N = 4; uint32_t i, j, k; for (k = 0; k < N; ++k) { pivot->elem[k] = k; float max = fabsf(LU->elem[k][k]); for (j = k + 1; j < N; ++j) { if (max < fabsf(LU->elem[j][k])) { max = fabsf(LU->elem[j][k]); pivot->elem[k] = j; } } if (pivot->elem[k] != k) { mat44SwapRows(LU, k, pivot->elem[k]); } if (fabsf(LU->elem[k][k]) < kEps) { continue; } for (j = k + 1; j < N; ++j) { LU->elem[k][j] /= LU->elem[k][k]; } for (i = k + 1; i < N; ++i) { for (j = k + 1; j < N; ++j) { LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j]; } } } } UNROLLED void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) { const uint32_t N = 4; uint32_t k; if (i == j) { return; } for (k = 0; k < N; ++k) { float tmp = A->elem[i][k]; A->elem[i][k] = A->elem[j][k]; A->elem[j][k] = tmp; } } UNROLLED void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b, const struct Size4 *pivot) { const uint32_t N = 4; uint32_t i, k; float bCopy[N]; bCopy[0] = b->x; bCopy[1] = b->y; bCopy[2] = b->z; bCopy[3] = b->w; float _x[N]; for (k = 0; k < N; ++k) { if (pivot->elem[k] != k) { float tmp = bCopy[k]; bCopy[k] = bCopy[pivot->elem[k]]; bCopy[pivot->elem[k]] = tmp; } _x[k] = bCopy[k]; for (i = 0; i < k; ++i) { _x[k] -= _x[i] * A->elem[k][i]; } _x[k] /= A->elem[k][k]; } for (k = N; k-- > 0;) { for (i = k + 1; i < N; ++i) { _x[k] -= _x[i] * A->elem[k][i]; } } initVec4(x, _x[0], _x[1], _x[2], _x[3]); }