/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/math/mat.h"
#include <assert.h>
#include <float.h>
#ifdef _OS_BUILD_
#include <nanohub_math.h>
#include <seos.h>
#else
#include <math.h>
#ifndef UNROLLED
#define UNROLLED
#endif
#endif // _OS_BUILD_
#include <stddef.h>
#include <string.h>
// Magnitude threshold used in this file as "close enough to zero": the
// convergence tests in mat33GetEigenbasis(), the pivot check in
// mat44DecomposeLup(), and the diagonal check in matLinearSolveCholesky().
// The literal has type double, so float operands are promoted to double when
// they are compared against it.
#define EPSILON 1E-5
// Lower bound on the value placed under the square root (A[i][i] minus the
// squares of the row entries already computed) in matCholeskyDecomposition();
// anything smaller makes the decomposition report failure. Also a double
// literal.
#define CHOLESKY_TOLERANCE 1E-6
// Forward declarations. These helpers are declared static here, so they keep
// internal linkage even though their definitions further down omit the
// keyword.
static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
uint32_t l, uint32_t i, uint32_t j);
static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
// Sets every element of the 3x3 matrix A to zero.
//
// A must not be NULL.
void initZeroMatrix(struct Mat33 *A) {
  ASSERT_NOT_NULL(A);
  // memset() takes its fill value as an int (converted to unsigned char), so
  // pass an integer zero instead of relying on an implicit float-to-int
  // conversion of 0.0f. All-bits-zero reads back as 0.0f on IEEE-754 targets.
  memset(A->elem, 0, sizeof(A->elem));
}
// Initializes A to x times the identity matrix: every off-diagonal element
// becomes zero and the three diagonal elements become x.
//
// A must not be NULL.
UNROLLED
void initDiagonalMatrix(struct Mat33 *A, float x) {
  ASSERT_NOT_NULL(A);

  // Clear everything first, then fill in the diagonal.
  initZeroMatrix(A);
  A->elem[0][0] = x;
  A->elem[1][1] = x;
  A->elem[2][2] = x;
}
// Builds A from three column vectors: column 0 is v1, column 1 is v2 and
// column 2 is v3, so row 0 holds the x components, row 1 the y components and
// row 2 the z components.
//
// None of the pointers may be NULL.
void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
                       const struct Vec3 *v2, const struct Vec3 *v3) {
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(v1);
  ASSERT_NOT_NULL(v2);
  ASSERT_NOT_NULL(v3);

  const struct Vec3 *const columns[3] = {v1, v2, v3};
  uint32_t c;

  // Fill row by row so the elements are written in row-major order.
  for (c = 0; c < 3; ++c) {
    A->elem[0][c] = columns[c]->x;
  }
  for (c = 0; c < 3; ++c) {
    A->elem[1][c] = columns[c]->y;
  }
  for (c = 0; c < 3; ++c) {
    A->elem[2][c] = columns[c]->z;
  }
}
// Computes the matrix-vector product out = A * v.
//
// out may point to the same vector as v: the components of v are read before
// anything is written to out, so an in-place transform gives the correct
// result.
//
// None of the pointers may be NULL.
void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(v);

  // Snapshot the input so that writing out->x cannot change the value used
  // for the y and z rows when out == v.
  const float vx = v->x;
  const float vy = v->y;
  const float vz = v->z;

  out->x = A->elem[0][0] * vx + A->elem[0][1] * vy + A->elem[0][2] * vz;
  out->y = A->elem[1][0] * vx + A->elem[1][1] * vy + A->elem[1][2] * vz;
  out->z = A->elem[2][0] * vx + A->elem[2][1] * vy + A->elem[2][2] * vz;
}
// Computes the matrix product out = A * B.
//
// out must be a different object from both A and B (asserted), because the
// result is written while the inputs are still being read. None of the
// pointers may be NULL.
UNROLLED
void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
                   const struct Mat33 *B) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(B);
  ASSERT(out != A);
  ASSERT(out != B);

  for (uint32_t row = 0; row < 3; ++row) {
    for (uint32_t col = 0; col < 3; ++col) {
      // Dot product of row `row` of A with column `col` of B.
      float acc = 0.0f;
      for (uint32_t inner = 0; inner < 3; ++inner) {
        acc += A->elem[row][inner] * B->elem[inner][col];
      }
      out->elem[row][col] = acc;
    }
  }
}
// Multiplies every element of A by the scalar c, in place.
//
// A must not be NULL.
UNROLLED
void mat33ScalarMul(struct Mat33 *A, float c) {
  ASSERT_NOT_NULL(A);

  // Walk the nine elements in row-major order.
  for (uint32_t idx = 0; idx < 9; ++idx) {
    A->elem[idx / 3][idx % 3] *= c;
  }
}
// Element-wise accumulation: out += A.
//
// Neither pointer may be NULL.
UNROLLED
void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);

  for (uint32_t row = 0; row < 3; ++row) {
    for (uint32_t col = 0; col < 3; ++col) {
      out->elem[row][col] += A->elem[row][col];
    }
  }
}
// Element-wise subtraction: out -= A.
//
// Neither pointer may be NULL.
UNROLLED
void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);

  for (uint32_t row = 0; row < 3; ++row) {
    for (uint32_t col = 0; col < 3; ++col) {
      out->elem[row][col] -= A->elem[row][col];
    }
  }
}
// Cheap plausibility check that A could be positive semidefinite.
//
// Returns 1 when both of the following hold, 0 otherwise:
//   - no diagonal element is negative, and
//   - A is symmetric to within `tolerance`, i.e. |A[i][j] - A[j][i]| does not
//     exceed tolerance for any pair i < j.
//
// Note that these are the only two conditions tested; no eigenvalues or
// determinants are computed.
//
// A must not be NULL.
UNROLLED
int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
  ASSERT_NOT_NULL(A);

  for (uint32_t row = 0; row < 3; ++row) {
    if (A->elem[row][row] < 0.0f) {
      return 0;
    }
    // Compare each element above the diagonal with its mirror image.
    for (uint32_t col = row + 1; col < 3; ++col) {
      const float asymmetry = fabsf(A->elem[row][col] - A->elem[col][row]);
      if (asymmetry > tolerance) {
        return 0;
      }
    }
  }
  return 1;
}
// Computes out = transpose(A) * B, i.e.
// out[i][j] = sum over k of A[k][i] * B[k][j].
//
// out must be a different object from both A and B (asserted). None of the
// pointers may be NULL.
UNROLLED
void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
                             const struct Mat33 *B) {
  ASSERT(out != A);
  ASSERT(out != B);
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(B);

  for (uint32_t row = 0; row < 3; ++row) {
    for (uint32_t col = 0; col < 3; ++col) {
      // Dot product of column `row` of A with column `col` of B.
      float acc = 0.0f;
      for (uint32_t inner = 0; inner < 3; ++inner) {
        acc += A->elem[inner][row] * B->elem[inner][col];
      }
      out->elem[row][col] = acc;
    }
  }
}
// Computes out = A * transpose(B), i.e.
// out[i][j] = sum over k of A[i][k] * B[j][k].
//
// out must be a different object from both A and B (asserted). None of the
// pointers may be NULL.
UNROLLED
void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
                              const struct Mat33 *B) {
  ASSERT(out != A);
  ASSERT(out != B);
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(B);

  for (uint32_t row = 0; row < 3; ++row) {
    for (uint32_t col = 0; col < 3; ++col) {
      // Dot product of row `row` of A with row `col` of B.
      float acc = 0.0f;
      for (uint32_t inner = 0; inner < 3; ++inner) {
        acc += A->elem[row][inner] * B->elem[col][inner];
      }
      out->elem[row][col] = acc;
    }
  }
}
// Computes the inverse of A into out by Gauss-Jordan elimination with
// partial (row) pivoting.
//
// A working copy of A is reduced to the identity while the same row
// operations are applied to out, which starts as the identity.
//
// out may point to the same matrix as A: A is copied before out is touched.
//
// If a pivot is exactly zero (or NaN) the matrix cannot be inverted by this
// procedure: the ASSERT fires in builds where it is enabled, and otherwise
// the function returns early, leaving out partially processed and not a
// valid inverse.
//
// Neither pointer may be NULL.
UNROLLED
void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);

  // Take the snapshot of A first so that in-place inversion (out == A) does
  // not overwrite the input with the identity before it is read.
  struct Mat33 tmp = *A;
  initDiagonalMatrix(out, 1.0f);

  float t;
  uint32_t i, k;
  for (i = 0; i < 3; ++i) {
    // Partial pivoting: among rows i..2 pick the one whose entry in column i
    // has the largest magnitude. Each candidate is compared against the best
    // row found so far, not merely against the current diagonal.
    uint32_t swap = i;
    uint32_t j;
    for (j = i + 1; j < 3; ++j) {
      if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[swap][i])) {
        swap = j;
      }
    }

    // Bring the pivot row into position i in both matrices.
    if (swap != i) {
      for (k = 0; k < 3; ++k) {
        t = tmp.elem[i][k];
        tmp.elem[i][k] = tmp.elem[swap][k];
        tmp.elem[swap][k] = t;

        t = out->elem[i][k];
        out->elem[i][k] = out->elem[swap][k];
        out->elem[swap][k] = t;
      }
    }

    // Divide-by-zero guard. Written as !(x > 0) so that NaN is rejected too.
    ASSERT(fabsf(tmp.elem[i][i]) > 0.0f);
    if (!(fabsf(tmp.elem[i][i]) > 0.0f)) {
      return;
    }

    // Scale row i so that the pivot becomes 1.
    t = 1.0f / tmp.elem[i][i];
    for (k = 0; k < 3; ++k) {
      tmp.elem[i][k] *= t;
      out->elem[i][k] *= t;
    }

    // Eliminate column i from every other row.
    for (j = 0; j < 3; ++j) {
      if (j != i) {
        t = tmp.elem[j][i];
        for (k = 0; k < 3; ++k) {
          tmp.elem[j][k] -= tmp.elem[i][k] * t;
          out->elem[j][k] -= out->elem[i][k] * t;
        }
      }
    }
  }
}
// Writes the transpose of A into out: out[i][j] = A[j][i].
//
// out is written while A is still being read, so passing the same matrix for
// both does not produce a correct in-place transpose. Neither pointer may be
// NULL.
UNROLLED
void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);

  for (uint32_t row = 0; row < 3; ++row) {
    for (uint32_t col = 0; col < 3; ++col) {
      out->elem[row][col] = A->elem[col][row];
    }
  }
}
// Exchanges rows i and j of A in place. Does nothing when i == j.
//
// A must not be NULL; i and j are used directly as row indices.
UNROLLED
void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
  ASSERT_NOT_NULL(A);

  if (i != j) {
    for (uint32_t col = 0; col < 3; ++col) {
      const float saved = A->elem[j][col];
      A->elem[j][col] = A->elem[i][col];
      A->elem[i][col] = saved;
    }
  }
}
// Computes eigenvalues and eigenvectors of the 3x3 matrix S by repeatedly
// applying plane rotations that zero the largest remaining element above the
// diagonal (this looks like the classical Jacobi eigenvalue iteration).
//
// Only the diagonal and the elements above the diagonal of S are read, so S
// is presumably expected to be symmetric -- confirm with callers. S is
// modified in place.
//
// On return:
//   eigenvals - the three eigenvalue estimates, sorted in descending order.
//   eigenvecs - row k holds the vector that goes with component k of
//               eigenvals (rows are swapped together with the eigenvalues in
//               the final sort).
//
// NOTE(review): the main loop has no iteration cap; it exits only through the
// two EPSILON tests, which may never pass if S contains NaN.
//
// None of the pointers may be NULL.
UNROLLED
void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
struct Mat33 *eigenvecs) {
ASSERT_NOT_NULL(S);
ASSERT_NOT_NULL(eigenvals);
ASSERT_NOT_NULL(eigenvecs);
const uint32_t N = 3;
uint32_t i, j, k, l, m;
// Running eigenvalue estimates; they start out as the diagonal of S.
float _eigenvals[N];
// ind[k] is the column of the largest-magnitude element to the right of the
// diagonal in row k. For the last row mat33Maxind() returns N, but that entry
// is never used as an index below.
uint32_t ind[N];
for (k = 0; k < N; ++k) {
ind[k] = mat33Maxind(S, k);
_eigenvals[k] = S->elem[k][k];
}
// The accumulated rotations start from the identity.
initDiagonalMatrix(eigenvecs, 1.0f);
for (;;) {
// Find the row m whose largest above-diagonal element is biggest overall.
// Only rows 0..N-2 have elements to the right of the diagonal, hence the
// `k + 1 < N` bound.
m = 0;
for (k = 1; k + 1 < N; ++k) {
if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
m = k;
}
}
// (k, l) is the position of the pivot element p, with k < l.
k = m;
l = ind[m];
float p = S->elem[k][l];
// Converged: the largest above-diagonal element is negligible.
if (fabsf(p) < EPSILON) {
break;
}
// Rotation parameters. After the next few lines c and s satisfy
// c * c + s * s == 1, and t is the amount moved between the two eigenvalue
// estimates.
float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
float t = fabsf(y) + sqrtf(p * p + y * y);
float s = sqrtf(p * p + t * t);
float c = t / s;
s = p / s;
t = p * p / t;
if (y < 0.0f) {
s = -s;
t = -t;
}
// The rotation zeroes the pivot and shifts the two diagonal estimates.
S->elem[k][l] = 0.0f;
_eigenvals[k] -= t;
_eigenvals[l] += t;
// Rotate the remaining above-diagonal elements that involve index k or l.
// The three loops cover i < k, k < i < l and i > l so that every access
// stays above the diagonal.
for (i = 0; i < k; ++i) {
mat33Rotate(S, c, s, i, k, i, l);
}
for (i = k + 1; i < l; ++i) {
mat33Rotate(S, c, s, k, i, i, l);
}
for (i = l + 1; i < N; ++i) {
mat33Rotate(S, c, s, k, i, l, i);
}
// Apply the same rotation to rows k and l of the eigenvector matrix.
for (i = 0; i < N; ++i) {
float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
eigenvecs->elem[l][i] =
s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
eigenvecs->elem[k][i] = tmp;
}
// Rows k and l changed, so refresh their cached maximum positions.
ind[k] = mat33Maxind(S, k);
ind[l] = mat33Maxind(S, l);
// Second stopping test: total magnitude of everything above the diagonal.
float sum = 0.0f;
for (i = 0; i < N; ++i) {
for (j = i + 1; j < N; ++j) {
sum += fabsf(S->elem[i][j]);
}
}
if (sum < EPSILON) {
break;
}
}
// Selection sort into descending eigenvalue order, moving the matching
// eigenvector rows along with the values.
for (k = 0; k < N; ++k) {
m = k;
for (l = k + 1; l < N; ++l) {
if (_eigenvals[l] > _eigenvals[m]) {
m = l;
}
}
if (k != m) {
float tmp = _eigenvals[k];
_eigenvals[k] = _eigenvals[m];
_eigenvals[m] = tmp;
mat33SwapRows(eigenvecs, k, m);
}
}
initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
}
// Returns the column index of the largest-magnitude element to the right of
// the diagonal in row k of A.
//
// The search starts at column k + 1 and only looks at columns below 3. When
// there is no column beyond k + 1 to compare against (including the case
// where k + 1 is itself past the last column) k + 1 is returned unchanged and
// no element is read.
UNROLLED
uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
  ASSERT_NOT_NULL(A);

  uint32_t best = k + 1;
  for (uint32_t col = best + 1; col < 3; ++col) {
    if (fabsf(A->elem[k][col]) > fabsf(A->elem[k][best])) {
      best = col;
    }
  }
  return best;
}
// Applies a plane rotation, given by the pair (c, s), to the two elements
// A[k][l] and A[i][j]:
//   A[k][l] <- c * A[k][l] - s * A[i][j]
//   A[i][j] <- s * A[k][l] + c * A[i][j]
// where both right-hand sides use the values from before the call.
//
// A must not be NULL.
void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
                 uint32_t i, uint32_t j) {
  ASSERT_NOT_NULL(A);

  const float first = A->elem[k][l];
  const float second = A->elem[i][j];

  A->elem[i][j] = s * first + c * second;
  A->elem[k][l] = c * first - s * second;
}
// Computes the matrix-vector product out = A * v for a 4x4 matrix.
//
// out may point to the same vector as v: the components of v are read before
// anything is written to out, so an in-place transform gives the correct
// result.
//
// None of the pointers may be NULL.
void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(v);

  // Snapshot the input so that writing one component of out cannot change
  // the values used for the remaining rows when out == v.
  const float vx = v->x;
  const float vy = v->y;
  const float vz = v->z;
  const float vw = v->w;

  out->x = A->elem[0][0] * vx + A->elem[0][1] * vy + A->elem[0][2] * vz +
           A->elem[0][3] * vw;
  out->y = A->elem[1][0] * vx + A->elem[1][1] * vy + A->elem[1][2] * vz +
           A->elem[1][3] * vw;
  out->z = A->elem[2][0] * vx + A->elem[2][1] * vy + A->elem[2][2] * vz +
           A->elem[2][3] * vw;
  out->w = A->elem[3][0] * vx + A->elem[3][1] * vy + A->elem[3][2] * vz +
           A->elem[3][3] * vw;
}
// Factorizes the 4x4 matrix LU in place using row pivoting.
//
// For each column k:
//   1. the row at or below k with the largest magnitude in column k is
//      recorded in pivot->elem[k] and swapped into position k;
//   2. the elements of row k to the right of the diagonal are divided by the
//      pivot LU[k][k];
//   3. the trailing sub-matrix (rows and columns > k) is updated by
//      subtracting LU[i][k] * LU[k][j].
// The diagonal and the elements below it are not divided by the pivot.
//
// If the pivot magnitude is below EPSILON, steps 2 and 3 are skipped for that
// column and the near-zero value is left on the diagonal.
//
// Neither pointer may be NULL.
UNROLLED
void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
  ASSERT_NOT_NULL(LU);
  ASSERT_NOT_NULL(pivot);
  const uint32_t dim = 4;

  for (uint32_t col = 0; col < dim; ++col) {
    // Step 1: search column `col` for the largest magnitude at or below the
    // diagonal.
    uint32_t best_row = col;
    float best_mag = fabsf(LU->elem[col][col]);
    for (uint32_t row = col + 1; row < dim; ++row) {
      const float mag = fabsf(LU->elem[row][col]);
      if (best_mag < mag) {
        best_mag = mag;
        best_row = row;
      }
    }
    pivot->elem[col] = best_row;
    if (best_row != col) {
      mat44SwapRows(LU, col, best_row);
    }

    // A (nearly) singular column is left as it is.
    if (fabsf(LU->elem[col][col]) < EPSILON) {
      continue;
    }

    // Step 2: scale the part of the pivot row right of the diagonal.
    const float diag = LU->elem[col][col];
    for (uint32_t right = col + 1; right < dim; ++right) {
      LU->elem[col][right] /= diag;
    }

    // Step 3: eliminate from the trailing sub-matrix.
    for (uint32_t row = col + 1; row < dim; ++row) {
      const float factor = LU->elem[row][col];
      for (uint32_t right = col + 1; right < dim; ++right) {
        LU->elem[row][right] -= factor * LU->elem[col][right];
      }
    }
  }
}
// Exchanges rows i and j of the 4x4 matrix A in place. Does nothing when
// i == j.
//
// A must not be NULL; i and j are used directly as row indices.
UNROLLED
void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
  ASSERT_NOT_NULL(A);

  if (i != j) {
    for (uint32_t col = 0; col < 4; ++col) {
      const float saved = A->elem[j][col];
      A->elem[j][col] = A->elem[i][col];
      A->elem[i][col] = saved;
    }
  }
}
// Solves a 4x4 linear system for x given a factorized matrix A, a right-hand
// side b and the row permutation in pivot.
//
// A and pivot are presumably the outputs of mat44DecomposeLup(): the forward
// pass divides by the diagonal of A and uses the elements below it, while the
// backward pass uses the elements above the diagonal without any division.
//
// b is not modified; the row swaps are applied to a local copy.
//
// NOTE(review): the diagonal of A is not checked before dividing, so a zero
// diagonal entry yields inf/NaN in x.
//
// None of the pointers may be NULL.
UNROLLED
void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
                const struct Size4 *pivot) {
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(x);
  ASSERT_NOT_NULL(b);
  ASSERT_NOT_NULL(pivot);
  const uint32_t dim = 4;

  float rhs[4];
  rhs[0] = b->x;
  rhs[1] = b->y;
  rhs[2] = b->z;
  rhs[3] = b->w;
  float sol[4];

  // Forward substitution, applying the recorded row swaps as we go. A swap at
  // step `row` only touches entries at or after `row`, none of which have
  // been consumed yet.
  for (uint32_t row = 0; row < dim; ++row) {
    if (pivot->elem[row] != row) {
      const float saved = rhs[row];
      rhs[row] = rhs[pivot->elem[row]];
      rhs[pivot->elem[row]] = saved;
    }

    sol[row] = rhs[row];
    for (uint32_t col = 0; col < row; ++col) {
      sol[row] -= sol[col] * A->elem[row][col];
    }
    sol[row] /= A->elem[row][row];
  }

  // Backward substitution, from the last row up to the first.
  uint32_t row = dim;
  while (row > 0) {
    --row;
    for (uint32_t col = row + 1; col < dim; ++col) {
      sol[row] -= sol[col] * A->elem[row][col];
    }
  }

  initVec4(x, sol[0], sol[1], sol[2], sol[3]);
}
// Returns the largest value on the main diagonal of an n x n matrix stored
// contiguously in row-major order.
//
// square_mat must not be NULL and n must be greater than zero (asserted).
float matMaxDiagonalElement(const float *square_mat, size_t n) {
  ASSERT_NOT_NULL(square_mat);
  ASSERT(n > 0);

  // Consecutive diagonal elements are n + 1 floats apart.
  const size_t stride = n + 1;
  float largest = square_mat[0];
  for (size_t d = 1; d < n; ++d) {
    const float candidate = square_mat[d * stride];
    if (candidate > largest) {
      largest = candidate;
    }
  }
  return largest;
}
// Adds the constant u to every element on the main diagonal of an n x n
// matrix stored contiguously in row-major order. Nothing is touched when
// n is zero.
//
// square_mat must not be NULL.
void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
  ASSERT_NOT_NULL(square_mat);

  // Consecutive diagonal elements are n + 1 floats apart.
  const size_t stride = n + 1;
  for (size_t d = 0; d < n; ++d) {
    square_mat[d * stride] += u;
  }
}
// Computes out = transpose(A) * A.
//
// A is an nrows x ncols matrix and out an ncols x ncols matrix, both stored
// contiguously in row-major order. Because the product is symmetric, only
// the elements on and above the diagonal are computed; the ones below are
// copied from their mirror image.
//
// Neither pointer may be NULL.
void matTransposeMultiplyMat(float *out, const float *A,
                             size_t nrows, size_t ncols) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);

  memset(out, 0, sizeof(float) * ncols * ncols);

  for (size_t r = 0; r < ncols; ++r) {
    float *out_row = out + r * ncols;

    // Left of the diagonal: reuse the value already computed for (c, r).
    for (size_t c = 0; c < r; ++c) {
      out_row[c] = out[c * ncols + r];
    }

    // On and right of the diagonal: dot product of columns r and c of A.
    for (size_t c = r; c < ncols; ++c) {
      out_row[c] = 0;
      for (size_t k = 0; k < nrows; ++k) {
        out_row[c] += A[k * ncols + r] * A[k * ncols + c];
      }
    }
  }
}
// Computes the matrix-vector product out = A * v.
//
// A is an nrows x ncols matrix stored contiguously in row-major order; each
// of the nrows entries of out is the vecDot() of one row of A with v, taken
// over ncols elements.
//
// None of the pointers may be NULL.
void matMultiplyVec(float *out, const float *A, const float *v,
                    size_t nrows, size_t ncols) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(v);

  const int row_len = (int)ncols;
  for (size_t r = 0; r < nrows; ++r) {
    out[r] = vecDot(A + r * ncols, v, row_len);
  }
}
// Computes out = transpose(A) * v.
//
// A is an nrows x ncols matrix stored contiguously in row-major order; v is
// indexed over nrows elements and out receives ncols elements, each being the
// dot product of one column of A with v.
//
// None of the pointers may be NULL.
void matTransposeMultiplyVec(float *out, const float *A, const float *v,
                             size_t nrows, size_t ncols) {
  ASSERT_NOT_NULL(out);
  ASSERT_NOT_NULL(A);
  ASSERT_NOT_NULL(v);

  for (size_t col = 0; col < ncols; ++col) {
    float *dst = &out[col];
    *dst = 0;
    // Walk down column `col` of A.
    for (size_t row = 0; row < nrows; ++row) {
      *dst += A[row * ncols + col] * v[row];
    }
  }
}
// Solves (L * transpose(L)) * x = b for x, where L is an n x n lower
// triangular matrix stored contiguously in row-major order.
//
// The solve runs in two passes that both use x as their working storage:
//   1. forward substitution solves L * y = b, and
//   2. backward substitution solves transpose(L) * x = y.
//
// Returns false, leaving x only partially written, as soon as a diagonal
// element of L is found to be smaller than EPSILON (this also rejects
// negative diagonals). Returns true otherwise.
//
// None of the pointers may be NULL and n must fit in an int32_t (asserted).
bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
  ASSERT_NOT_NULL(x);
  ASSERT_NOT_NULL(L);
  ASSERT_NOT_NULL(b);
  ASSERT(n <= INT32_MAX);

  // Signed indices are needed for the countdown loop in the second pass.
  const int32_t dim = (int32_t)n;

  // Pass 1: forward substitution, top row to bottom row.
  for (int32_t row = 0; row < dim; ++row) {
    const float *l_row = &L[row * dim];
    float acc = 0.0f;
    for (int32_t col = 0; col < row; ++col) {
      acc += l_row[col] * x[col];
    }

    // Refuse to divide by a (near) zero diagonal.
    if (l_row[row] < EPSILON) {
      return false;
    }
    x[row] = (b[row] - acc) / l_row[row];
  }

  // Pass 2: backward substitution, bottom row to top row. transpose(L) is
  // read by walking down column `row` of L.
  for (int32_t row = dim - 1; row >= 0; --row) {
    float acc = 0.0f;
    for (int32_t below = row + 1; below < dim; ++below) {
      acc += L[below * dim + row] * x[below];
    }
    x[row] = (x[row] - acc) / L[row * dim + row];
  }

  return true;
}
// Computes the lower triangular factor L such that A = L * transpose(L).
//
// A and L are n x n matrices stored contiguously in row-major order. L is
// cleared first, so the elements above its diagonal end up zero. Only the
// diagonal and the elements above the diagonal of A are read.
//
// Returns false as soon as the value that would go under the square root for
// a diagonal element of L is smaller than CHOLESKY_TOLERANCE; L is then left
// partially filled. Returns true on success.
//
// Neither pointer may be NULL.
bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
  ASSERT_NOT_NULL(L);
  ASSERT_NOT_NULL(A);

  memset(L, 0, sizeof(float) * n * n);

  for (size_t col = 0; col < n; ++col) {
    float *pivot_row = &L[col * n];

    // Diagonal: L[col][col] = sqrt(A[col][col] - sum_k L[col][k]^2), where k
    // runs over the columns already completed.
    float acc = 0.0f;
    for (size_t k = 0; k < col; ++k) {
      acc += pivot_row[k] * pivot_row[k];
    }
    const float diag_sq = A[col * n + col] - acc;
    if (diag_sq < CHOLESKY_TOLERANCE) {
      // Diagonal too small (or negative): the decomposition fails.
      return false;
    }
    const float diag = sqrtf(diag_sq);
    pivot_row[col] = diag;

    // Below the diagonal:
    // L[row][col] = (A[col][row] - sum_k L[col][k] * L[row][k]) / L[col][col].
    // The division is safe because diag_sq passed the tolerance check above.
    for (size_t row = col + 1; row < n; ++row) {
      float *lower_row = &L[row * n];
      float dot = 0.0f;
      for (size_t k = 0; k < col; ++k) {
        dot += pivot_row[k] * lower_row[k];
      }
      lower_row[col] = (A[col * n + row] - dot) / diag;
    }
  }

  return true;
}