/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string.h>
#include <stdint.h>
#include <nanohub/aes.h>


//number of cipher rounds run by aesEncr()/aesDecr() (all but the last use the round tables,
//the last uses the S-box directly); 14 is the round count FIPS-197 specifies for 256-bit keys
#define AES_NUM_ROUNDS    14



//AES forward substitution box (S-box), indexed by the input byte.
//Read directly by the key expansion, by the decryption key setup, and by the final encryption round.
static const uint8_t FwdSbox[] = {
    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
    0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
    0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
    0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
    0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
    0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
    0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
    0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
    0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
    0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
    0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
    0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
    0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
    0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
};

//AES inverse substitution box, indexed by the input byte; it undoes FwdSbox
//(e.g. FwdSbox[0x00] is 0x63 and RevSbox[0x63] is 0x00).
//Read directly only by the final decryption round.
static const uint8_t RevSbox[] = {
    0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
    0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
    0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
    0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
    0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
    0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
    0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
    0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
    0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
    0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
    0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
    0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
    0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
    0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
    0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
};

//Encryption round table, indexed by a state byte. With S = FwdSbox[i], entry i packs the bytes
//{2*S, S, S, 3*S} from most to least significant (products in GF(2^8)); e.g. S = 0x63 gives 0xC66363A5.
//The code applies ror() to these entries instead of storing the rotated copies.
static const uint32_t FwdTab0[] = { //other 3 tables are this same table, RORed 8, 16, and 24 bits respectively.
    0xC66363A5, 0xF87C7C84, 0xEE777799, 0xF67B7B8D, 0xFFF2F20D, 0xD66B6BBD, 0xDE6F6FB1, 0x91C5C554,
    0x60303050, 0x02010103, 0xCE6767A9, 0x562B2B7D, 0xE7FEFE19, 0xB5D7D762, 0x4DABABE6, 0xEC76769A,
    0x8FCACA45, 0x1F82829D, 0x89C9C940, 0xFA7D7D87, 0xEFFAFA15, 0xB25959EB, 0x8E4747C9, 0xFBF0F00B,
    0x41ADADEC, 0xB3D4D467, 0x5FA2A2FD, 0x45AFAFEA, 0x239C9CBF, 0x53A4A4F7, 0xE4727296, 0x9BC0C05B,
    0x75B7B7C2, 0xE1FDFD1C, 0x3D9393AE, 0x4C26266A, 0x6C36365A, 0x7E3F3F41, 0xF5F7F702, 0x83CCCC4F,
    0x6834345C, 0x51A5A5F4, 0xD1E5E534, 0xF9F1F108, 0xE2717193, 0xABD8D873, 0x62313153, 0x2A15153F,
    0x0804040C, 0x95C7C752, 0x46232365, 0x9DC3C35E, 0x30181828, 0x379696A1, 0x0A05050F, 0x2F9A9AB5,
    0x0E070709, 0x24121236, 0x1B80809B, 0xDFE2E23D, 0xCDEBEB26, 0x4E272769, 0x7FB2B2CD, 0xEA75759F,
    0x1209091B, 0x1D83839E, 0x582C2C74, 0x341A1A2E, 0x361B1B2D, 0xDC6E6EB2, 0xB45A5AEE, 0x5BA0A0FB,
    0xA45252F6, 0x763B3B4D, 0xB7D6D661, 0x7DB3B3CE, 0x5229297B, 0xDDE3E33E, 0x5E2F2F71, 0x13848497,
    0xA65353F5, 0xB9D1D168, 0x00000000, 0xC1EDED2C, 0x40202060, 0xE3FCFC1F, 0x79B1B1C8, 0xB65B5BED,
    0xD46A6ABE, 0x8DCBCB46, 0x67BEBED9, 0x7239394B, 0x944A4ADE, 0x984C4CD4, 0xB05858E8, 0x85CFCF4A,
    0xBBD0D06B, 0xC5EFEF2A, 0x4FAAAAE5, 0xEDFBFB16, 0x864343C5, 0x9A4D4DD7, 0x66333355, 0x11858594,
    0x8A4545CF, 0xE9F9F910, 0x04020206, 0xFE7F7F81, 0xA05050F0, 0x783C3C44, 0x259F9FBA, 0x4BA8A8E3,
    0xA25151F3, 0x5DA3A3FE, 0x804040C0, 0x058F8F8A, 0x3F9292AD, 0x219D9DBC, 0x70383848, 0xF1F5F504,
    0x63BCBCDF, 0x77B6B6C1, 0xAFDADA75, 0x42212163, 0x20101030, 0xE5FFFF1A, 0xFDF3F30E, 0xBFD2D26D,
    0x81CDCD4C, 0x180C0C14, 0x26131335, 0xC3ECEC2F, 0xBE5F5FE1, 0x359797A2, 0x884444CC, 0x2E171739,
    0x93C4C457, 0x55A7A7F2, 0xFC7E7E82, 0x7A3D3D47, 0xC86464AC, 0xBA5D5DE7, 0x3219192B, 0xE6737395,
    0xC06060A0, 0x19818198, 0x9E4F4FD1, 0xA3DCDC7F, 0x44222266, 0x542A2A7E, 0x3B9090AB, 0x0B888883,
    0x8C4646CA, 0xC7EEEE29, 0x6BB8B8D3, 0x2814143C, 0xA7DEDE79, 0xBC5E5EE2, 0x160B0B1D, 0xADDBDB76,
    0xDBE0E03B, 0x64323256, 0x743A3A4E, 0x140A0A1E, 0x924949DB, 0x0C06060A, 0x4824246C, 0xB85C5CE4,
    0x9FC2C25D, 0xBDD3D36E, 0x43ACACEF, 0xC46262A6, 0x399191A8, 0x319595A4, 0xD3E4E437, 0xF279798B,
    0xD5E7E732, 0x8BC8C843, 0x6E373759, 0xDA6D6DB7, 0x018D8D8C, 0xB1D5D564, 0x9C4E4ED2, 0x49A9A9E0,
    0xD86C6CB4, 0xAC5656FA, 0xF3F4F407, 0xCFEAEA25, 0xCA6565AF, 0xF47A7A8E, 0x47AEAEE9, 0x10080818,
    0x6FBABAD5, 0xF0787888, 0x4A25256F, 0x5C2E2E72, 0x381C1C24, 0x57A6A6F1, 0x73B4B4C7, 0x97C6C651,
    0xCBE8E823, 0xA1DDDD7C, 0xE874749C, 0x3E1F1F21, 0x964B4BDD, 0x61BDBDDC, 0x0D8B8B86, 0x0F8A8A85,
    0xE0707090, 0x7C3E3E42, 0x71B5B5C4, 0xCC6666AA, 0x904848D8, 0x06030305, 0xF7F6F601, 0x1C0E0E12,
    0xC26161A3, 0x6A35355F, 0xAE5757F9, 0x69B9B9D0, 0x17868691, 0x99C1C158, 0x3A1D1D27, 0x279E9EB9,
    0xD9E1E138, 0xEBF8F813, 0x2B9898B3, 0x22111133, 0xD26969BB, 0xA9D9D970, 0x078E8E89, 0x339494A7,
    0x2D9B9BB6, 0x3C1E1E22, 0x15878792, 0xC9E9E920, 0x87CECE49, 0xAA5555FF, 0x50282878, 0xA5DFDF7A,
    0x038C8C8F, 0x59A1A1F8, 0x09898980, 0x1A0D0D17, 0x65BFBFDA, 0xD7E6E631, 0x844242C6, 0xD06868B8,
    0x824141C3, 0x299999B0, 0x5A2D2D77, 0x1E0F0F11, 0x7BB0B0CB, 0xA85454FC, 0x6DBBBBD6, 0x2C16163A,
};

//Decryption round table, indexed by a state byte. With s = RevSbox[i], entry i packs the bytes
//{14*s, 9*s, 13*s, 11*s} from most to least significant (products in GF(2^8)); e.g. s = 0x52 gives 0x51F4A750.
//The code applies ror() to these entries instead of storing the rotated copies.
static const uint32_t RevTab0[] = { //other 3 tables are this same table, RORed 8, 16, and 24 bits respectively.
    0x51F4A750, 0x7E416553, 0x1A17A4C3, 0x3A275E96, 0x3BAB6BCB, 0x1F9D45F1, 0xACFA58AB, 0x4BE30393,
    0x2030FA55, 0xAD766DF6, 0x88CC7691, 0xF5024C25, 0x4FE5D7FC, 0xC52ACBD7, 0x26354480, 0xB562A38F,
    0xDEB15A49, 0x25BA1B67, 0x45EA0E98, 0x5DFEC0E1, 0xC32F7502, 0x814CF012, 0x8D4697A3, 0x6BD3F9C6,
    0x038F5FE7, 0x15929C95, 0xBF6D7AEB, 0x955259DA, 0xD4BE832D, 0x587421D3, 0x49E06929, 0x8EC9C844,
    0x75C2896A, 0xF48E7978, 0x99583E6B, 0x27B971DD, 0xBEE14FB6, 0xF088AD17, 0xC920AC66, 0x7DCE3AB4,
    0x63DF4A18, 0xE51A3182, 0x97513360, 0x62537F45, 0xB16477E0, 0xBB6BAE84, 0xFE81A01C, 0xF9082B94,
    0x70486858, 0x8F45FD19, 0x94DE6C87, 0x527BF8B7, 0xAB73D323, 0x724B02E2, 0xE31F8F57, 0x6655AB2A,
    0xB2EB2807, 0x2FB5C203, 0x86C57B9A, 0xD33708A5, 0x302887F2, 0x23BFA5B2, 0x02036ABA, 0xED16825C,
    0x8ACF1C2B, 0xA779B492, 0xF307F2F0, 0x4E69E2A1, 0x65DAF4CD, 0x0605BED5, 0xD134621F, 0xC4A6FE8A,
    0x342E539D, 0xA2F355A0, 0x058AE132, 0xA4F6EB75, 0x0B83EC39, 0x4060EFAA, 0x5E719F06, 0xBD6E1051,
    0x3E218AF9, 0x96DD063D, 0xDD3E05AE, 0x4DE6BD46, 0x91548DB5, 0x71C45D05, 0x0406D46F, 0x605015FF,
    0x1998FB24, 0xD6BDE997, 0x894043CC, 0x67D99E77, 0xB0E842BD, 0x07898B88, 0xE7195B38, 0x79C8EEDB,
    0xA17C0A47, 0x7C420FE9, 0xF8841EC9, 0x00000000, 0x09808683, 0x322BED48, 0x1E1170AC, 0x6C5A724E,
    0xFD0EFFFB, 0x0F853856, 0x3DAED51E, 0x362D3927, 0x0A0FD964, 0x685CA621, 0x9B5B54D1, 0x24362E3A,
    0x0C0A67B1, 0x9357E70F, 0xB4EE96D2, 0x1B9B919E, 0x80C0C54F, 0x61DC20A2, 0x5A774B69, 0x1C121A16,
    0xE293BA0A, 0xC0A02AE5, 0x3C22E043, 0x121B171D, 0x0E090D0B, 0xF28BC7AD, 0x2DB6A8B9, 0x141EA9C8,
    0x57F11985, 0xAF75074C, 0xEE99DDBB, 0xA37F60FD, 0xF701269F, 0x5C72F5BC, 0x44663BC5, 0x5BFB7E34,
    0x8B432976, 0xCB23C6DC, 0xB6EDFC68, 0xB8E4F163, 0xD731DCCA, 0x42638510, 0x13972240, 0x84C61120,
    0x854A247D, 0xD2BB3DF8, 0xAEF93211, 0xC729A16D, 0x1D9E2F4B, 0xDCB230F3, 0x0D8652EC, 0x77C1E3D0,
    0x2BB3166C, 0xA970B999, 0x119448FA, 0x47E96422, 0xA8FC8CC4, 0xA0F03F1A, 0x567D2CD8, 0x223390EF,
    0x87494EC7, 0xD938D1C1, 0x8CCAA2FE, 0x98D40B36, 0xA6F581CF, 0xA57ADE28, 0xDAB78E26, 0x3FADBFA4,
    0x2C3A9DE4, 0x5078920D, 0x6A5FCC9B, 0x547E4662, 0xF68D13C2, 0x90D8B8E8, 0x2E39F75E, 0x82C3AFF5,
    0x9F5D80BE, 0x69D0937C, 0x6FD52DA9, 0xCF2512B3, 0xC8AC993B, 0x10187DA7, 0xE89C636E, 0xDB3BBB7B,
    0xCD267809, 0x6E5918F4, 0xEC9AB701, 0x834F9AA8, 0xE6956E65, 0xAAFFE67E, 0x21BCCF08, 0xEF15E8E6,
    0xBAE79BD9, 0x4A6F36CE, 0xEA9F09D4, 0x29B07CD6, 0x31A4B2AF, 0x2A3F2331, 0xC6A59430, 0x35A266C0,
    0x744EBC37, 0xFC82CAA6, 0xE090D0B0, 0x33A7D815, 0xF104984A, 0x41ECDAF7, 0x7FCD500E, 0x1791F62F,
    0x764DD68D, 0x43EFB04D, 0xCCAA4D54, 0xE49604DF, 0x9ED1B5E3, 0x4C6A881B, 0xC12C1FB8, 0x4665517F,
    0x9D5EEA04, 0x018C355D, 0xFA877473, 0xFB0B412E, 0xB3671D5A, 0x92DBD252, 0xE9105633, 0x6DD64713,
    0x9AD7618C, 0x37A10C7A, 0x59F8148E, 0xEB133C89, 0xCEA927EE, 0xB761C935, 0xE11CE5ED, 0x7A47B13C,
    0x9CD2DF59, 0x55F2733F, 0x1814CE79, 0x73C737BF, 0x53F7CDEA, 0x5FFDAA5B, 0xDF3D6F14, 0x7844DB86,
    0xCAAFF381, 0xB968C43E, 0x3824342C, 0xC2A3405F, 0x161DC372, 0xBCE2250C, 0x283C498B, 0xFF0D9541,
    0x39A80171, 0x080CB3DE, 0xD8B4E49C, 0x6456C190, 0x7BCB8461, 0xD532B670, 0x486C5C74, 0xD0B85742,
};

#ifdef ARM

    //two-level expansion so that the argument is macro-expanded before it is turned into a string
    #define STRINGIFY2(b) #b
    #define STRINGIFY(b) STRINGIFY2(b)
    //rotate the 32-bit value v right by b bits using the ROR instruction.
    //b is pasted into the instruction text as an immediate, so it must be an integer literal;
    //a zero count skips the instruction and yields v unchanged
    #define ror(v, b) ({uint32_t ret; if (b) asm("ror %0, #" STRINGIFY(b) :"=r"(ret):"0"(v)); else ret = v; ret;})

#else

    //rotate the 32-bit value val right by "by" bits and return the result.
    //the count is reduced modulo 32, so no shift below is ever by 32 or more
    //(which would be undefined behaviour); a resulting count of zero returns val unchanged
    static inline uint32_t ror(uint32_t val, uint32_t by)
    {
        by &= 31;

        if (!by)
            return val;

        return (val >> by) | (val << (32 - by));
    }

#endif


/*
 * Build the encryption round-key schedule in ctx->K from the cipher key k.
 *
 * ctx - context whose K array receives the schedule
 * k   - cipher key; AES_KEY_WORDS 32-bit words are read from it
 *
 * The key itself is copied to the start of ctx->K. Seven passes then each
 * derive eight further words from the eight before them, filling words 8..63.
 */
void aesInitForEncr(struct AesContext *ctx, const uint32_t *k)
{
    uint32_t *w = ctx->K;
    uint32_t rcon = 0x01000000;
    uint32_t pass, j, prev;

    //the schedule starts with the key itself
    memcpy(ctx->K, k, sizeof(uint32_t[AES_KEY_WORDS]));

    for (pass = 0; pass < 7; pass++) {
        //first word of the group: previous word rotated left one byte, S-boxed, plus the round constant
        prev = w[7];
        w[8] = w[0] ^ rcon
             ^ ((uint32_t)FwdSbox[(prev >> 16) & 0xff] << 24)
             ^ ((uint32_t)FwdSbox[(prev >>  8) & 0xff] << 16)
             ^ ((uint32_t)FwdSbox[(prev >>  0) & 0xff] <<  8)
             ^ ((uint32_t)FwdSbox[(prev >> 24) & 0xff] <<  0);

        //next three words chain off their predecessor
        for (j = 9; j < 12; j++) {
            w[j] = w[j - 8] ^ w[j - 1];
        }

        //fifth word: previous word S-boxed in place (no rotation, no round constant)
        prev = w[11];
        w[12] = w[4]
             ^ ((uint32_t)FwdSbox[(prev >> 24) & 0xff] << 24)
             ^ ((uint32_t)FwdSbox[(prev >> 16) & 0xff] << 16)
             ^ ((uint32_t)FwdSbox[(prev >>  8) & 0xff] <<  8)
             ^ ((uint32_t)FwdSbox[(prev >>  0) & 0xff] <<  0);

        //last three words chain off their predecessor
        for (j = 13; j < 16; j++) {
            w[j] = w[j - 8] ^ w[j - 1];
        }

        //slide the window to the group just produced; the round constant doubles each pass
        w += 8;
        rcon <<= 1;
    }
}

/*
 * Build the decryption round-key schedule in ctx->K from the cipher key k.
 *
 * ctx      - context whose K array receives the decryption schedule
 * tmpSpace - scratch area; its tmpCtx is overwritten with the encryption schedule for k
 * k        - cipher key, passed through to aesInitForEncr()
 *
 * The decryption schedule is the encryption schedule taken in reverse order of
 * 4-word round keys. The first and last round keys are copied as they are; the
 * ones in between are transformed word by word through RevTab0[FwdSbox[...]].
 */
void aesInitForDecr(struct AesContext *ctx, struct AesSetupTempWorksSpace *tmpSpace, const uint32_t *k)
{
    const uint32_t *enc;
    uint32_t *dec = ctx->K;
    uint32_t round, col, word;

    //the decryption keys are derived from the encryption keys
    aesInitForEncr(&tmpSpace->tmpCtx, k);

    //last encryption round key is the first decryption round key, unchanged
    enc = tmpSpace->tmpCtx.K + 4 * AES_NUM_ROUNDS;
    memcpy(dec, enc, sizeof(uint32_t[4]));
    dec += 4;
    enc -= 4;

    //middle round keys (AES_NUM_ROUNDS - 1 of them), walking the encryption schedule backwards
    for (round = 1; round < AES_NUM_ROUNDS; round++) {
        for (col = 0; col < 4; col++) {
            word = enc[col];
            dec[col] =
                ror(RevTab0[FwdSbox[(word >> 24) & 0xff]],  0) ^
                ror(RevTab0[FwdSbox[(word >> 16) & 0xff]],  8) ^
                ror(RevTab0[FwdSbox[(word >>  8) & 0xff]], 16) ^
                ror(RevTab0[FwdSbox[(word >>  0) & 0xff]], 24);
        }
        dec += 4;
        enc -= 4;
    }

    //first encryption round key is the last decryption round key, unchanged
    memcpy(dec, enc, sizeof(uint32_t[4]));
}

//one output word of a table-driven encryption round, before the round key is mixed in:
//takes the top byte of a, then successively lower bytes of b, c and d
static inline uint32_t aesEncrRoundColumn(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return ror(FwdTab0[(a >> 24) & 0xff],  0) ^
           ror(FwdTab0[(b >> 16) & 0xff],  8) ^
           ror(FwdTab0[(c >>  8) & 0xff], 16) ^
           ror(FwdTab0[(d >>  0) & 0xff], 24);
}

//one output word of the final encryption round, before the round key is mixed in:
//same byte selection as above, but each byte only goes through the S-box and keeps its position
static inline uint32_t aesEncrFinalColumn(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return ((uint32_t)FwdSbox[(a >> 24) & 0xff] << 24) ^
           ((uint32_t)FwdSbox[(b >> 16) & 0xff] << 16) ^
           ((uint32_t)FwdSbox[(c >>  8) & 0xff] <<  8) ^
           ((uint32_t)FwdSbox[(d >>  0) & 0xff] <<  0);
}

/*
 * Encrypt one block of four 32-bit words.
 *
 * ctx - context whose K array holds the encryption round keys
 * src - four input words; all are read before anything is written to dst
 * dst - receives the four output words
 */
void aesEncr(struct AesContext *ctx, const uint32_t *src, uint32_t *dst)
{
    const uint32_t *rk = ctx->K;
    uint32_t s0, s1, s2, s3; //separate variables rather than an array, so they can live in registers
    uint32_t round;

    //initial round-key addition
    s0 = src[0] ^ rk[0];
    s1 = src[1] ^ rk[1];
    s2 = src[2] ^ rk[2];
    s3 = src[3] ^ rk[3];
    rk += 4;

    //every round except the last goes through the combined lookup table
    for (round = 1; round < AES_NUM_ROUNDS; round++, rk += 4) {
        const uint32_t n0 = rk[0] ^ aesEncrRoundColumn(s0, s1, s2, s3);
        const uint32_t n1 = rk[1] ^ aesEncrRoundColumn(s1, s2, s3, s0);
        const uint32_t n2 = rk[2] ^ aesEncrRoundColumn(s2, s3, s0, s1);
        const uint32_t n3 = rk[3] ^ aesEncrRoundColumn(s3, s0, s1, s2);

        s0 = n0;
        s1 = n1;
        s2 = n2;
        s3 = n3;
    }

    //last round uses the plain S-box
    dst[0] = rk[0] ^ aesEncrFinalColumn(s0, s1, s2, s3);
    dst[1] = rk[1] ^ aesEncrFinalColumn(s1, s2, s3, s0);
    dst[2] = rk[2] ^ aesEncrFinalColumn(s2, s3, s0, s1);
    dst[3] = rk[3] ^ aesEncrFinalColumn(s3, s0, s1, s2);
}

//one output word of a table-driven decryption round, before the round key is mixed in:
//takes the top byte of a, then successively lower bytes of b, c and d
static inline uint32_t aesDecrRoundColumn(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return ror(RevTab0[(a >> 24) & 0xff],  0) ^
           ror(RevTab0[(b >> 16) & 0xff],  8) ^
           ror(RevTab0[(c >>  8) & 0xff], 16) ^
           ror(RevTab0[(d >>  0) & 0xff], 24);
}

//one output word of the final decryption round, before the round key is mixed in:
//same byte selection as above, but each byte only goes through the inverse S-box and keeps its position
static inline uint32_t aesDecrFinalColumn(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return ((uint32_t)RevSbox[(a >> 24) & 0xff] << 24) ^
           ((uint32_t)RevSbox[(b >> 16) & 0xff] << 16) ^
           ((uint32_t)RevSbox[(c >>  8) & 0xff] <<  8) ^
           ((uint32_t)RevSbox[(d >>  0) & 0xff] <<  0);
}

/*
 * Decrypt one block of four 32-bit words.
 *
 * ctx - context whose K array holds the decryption round keys
 * src - four input words; all are read before anything is written to dst
 * dst - receives the four output words
 *
 * Compared with aesEncr() the state words feeding each column are taken in the
 * opposite rotation order (0,3,2,1 instead of 0,1,2,3).
 */
void aesDecr(struct AesContext *ctx, const uint32_t *src, uint32_t *dst)
{
    const uint32_t *rk = ctx->K;
    uint32_t s0, s1, s2, s3;
    uint32_t round;

    //initial round-key addition
    s0 = src[0] ^ rk[0];
    s1 = src[1] ^ rk[1];
    s2 = src[2] ^ rk[2];
    s3 = src[3] ^ rk[3];
    rk += 4;

    //every round except the last goes through the combined lookup table
    for (round = 1; round < AES_NUM_ROUNDS; round++, rk += 4) {
        const uint32_t n0 = rk[0] ^ aesDecrRoundColumn(s0, s3, s2, s1);
        const uint32_t n1 = rk[1] ^ aesDecrRoundColumn(s1, s0, s3, s2);
        const uint32_t n2 = rk[2] ^ aesDecrRoundColumn(s2, s1, s0, s3);
        const uint32_t n3 = rk[3] ^ aesDecrRoundColumn(s3, s2, s1, s0);

        s0 = n0;
        s1 = n1;
        s2 = n2;
        s3 = n3;
    }

    //last round uses the plain inverse S-box
    dst[0] = rk[0] ^ aesDecrFinalColumn(s0, s3, s2, s1);
    dst[1] = rk[1] ^ aesDecrFinalColumn(s1, s0, s3, s2);
    dst[2] = rk[2] ^ aesDecrFinalColumn(s2, s1, s0, s3);
    dst[3] = rk[3] ^ aesDecrFinalColumn(s3, s2, s1, s0);
}

/*
 * Prepare a CBC context for encryption.
 *
 * ctx - context to initialise
 * k   - cipher key, passed through to aesInitForEncr()
 * iv  - initialisation vector; AES_BLOCK_WORDS 32-bit words are copied into ctx->iv
 */
void aesCbcInitForEncr(struct AesCbcContext *ctx, const uint32_t *k, const uint32_t *iv)
{
    const size_t ivBytes = sizeof(uint32_t) * (AES_BLOCK_WORDS);

    //key schedule first, then the caller's IV becomes the initial chaining value
    aesInitForEncr(&ctx->aes, k);
    memcpy(ctx->iv, iv, ivBytes);
}

/*
 * Prepare a CBC context for decryption.
 *
 * ctx - context to initialise
 * k   - cipher key, passed through to aesInitForDecr()
 * iv  - initialisation vector; AES_BLOCK_WORDS 32-bit words are copied into ctx->iv
 *
 * The scratch space aesInitForDecr() needs is a local of this function and is
 * cleared again before returning.
 */
void aesCbcInitForDecr(struct AesCbcContext *ctx, const uint32_t *k, const uint32_t *iv)
{
    struct AesSetupTempWorksSpace tmp;
    volatile uint8_t *wipe = (volatile uint8_t *)&tmp;
    size_t left = sizeof(tmp);

    aesInitForDecr(&ctx->aes, &tmp, k);
    memcpy(ctx->iv, iv, sizeof(uint32_t[AES_BLOCK_WORDS]));

    //aesInitForDecr() leaves the expanded encryption key schedule in tmp; do not leave that
    //key material behind on the stack. The stores go through a volatile pointer so the
    //compiler is not free to drop them as dead writes the way it may with a plain memset()
    while (left--)
        *wipe++ = 0;
}

/*
 * Encrypt one block in CBC mode.
 *
 * ctx - CBC context; ctx->iv holds the chaining value and is updated
 * src - AES_BLOCK_WORDS input words
 * dst - receives AES_BLOCK_WORDS output words
 */
void aesCbcEncr(struct AesCbcContext *ctx, const uint32_t *src, uint32_t *dst)
{
    uint32_t w;

    //fold the input into the chaining value...
    for (w = 0; w < AES_BLOCK_WORDS; w++) {
        ctx->iv[w] ^= src[w];
    }

    //...encrypt that, and keep the output as the chaining value for the next block
    aesEncr(&ctx->aes, ctx->iv, dst);
    memcpy(ctx->iv, dst, sizeof(uint32_t) * (AES_BLOCK_WORDS));
}

/*
 * Decrypt one block in CBC mode.
 *
 * ctx - CBC context; ctx->iv holds the chaining value and is updated
 * src - AES_BLOCK_WORDS input words
 * dst - receives AES_BLOCK_WORDS output words
 *
 * The result is assembled in a local buffer and dst is written last, after src
 * has been saved as the next chaining value.
 */
void aesCbcDecr(struct AesCbcContext *ctx, const uint32_t *src, uint32_t *dst)
{
    uint32_t plain[AES_BLOCK_WORDS];
    uint32_t w;

    aesDecr(&ctx->aes, src, plain);

    //undo the chaining: XOR with the previous input block (or the IV for the first block)
    for (w = 0; w < AES_BLOCK_WORDS; w++) {
        plain[w] ^= ctx->iv[w];
    }

    //this input block is the chaining value for the next call; only then hand out the result
    memcpy(ctx->iv, src, sizeof(uint32_t) * (AES_BLOCK_WORDS));
    memcpy(dst, plain, sizeof(uint32_t) * (AES_BLOCK_WORDS));
}