/* Exercises the x86-64 AES instructions (AESDEC, AESDECLAST, AESENC,
   AESENCLAST, AESIMC, AESKEYGENASSIST) through inline assembly and prints
   every result, so that the output of a native run can be compared with the
   output of a run under Valgrind. */

#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;

typedef  UChar  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* A 128-bit value, viewable either as 16 bytes or as four 32-bit words. */
typedef
   union {
      UChar uChar[16];
      UInt  uInt[4];
   }
   V128;

/* Convert one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value
   0..15.  Any other character trips the assertion. */
static UChar fromhex ( char x )
{
   if (x >= '0' && x <= '9') { return (UChar)(x - '0'); }
   if (x >= 'A' && x <= 'F') { return (UChar)(x - 'A' + 10); }
   if (x >= 'a' && x <= 'f') { return (UChar)(x - 'a' + 10); }
   assert(0);
   /* Only reachable when assertions are compiled out (NDEBUG): return a
      defined value instead of falling off the end of a non-void function. */
   return 0;
}

/* Parse a string of exactly 32 hex digits into *dst.
   The string is read from its end: the last two characters form
   dst->uChar[0], the first two form dst->uChar[15]. */
static void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 32 );
   for (i = 0; i < 16; i++) {
      UChar lo = fromhex( summary[31 - 2*i] );
      UChar hi = fromhex( summary[31 - 2*i - 1] );
      assert(lo < 16);
      assert(hi < 16);
      dst->uChar[i] = (UChar)((hi << 4) | lo);
   }
}

/* Map a nibble value 0..15 to its lower-case hex digit character. */
static int tohex ( int nib )
{
   if (nib < 10)
      return '0' + nib;
   else
      return 'a' + nib - 10;
}

/* Format *src as 32 lower-case hex digits plus a terminating NUL, so
   summary must have room for 33 characters.
   NOTE: bytes are written starting with uChar[0], which is the opposite
   byte order to the one expand() reads.  The printed results are compared
   against a recorded native run (see main), so this order must be kept. */
static void unexpand ( const V128* src, char* summary )
{
   Int i;
   for (i = 0; i < 16; i++) {
      *summary++ = (char)tohex((src->uChar[i] >> 4) & 0xf);
      *summary++ = (char)tohex(src->uChar[i] & 0xf);
   }
   *summary = 0;
}

/* If s_exp is non-empty, parse it with expand() and assert that it is
   byte-for-byte equal to *res.  An empty s_exp means "no expectation". */
static void check_expected ( const V128* res, const char* s_exp )
{
   V128 expected;
   if (s_exp[0] == '\0')
      return;
   expand(&expected, s_exp);
   assert(0 == memcmp(res, &expected, 16));
}

/* Note on the asm sequences below: the original code bracketed each
   sequence with "subq $1024, %rsp" / "addq $1024, %rsp".  Nothing in the
   sequences pushes or calls, and the "m" operands may be addressed
   relative to %rsp by the compiler, in which case moving %rsp before the
   operands are used makes them refer to the wrong memory.  The stack
   pointer is therefore left untouched. */

/* Load argL into xmm1 and argR into xmm2, execute "INSN %xmm2, %xmm1"
   (INSN is a string literal naming the instruction) and store xmm1
   into res. */
#define AES_XMM2_TO_XMM1(INSN, res, argL, argR)                     \
   __asm__ __volatile__(                                            \
      "movdqu %1, %%xmm1"         "\n\t"                            \
      "movdqu %2, %%xmm2"         "\n\t"                            \
      INSN " %%xmm2, %%xmm1"      "\n\t"                            \
      "movdqu %%xmm1, %0"         "\n\t"                            \
      : /*out*/ "=m"(res)                                           \
      : /*in*/ "m"(argL), "m"(argR)                                 \
      : /*trash*/ "xmm1", "xmm2"                                    \
   )

/* Load argL into xmm2, execute "aeskeygenassist $IMM, %xmm2, %xmm1" and
   store xmm1 into res.  IMM must be a literal integer: it is pasted into
   the instruction text as the immediate operand. */
#define KEYGENASSIST_ASM(IMM, res, argL)                            \
   __asm__ __volatile__(                                            \
      "movdqu %1, %%xmm2"                           "\n\t"          \
      "aeskeygenassist $" #IMM ", %%xmm2, %%xmm1"   "\n\t"          \
      "movdqu %%xmm1, %0"                           "\n\t"          \
      : /*out*/ "=m"(res)                                           \
      : /*in*/ "m"(argL)                                            \
      : /*trash*/ "xmm1", "xmm2"                                    \
   )

/* Run AESDEC with xmm1 = s_argL (State) and xmm2 = s_argR (Round key);
   the result is delivered in xmm1.  The result is checked against s_exp
   (unless s_exp is empty) and printed. */
static void AESDEC ( const char* s_argL, const char* s_argR,
                     const char* s_exp )
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);
   AES_XMM2_TO_XMM1("aesdec", res, argL, argR);
   check_expected(&res, s_exp);
   unexpand(&res, s_res);
   printf ("aesdec %s %s result %s\n", s_argL, s_argR, s_res);
}

/* Run AESDECLAST with xmm1 = s_argL (State) and xmm2 = s_argR (Round key);
   the result is delivered in xmm1.  The result is checked against s_exp
   (unless s_exp is empty) and printed. */
static void AESDECLAST ( const char* s_argL, const char* s_argR,
                         const char* s_exp )
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);
   AES_XMM2_TO_XMM1("aesdeclast", res, argL, argR);
   check_expected(&res, s_exp);
   unexpand(&res, s_res);
   printf ("aesdeclast %s %s result %s\n", s_argL, s_argR, s_res);
}

/* Run AESENC with xmm1 = s_argL (State) and xmm2 = s_argR (Round key);
   the result is delivered in xmm1.  The result is checked against s_exp
   (unless s_exp is empty) and printed. */
static void AESENC ( const char* s_argL, const char* s_argR,
                     const char* s_exp )
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);
   AES_XMM2_TO_XMM1("aesenc", res, argL, argR);
   check_expected(&res, s_exp);
   unexpand(&res, s_res);
   printf ("aesenc %s %s result %s\n", s_argL, s_argR, s_res);
}

/* Run AESENCLAST with xmm1 = s_argL (State) and xmm2 = s_argR (Round key);
   the result is delivered in xmm1.  The result is checked against s_exp
   (unless s_exp is empty) and printed. */
static void AESENCLAST ( const char* s_argL, const char* s_argR,
                         const char* s_exp )
{
   V128 argL, argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + NUL */

   expand(&argL, s_argL);
   expand(&argR, s_argR);
   AES_XMM2_TO_XMM1("aesenclast", res, argL, argR);
   check_expected(&res, s_exp);
   unexpand(&res, s_res);
   printf ("aesenclast %s %s result %s\n", s_argL, s_argR, s_res);
}

/* Run AESIMC on s_argR (Round key).  This one deliberately tests another
   way to pass the input: the instruction reads its source directly from
   memory and delivers the result in xmm5.  The result is checked against
   s_exp (unless s_exp is empty) and printed. */
static void AESIMC ( const char* s_argR, const char* s_exp )
{
   V128 argR;
   V128 res;
   char s_res[33];   /* 32 hex digits + NUL */

   expand(&argR, s_argR);
   __asm__ __volatile__(
      "aesimc %1, %%xmm5"         "\n\t"
      "movdqu %%xmm5, %0"         "\n\t"
      : /*out*/ "=m"(res)
      : /*in*/ "m"(argR)
      : /*trash*/ "xmm5"
   );
   check_expected(&res, s_exp);
   unexpand(&res, s_res);
   printf ("aesimc %s result %s\n", s_argR, s_res);
}

/* Run AESKEYGENASSIST: xmm2 holds the 128-bit input s_argL, the immediate
   holds the RCON value, and the result is delivered in xmm1.
   The immediate has to be part of the instruction text, so only the values
   1, 2 and 8 are supported; any other value trips the assertion.
   The result is checked against s_exp (unless s_exp is empty) and
   printed. */
static void AESKEYGENASSIST ( int imm, const char* s_argL,
                              const char* s_exp )
{
   V128 argL;
   V128 res;
   char s_res[33];   /* 32 hex digits + NUL */

   expand(&argL, s_argL);
   switch (imm) {
      case 1:
         KEYGENASSIST_ASM(1, res, argL);
         break;
      case 2:
         KEYGENASSIST_ASM(2, res, argL);
         break;
      case 8:
         KEYGENASSIST_ASM(8, res, argL);
         break;
      default:
         assert (0);
         /* With assertions compiled out, do not go on to read the
            uninitialised result. */
         return;
   }
   check_expected(&res, s_exp);
   unexpand(&res, s_res);
   printf ("aeskeygenassist %d %s result %s\n", imm, s_argL, s_res);
}

/* One row of test data: two 128-bit operands written as 32 hex digits. */
typedef struct Aes_Args {
   const char* argL;
   const char* argR;
   int imm; // only for aeskeygenassist
} Aes_Args;

/* Just a bunch of various data to compare a native run
   with a run under Valgrind.  The table ends with a NULL argL. */
static const Aes_Args aes_args[] = {
   {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
    "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
    8},
   {"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
    8},
   {"3243f6a8885a308d313198a2e0370734",
    "2b7e151628aed2a6abf7158809cf4f3c",
    2},
   {"193de3bea0f4e22b9ac68d2ae9f84808",
    "d42711aee0bf98f1b8b45de51e415230",
    2},
   {"d4bf5d30e0b452aeb84111f11e2798e5",
    "046681e5e0cb199a48f8d37a2806264c",
    1},
   {"a0fafe1788542cb123a339392a6c7605",
    "a49c7ff2689f352b6b5bea43026a5049",
    1},
   {"49ded28945db96f17f39871a7702533b",
    "49db873b453953897f02d2f177de961a",
    8},
   {"584dcaf11b4b5aacdbe7caa81b6bb0e5",
    "f2c295f27a96b9435935807a7359f67f",
    8},
   {"aa8f5f0361dde3ef82d24ad26832469a",
    "ac73cf7befc111df13b5d6b545235ab8",
    2},
   {"acc1d6b8efb55a7b1323cfdf457311b5",
    "75ec0993200b633353c0cf7cbb25d0dc",
    2},
   {"e9317db5cb322c723d2e895faf090794",
    "d014f9a8c9ee2589e13f0cc8b6630ca6",
    1},
   {NULL,
    NULL,
    0}
};

int main ( void )
{
   const Aes_Args* a;

   /* test the various instructions, using the examples provided
      in  "White Paper Intel Advanced Encryption Standard AES
          instruction set" January 2010 (26/1/2010)
          Rev. 3.0
          by Shay Gueron */
   AESKEYGENASSIST(1,
                   "3c4fcf098815f7aba6d2ae2816157e2b",
                   "01eb848beb848a013424b5e524b5e434");
   AESENC("7b5b54657374566563746f725d53475d",
          "48692853686179295b477565726f6e5d",
          "a8311c2f9fdba3c58b104b58ded7e595");
   AESENCLAST("7b5b54657374566563746f725d53475d",
              "48692853686179295b477565726f6e5d",
              "c7fb881e938c5964177ec42553fdc611");
   AESDEC("7b5b54657374566563746f725d53475d",
          "48692853686179295b477565726f6e5d",
          "138ac342faea2787b58eb95eb730392a");
   AESDECLAST("7b5b54657374566563746f725d53475d",
              "48692853686179295b477565726f6e5d",
              "c5a391ef6b317f95d410637b72a593d0");
   /* ??? the AESIMC example given in the Intel White paper
      seems wrong.
      The below fails both under Valgrind and natively.
      AESIMC("48692853686179295b477565726f6e5d",
             "627a6f6644b109c82b18330a81c3b3e5");
      So we use the example given for the InvMixColumns
      transformation. */
   AESIMC("8dcab9dc035006bc8f57161e00cafd8d",
          "d635a667928b5eaeeec9cc3bc55f5777");

   /* and now a bunch of other calls. The below are verified
      using the aes.stdout.exp (produced by a native run).
      An empty expectation string means only the printed output is
      checked. */
   for (a = aes_args; a->argL != NULL; a++) {
      AESKEYGENASSIST(a->imm, a->argL, "");
      AESENC(a->argL, a->argR, "");
      AESENCLAST(a->argL, a->argR, "");
      AESDEC(a->argL, a->argR, "");
      AESDECLAST(a->argL, a->argR, "");
      AESIMC(a->argL, "");
   }
   return 0;
}